shithub: libvpx

--- a/test/avg_test.cc

+++ b/test/avg_test.cc

@@ -315,11 +315,13 @@

     ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),

                       make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(C, SatdTest,

                         ::testing::Values(make_tuple(16, &vpx_satd_c),

                                           make_tuple(64, &vpx_satd_c),

                                           make_tuple(256, &vpx_satd_c),

                                           make_tuple(1024, &vpx_satd_c)));

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_SSE2

 INSTANTIATE_TEST_CASE_P(

@@ -345,6 +347,7 @@

                       make_tuple(64, &vpx_int_pro_col_sse2,

                                  &vpx_int_pro_col_c)));

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,

                         ::testing::Values(make_tuple(16, &vpx_satd_sse2),

                                           make_tuple(64, &vpx_satd_sse2),

@@ -351,6 +354,7 @@

                                           make_tuple(256, &vpx_satd_sse2),

                                           make_tuple(1024, &vpx_satd_sse2)));

 #endif

+#endif  // HAVE_SSE2

 #if HAVE_NEON

 INSTANTIATE_TEST_CASE_P(

@@ -376,12 +380,14 @@

                       make_tuple(64, &vpx_int_pro_col_neon,

                                  &vpx_int_pro_col_c)));

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(NEON, SatdTest,

                         ::testing::Values(make_tuple(16, &vpx_satd_neon),

                                           make_tuple(64, &vpx_satd_neon),

                                           make_tuple(256, &vpx_satd_neon),

                                           make_tuple(1024, &vpx_satd_neon)));

-#endif

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

+#endif  // HAVE_NEON

 #if HAVE_MSA

 INSTANTIATE_TEST_CASE_P(

@@ -407,11 +413,13 @@

                       make_tuple(64, &vpx_int_pro_col_msa,

                                  &vpx_int_pro_col_c)));

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(MSA, SatdTest,

                         ::testing::Values(make_tuple(16, &vpx_satd_msa),

                                           make_tuple(64, &vpx_satd_msa),

                                           make_tuple(256, &vpx_satd_msa),

                                           make_tuple(1024, &vpx_satd_msa)));

-#endif

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

+#endif  // HAVE_MSA

 }  // namespace

--- a/test/hadamard_test.cc

+++ b/test/hadamard_test.cc

@@ -144,6 +144,7 @@

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,

                         ::testing::Values(&vpx_hadamard_8x8_c));

@@ -166,6 +167,7 @@

 INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,

                         ::testing::Values(&vpx_hadamard_8x8_msa));

 #endif  // HAVE_MSA

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

 class Hadamard16x16Test : public HadamardTestBase {};

@@ -210,6 +212,7 @@

+#if !CONFIG_VP9_HIGHBITDEPTH

 INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,

                         ::testing::Values(&vpx_hadamard_16x16_c));

@@ -227,4 +230,5 @@

 INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,

                         ::testing::Values(&vpx_hadamard_16x16_msa));

 #endif  // HAVE_MSA

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

 }  // namespace

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -132,6 +132,9 @@

   add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";

   specialize qw/vp9_highbd_block_error_8bit sse2 avx/;

+  add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";

+  specialize qw/vp9_block_error_fp/;

   add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

   add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -1815,7 +1815,9 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-  {

+  // TODO(jingning): Implement integral projection functions for high bit-depth

+  // setting and remove this part of code.

+  if (xd->bd != 8) {

     unsigned int this_sad;

     tmp_mv->row = 0;

     tmp_mv->col = 0;

--- a/vp9/encoder/vp9_pickmode.c

+++ b/vp9/encoder/vp9_pickmode.c

@@ -590,25 +590,10 @@

   *out_dist_sum += dist << 4;

-#if CONFIG_VP9_HIGHBITDEPTH

 static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,

                       int *skippable, int64_t *sse, BLOCK_SIZE bsize,

                       TX_SIZE tx_size) {

   MACROBLOCKD *xd = &x->e_mbd;

-  unsigned int var_y, sse_y;

-  (void)tx_size;

-  model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,

-                    &sse_y);

-  *sse = INT_MAX;

-  *skippable = 0;

-  return;

-}

-#else

-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,

-                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,

-                      TX_SIZE tx_size) {

-  MACROBLOCKD *xd = &x->e_mbd;

   const struct macroblockd_plane *pd = &xd->plane[0];

   struct macroblock_plane *const p = &x->plane[0];

   const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];

@@ -624,6 +609,20 @@

   const int bw = 4 * num_4x4_w;

   const int bh = 4 * num_4x4_h;

+#if CONFIG_VP9_HIGHBITDEPTH

+  // TODO(jingning): Implement the high bit-depth Hadamard transforms and

+  // remove this check condition.

+  if (xd->bd != 8) {

+    unsigned int var_y, sse_y;

+    (void)tx_size;

+    model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist,

+                      &var_y, &sse_y);

+    *sse = INT_MAX;

+    *skippable = 0;

+    return;

+  }

+#endif

   (void)cpi;

   // The max tx_size passed in is TX_16X16.

@@ -648,7 +647,7 @@

         switch (tx_size) {

           case TX_16X16:

-            vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);

+            vpx_hadamard_16x16(src_diff, diff_stride, coeff);

             vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,

                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,

                             pd->dequant, eob, scan_order->scan,

@@ -655,7 +654,7 @@

                             scan_order->iscan);

             break;

           case TX_8X8:

-            vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);

+            vpx_hadamard_8x8(src_diff, diff_stride, coeff);

             vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,

                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,

                             pd->dequant, eob, scan_order->scan,

@@ -699,7 +698,7 @@

         if (*eob == 1)

           this_rdc->rate += (int)abs(qcoeff[0]);

         else if (*eob > 1)

-          this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);

+          this_rdc->rate += vpx_satd(qcoeff, step << 4);

         this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;

@@ -711,7 +710,6 @@

   this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);

   this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);

-#endif

 static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,

                                MACROBLOCK *x, MACROBLOCKD *xd,

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -321,7 +321,7 @@

   return error;

-int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,

+int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,

                              int block_size) {

   int i;

   int64_t error = 0;

--- a/vpx_dsp/avg.c

+++ b/vpx_dsp/avg.c

@@ -67,9 +67,10 @@

 // The order of the output coeff of the hadamard is not important. For

 // optimization purposes the final transpose may be skipped.

 void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,

-                        int16_t *coeff) {

+                        tran_low_t *coeff) {

   int idx;

   int16_t buffer[64];

+  int16_t buffer2[64];

   int16_t *tmp_buf = &buffer[0];

   for (idx = 0; idx < 8; ++idx) {

     hadamard_col8(src_diff, src_stride, tmp_buf);  // src_diff: 9 bit

@@ -80,17 +81,19 @@

   tmp_buf = &buffer[0];

   for (idx = 0; idx < 8; ++idx) {

-    hadamard_col8(tmp_buf, 8, coeff);  // tmp_buf: 12 bit

-                                       // dynamic range [-2040, 2040]

-    coeff += 8;                        // coeff: 15 bit

-                                       // dynamic range [-16320, 16320]

+    hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx);  // tmp_buf: 12 bit

+    // dynamic range [-2040, 2040]

+    // buffer2: 15 bit

+    // dynamic range [-16320, 16320]

     ++tmp_buf;

+  for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];

 // In place 16x16 2D Hadamard transform

 void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,

-                          int16_t *coeff) {

+                          tran_low_t *coeff) {

   int idx;

   for (idx = 0; idx < 4; ++idx) {

     // src_diff: 9 bit, dynamic range [-255, 255]

@@ -101,15 +104,15 @@

   // coeff: 15 bit, dynamic range [-16320, 16320]

   for (idx = 0; idx < 64; ++idx) {

-    int16_t a0 = coeff[0];

-    int16_t a1 = coeff[64];

-    int16_t a2 = coeff[128];

-    int16_t a3 = coeff[192];

+    tran_low_t a0 = coeff[0];

+    tran_low_t a1 = coeff[64];

+    tran_low_t a2 = coeff[128];

+    tran_low_t a3 = coeff[192];

-    int16_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]

-    int16_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range

-    int16_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]

-    int16_t b3 = (a2 - a3) >> 1;

+    tran_low_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]

+    tran_low_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range

+    tran_low_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]

+    tran_low_t b3 = (a2 - a3) >> 1;

     coeff[0] = b0 + b2;  // 16 bit, [-32640, 32640]

     coeff[64] = b1 + b3;

@@ -122,7 +125,7 @@

 // coeff: 16 bits, dynamic range [-32640, 32640].

 // length: value range {16, 64, 256, 1024}.

-int vpx_satd_c(const int16_t *coeff, int length) {

+int vpx_satd_c(const tran_low_t *coeff, int length) {

   int i;

   int satd = 0;

   for (i = 0; i < length; ++i) satd += abs(coeff[i]);

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -885,14 +885,26 @@

   add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";

   specialize qw/vpx_minmax_8x8 sse2 neon msa/;

-  add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";

-  specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";

-  add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";

-  specialize qw/vpx_hadamard_16x16 sse2 neon msa/;

+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

+    add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff";

+    specialize qw/vpx_hadamard_8x8/;

-  add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";

-  specialize qw/vpx_satd sse2 neon msa/;

+    add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, tran_low_t *coeff";

+    specialize qw/vpx_hadamard_16x16/;

+    add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";

+    specialize qw/vpx_satd/;

+  } else {

+    add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";

+    specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";

+    add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";

+    specialize qw/vpx_hadamard_16x16 sse2 neon msa/;

+    add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";

+    specialize qw/vpx_satd sse2 neon msa/;

+  }

   add_proto qw/void vpx_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height";

   specialize qw/vpx_int_pro_row sse2 neon msa/;

--

⑨