shithub: libvpx

--- a/test/dct16x16_test.cc

+++ b/test/dct16x16_test.cc

@@ -901,14 +901,6 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans16x16HT,

     ::testing::Values(

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),

         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),

         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),

         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),

--- a/test/fdct4x4_test.cc

+++ b/test/fdct4x4_test.cc

@@ -531,14 +531,6 @@

 INSTANTIATE_TEST_CASE_P(

     SSE2, Trans4x4HT,

     ::testing::Values(

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),

-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),

         make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),

         make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),

         make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -933,13 +933,13 @@

   # fdct functions

   add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_highbd_fht4x4 sse2/;

+  specialize qw/vp9_highbd_fht4x4/;

   add_proto qw/void vp9_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_highbd_fht8x8 sse2/;

+  specialize qw/vp9_highbd_fht8x8/;

   add_proto qw/void vp9_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_highbd_fht16x16 sse2/;

+  specialize qw/vp9_highbd_fht16x16/;

   add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";

   specialize qw/vp9_highbd_fwht4x4/;

--- a/vp9/encoder/x86/vp9_dct_sse2.c

+++ b/vp9/encoder/x86/vp9_dct_sse2.c

@@ -2266,108 +2266,6 @@

   store_output(&in1, output);

-#if CONFIG_VP9_HIGHBITDEPTH

-/* These SSE2 versions of the FHT functions only actually use SSE2 in the

- * DCT_DCT case in all other cases, they revert to C code which is identical

- * to that used by the C versions of them.

- */

-void vp9_highbd_fht4x4_sse2(const int16_t *input, tran_low_t *output,

-                            int stride, int tx_type) {

-  if (tx_type == DCT_DCT) {

-    vp9_highbd_fdct4x4_sse2(input, output, stride);

-  } else {

-    tran_low_t out[4 * 4];

-    tran_low_t *outptr = &out[0];

-    int i, j;

-    tran_low_t temp_in[4], temp_out[4];

-    const transform_2d ht = FHT_4[tx_type];

-    // Columns

-    for (i = 0; i < 4; ++i) {

-      for (j = 0; j < 4; ++j)

-        temp_in[j] = input[j * stride + i] * 16;

-      if (i == 0 && temp_in[0])

-        temp_in[0] += 1;

-      ht.cols(temp_in, temp_out);

-      for (j = 0; j < 4; ++j)

-        outptr[j * 4 + i] = temp_out[j];

-    }

-    // Rows

-    for (i = 0; i < 4; ++i) {

-      for (j = 0; j < 4; ++j)

-        temp_in[j] = out[j + i * 4];

-      ht.rows(temp_in, temp_out);

-      for (j = 0; j < 4; ++j)

-        output[j + i * 4] = (temp_out[j] + 1) >> 2;

-    }

-  }

-}

-void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output,

-                            int stride, int tx_type) {

-  if (tx_type == DCT_DCT) {

-    vp9_highbd_fdct8x8_sse2(input, output, stride);

-  } else {

-    tran_low_t out[64];

-    tran_low_t *outptr = &out[0];

-    int i, j;

-    tran_low_t temp_in[8], temp_out[8];

-    const transform_2d ht = FHT_8[tx_type];

-    // Columns

-    for (i = 0; i < 8; ++i) {

-      for (j = 0; j < 8; ++j)

-        temp_in[j] = input[j * stride + i] * 4;

-      ht.cols(temp_in, temp_out);

-      for (j = 0; j < 8; ++j)

-        outptr[j * 8 + i] = temp_out[j];

-    }

-    // Rows

-    for (i = 0; i < 8; ++i) {

-      for (j = 0; j < 8; ++j)

-        temp_in[j] = out[j + i * 8];

-      ht.rows(temp_in, temp_out);

-      for (j = 0; j < 8; ++j)

-        output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;

-    }

-  }

-}

-void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,

-                              int stride, int tx_type) {

-  if (tx_type == DCT_DCT) {

-    vp9_highbd_fdct16x16_sse2(input, output, stride);

-  } else {

-    tran_low_t out[256];

-    tran_low_t *outptr = &out[0];

-    int i, j;

-    tran_low_t temp_in[16], temp_out[16];

-    const transform_2d ht = FHT_16[tx_type];

-    // Columns

-    for (i = 0; i < 16; ++i) {

-      for (j = 0; j < 16; ++j)

-        temp_in[j] = input[j * stride + i] * 4;

-      ht.cols(temp_in, temp_out);

-      for (j = 0; j < 16; ++j)

-        outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;

-    }

-    // Rows

-    for (i = 0; i < 16; ++i) {

-      for (j = 0; j < 16; ++j)

-        temp_in[j] = out[j + i * 16];

-      ht.rows(temp_in, temp_out);

-      for (j = 0; j < 16; ++j)

-        output[j + i * 16] = temp_out[j];

-    }

-  }

-}

-#endif  // CONFIG_VP9_HIGHBITDEPTH

/*

  * The DCTnxn functions are defined using the macros below. The main code for

  * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h &

--

⑨