shithub: libvpx

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -701,9 +701,6 @@

 prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"

 specialize vp9_short_fdct4x4 sse2

-prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch"

-specialize vp9_short_fdct8x4 sse2

 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"

 specialize vp9_short_fdct32x32 sse2

@@ -715,9 +712,6 @@

 prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"

 specialize vp9_short_walsh4x4

-prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch"

-specialize vp9_short_walsh8x4

 # Motion search

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -172,7 +172,6 @@

   BLOCK_SIZE sb64_partitioning;

   void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);

-  void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);

   void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);

   void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);

   void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -178,11 +178,6 @@

-void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {

-    vp9_short_fdct4x4_c(input, output, pitch);

-    vp9_short_fdct4x4_c(input + 4, output + 16, pitch);

-}

 static void fdct8(const int16_t *input, int16_t *output) {

   /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

   /*needs32*/ int t0, t1, t2, t3;

@@ -646,12 +641,6 @@

     op += 4;

-void vp9_short_walsh8x4_c(int16_t *input, int16_t *output, int pitch) {

-  vp9_short_walsh4x4_c(input,   output,    pitch);

-  vp9_short_walsh4x4_c(input + 4, output + 16, pitch);

-}

 // Rewrote to use same algorithm as others.

 static void fdct16(const int16_t in[16], int16_t out[16]) {

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -1861,7 +1861,6 @@

 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {

   if (lossless) {

     // printf("Switching to lossless\n");

-    cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;

     cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;

     cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;

     cpi->mb.optimize = 0;

@@ -1870,7 +1869,6 @@

     cpi->common.tx_mode = ONLY_4X4;

   } else {

     // printf("Not lossless\n");

-    cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;

     cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;

     cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -955,10 +955,8 @@

   cpi->mb.fwd_txm16x16  = vp9_short_fdct16x16;

   cpi->mb.fwd_txm8x8    = vp9_short_fdct8x8;

-  cpi->mb.fwd_txm8x4    = vp9_short_fdct8x4;

   cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;

   if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {

-    cpi->mb.fwd_txm8x4    = vp9_short_walsh8x4;

     cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4;

--- a/vp9/encoder/x86/vp9_dct_sse2.c

+++ b/vp9/encoder/x86/vp9_dct_sse2.c

@@ -112,11 +112,6 @@

-void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) {

-  vp9_short_fdct4x4_sse2(input, output, pitch);

-  vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch);

-}

 static INLINE void load_buffer_4x4(int16_t *input, __m128i *in, int stride) {

   const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);

   const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);

--

⑨