shithub: libvpx

--- a/test/dct16x16_test.cc

+++ b/test/dct16x16_test.cc

@@ -395,8 +395,7 @@

       for (int j = 0; j < kNumCoeffs; ++j)

         coeff[j] = round(out_r[j]);

-      const int pitch = 32;

-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));

+      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));

       for (int j = 0; j < kNumCoeffs; ++j) {

         const uint32_t diff = dst[j] - src[j];

@@ -421,7 +420,7 @@

     fwd_txfm_ = GET_PARAM(0);

     inv_txfm_ = GET_PARAM(1);

     tx_type_  = GET_PARAM(2);

-    pitch_    = 32;

+    pitch_    = 16;

     fwd_txfm_ref = fdct16x16_ref;

   virtual void TearDown() { libvpx_test::ClearSystemState(); }

@@ -431,7 +430,7 @@

     fwd_txfm_(in, out, stride);

   void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {

-    inv_txfm_(out, dst, stride >> 1);

+    inv_txfm_(out, dst, stride);

   fdct_t fwd_txfm_;

--- a/vp9/common/vp9_rtcd_defs.sh

+++ b/vp9/common/vp9_rtcd_defs.sh

@@ -707,7 +707,7 @@

 prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int stride"

 specialize vp9_short_fdct32x32_rd sse2

-prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"

+prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int stride"

 specialize vp9_short_fdct16x16 sse2

 prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -302,7 +302,7 @@

-void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {

+void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int stride) {

   // The 2D transform is done with two passes which are actually pretty

   // similar. In the first one, we transform the columns and transpose

   // the results. In the second one, we transform the rows. To achieve that,

@@ -309,7 +309,6 @@

   // as the first pass results are transposed, we transpose the columns (that

   // is the transposed rows) and transpose the results (so that it goes back

   // in normal/row positions).

-  const int stride = pitch >> 1;

   int pass;

   // We need an intermediate buffer between passes.

   int16_t intermediate[256];

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -379,7 +379,7 @@

       xoff = 16 * (block & twmask);

       yoff = 16 * (block >> twl);

       src_diff = p->src_diff + 4 * bw * yoff + xoff;

-      vp9_short_fdct16x16(src_diff, coeff, bw * 8);

+      vp9_short_fdct16x16(src_diff, coeff, bw * 4);

       vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,

                      p->quant, p->quant_shift, qcoeff, dqcoeff,

                      pd->dequant, p->zbin_extra, eob, scan, iscan);

@@ -559,7 +559,7 @@

       if (tx_type != DCT_DCT)

         vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);

       else

-        vp9_short_fdct16x16(src_diff, coeff, bw * 8);

+        vp9_short_fdct16x16(src_diff, coeff, bw * 4);

       vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,

                      p->quant, p->quant_shift, qcoeff, dqcoeff,

                      pd->dequant, p->zbin_extra, eob, scan, iscan);

--- a/vp9/encoder/x86/vp9_dct_sse2.c

+++ b/vp9/encoder/x86/vp9_dct_sse2.c

@@ -1056,7 +1056,7 @@

   write_buffer_8x8(output, in, 8);

-void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int pitch) {

+void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int stride) {

   // The 2D transform is done with two passes which are actually pretty

   // similar. In the first one, we transform the columns and transpose

   // the results. In the second one, we transform the rows. To achieve that,

@@ -1063,7 +1063,6 @@

   // as the first pass results are transposed, we transpose the columns (that

   // is the transposed rows) and transpose the results (so that it goes back

   // in normal/row positions).

-  const int stride = pitch >> 1;

   int pass;

   // We need an intermediate buffer between passes.

   DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256);

--

⑨