shithub: libvpx

--- a/vp9/common/vp9_idctllm.c

+++ b/vp9/common/vp9_idctllm.c

@@ -122,7 +122,7 @@

-void idct4_1d(int16_t *input, int16_t *output) {

+static void idct4_1d(int16_t *input, int16_t *output) {

   int16_t step[4];

   int temp1, temp2;

   // stage 1

@@ -200,7 +200,7 @@

-void idct8_1d(int16_t *input, int16_t *output) {

+static void idct8_1d(int16_t *input, int16_t *output) {

   int16_t step1[8], step2[8];

   int temp1, temp2;

   // stage 1

@@ -320,10 +320,9 @@

 void vp9_short_iht4x4_c(int16_t *input, int16_t *output,

                         int pitch, TX_TYPE tx_type) {

+  int i, j;

   int16_t out[4 * 4];

   int16_t *outptr = out;

-  const int half_pitch = pitch >> 1;

-  int i, j;

   int16_t temp_in[4], temp_out[4];

   const transform_2d ht = IHT_4[tx_type];

@@ -340,7 +339,7 @@

       temp_in[j] = out[j * 4 + i];

     ht.cols(temp_in, temp_out);

     for (j = 0; j < 4; ++j)

-      output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);

+      output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);

@@ -430,10 +429,9 @@

 void vp9_short_iht8x8_c(int16_t *input, int16_t *output,

                         int pitch, TX_TYPE tx_type) {

+  int i, j;

   int16_t out[8 * 8];

   int16_t *outptr = out;

-  const int half_pitch = pitch >> 1;

-  int i, j;

   int16_t temp_in[8], temp_out[8];

   const transform_2d ht = IHT_8[tx_type];

@@ -450,7 +448,7 @@

       temp_in[j] = out[j * 8 + i];

     ht.cols(temp_in, temp_out);

     for (j = 0; j < 8; ++j)

-      output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);

+      output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);

@@ -486,7 +484,7 @@

   output[0] = ROUND_POWER_OF_TWO(out, 5);

-void idct16_1d(int16_t *input, int16_t *output) {

+static void idct16_1d(int16_t *input, int16_t *output) {

   int16_t step1[16], step2[16];

   int temp1, temp2;

@@ -853,11 +851,10 @@

};

 void vp9_short_iht16x16_c(int16_t *input, int16_t *output,

-                          int pitch, TX_TYPE tx_type) {

+                          int input_pitch, TX_TYPE tx_type) {

+  int i, j;

   int16_t out[16 * 16];

   int16_t *outptr = out;

-  const int half_pitch = pitch >> 1;

-  int i, j;

   int16_t temp_in[16], temp_out[16];

   const transform_2d ht = IHT_16[tx_type];

@@ -864,7 +861,7 @@

   // Rows

   for (i = 0; i < 16; ++i) {

     ht.rows(input, outptr);

-    input += half_pitch;

+    input += input_pitch;

     outptr += 16;

@@ -912,7 +909,7 @@

   output[0] = ROUND_POWER_OF_TWO(out, 6);

-void idct32_1d(int16_t *input, int16_t *output) {

+static void idct32_1d(int16_t *input, int16_t *output) {

   int16_t step1[32], step2[32];

   int temp1, temp2;

--- a/vp9/common/vp9_invtrans.c

+++ b/vp9/common/vp9_invtrans.c

@@ -25,8 +25,7 @@

   for (i = 0; i < 16; i++) {

     TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff,

-                       32, tx_type);

+      vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);

     } else {

       vp9_inverse_transform_b_4x4(xd, i, 32);

@@ -58,8 +57,7 @@

   for (i = 0; i < 9; i += 8) {

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,

-                           32, tx_type);

+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

@@ -69,7 +67,7 @@

     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

       vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,

-                           32, tx_type);

+                           16, tx_type);

     } else {

       vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],

                                   &blockd[i].diff[0], 32);

@@ -101,7 +99,7 @@

   BLOCKD *bd = &xd->block[0];

   TX_TYPE tx_type = get_tx_type_16x16(xd, bd);

   if (tx_type != DCT_DCT) {

-    vp9_short_iht16x16(bd->dqcoeff, bd->diff, 32, tx_type);

+    vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);

   } else {

     vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],

                                   &xd->block[0].diff[0], 32);

--- a/vp9/decoder/vp9_dequantize.c

+++ b/vp9/decoder/vp9_dequantize.c

@@ -65,7 +65,7 @@

   for (i = 0; i < 16; i++)

     input[i] = dq[i] * input[i];

-  vp9_short_iht4x4(input, output, 8, tx_type);

+  vp9_short_iht4x4(input, output, 4, tx_type);

   vpx_memset(input, 0, 32);

   add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);

@@ -86,7 +86,7 @@

     for (i = 1; i < 64; i++)

       input[i] *= dq[1];

-    vp9_short_iht8x8(input, output, 16, tx_type);

+    vp9_short_iht8x8(input, output, 8, tx_type);

     vpx_memset(input, 0, 128);

     add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);

@@ -247,7 +247,7 @@

       input[i] *= dq[1];

     // inverse hybrid transform

-    vp9_short_iht16x16(input, output, 32, tx_type);

+    vp9_short_iht16x16(input, output, 16, tx_type);

     // the idct halves ( >> 1) the pitch

     // vp9_short_idct16x16_c(input, output, 32);

--- a/vp9/encoder/vp9_dct.c

+++ b/vp9/encoder/vp9_dct.c

@@ -105,7 +105,6 @@

                         int pitch, TX_TYPE tx_type) {

   int16_t out[4 * 4];

   int16_t *outptr = &out[0];

-  const int short_pitch = pitch >> 1;

   int i, j;

   int16_t temp_in[4], temp_out[4];

@@ -137,7 +136,7 @@

   // column transform

   for (i = 0; i < 4; ++i) {

     for (j = 0; j < 4; ++j)

-      temp_in[j] = input[j * short_pitch + i] << 4;

+      temp_in[j] = input[j * pitch + i] << 4;

     if (i == 0 && temp_in[0])

       temp_in[0] += 1;

     fwdc(temp_in, temp_out);

@@ -308,7 +307,6 @@

                         int pitch, TX_TYPE tx_type) {

   int16_t out[64];

   int16_t *outptr = &out[0];

-  const int short_pitch = pitch >> 1;

   int i, j;

   int16_t temp_in[8], temp_out[8];

@@ -339,7 +337,7 @@

   // column transform

   for (i = 0; i < 8; ++i) {

     for (j = 0; j < 8; ++j)

-      temp_in[j] = input[j * short_pitch + i] << 2;

+      temp_in[j] = input[j * pitch + i] << 2;

     fwdc(temp_in, temp_out);

     for (j = 0; j < 8; ++j)

       outptr[j * 8 + i] = temp_out[j];

@@ -697,7 +695,6 @@

                           int pitch, TX_TYPE tx_type) {

   int16_t out[256];

   int16_t *outptr = &out[0];

-  const int short_pitch = pitch >> 1;

   int i, j;

   int16_t temp_in[16], temp_out[16];

@@ -728,7 +725,7 @@

   // column transform

   for (i = 0; i < 16; ++i) {

     for (j = 0; j < 16; ++j)

-      temp_in[j] = input[j * short_pitch + i] << 2;

+      temp_in[j] = input[j * pitch + i] << 2;

     fwdc(temp_in, temp_out);

     for (j = 0; j < 16; ++j)

       outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

--- a/vp9/encoder/vp9_encodeintra.c

+++ b/vp9/encoder/vp9_encodeintra.c

@@ -54,9 +54,9 @@

   tx_type = get_tx_type_4x4(&x->e_mbd, b);

   if (tx_type != DCT_DCT) {

-    vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);

+    vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);

     vp9_ht_quantize_b_4x4(be, b, tx_type);

-    vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);

+    vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);

   } else {

     x->fwd_txm4x4(be->src_diff, be->coeff, 32);

     x->quantize_b_4x4(be, b) ;

@@ -149,10 +149,10 @@

     tx_type = get_tx_type_8x8(xd, &xd->block[ib]);

     if (tx_type != DCT_DCT) {

-      vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type);

+      vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

       vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,

-                            32, tx_type);

+                            16, tx_type);

     } else {

       x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

@@ -164,9 +164,9 @@

       be = &x->block[ib + iblock[i]];

       tx_type = get_tx_type_4x4(xd, b);

       if (tx_type != DCT_DCT) {

-        vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);

+        vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);

         vp9_ht_quantize_b_4x4(be, b, tx_type);

-        vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);

+        vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);

       } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {

         x->fwd_txm8x4(be->src_diff, be->coeff, 32);

         x->quantize_b_4x4_pair(be, be + 1, b, b + 1);

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -174,7 +174,7 @@

     BLOCK *b = &x->block[i];

     TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_short_fht4x4(b->src_diff, b->coeff, 32, tx_type);

+      vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);

     } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {

       x->fwd_txm8x4(&x->block[i].src_diff[0],

                            &x->block[i].coeff[0], 32);

@@ -209,7 +209,7 @@

     BLOCK *b = &x->block[i];

     tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_short_fht8x8(b->src_diff, b->coeff, 32, tx_type);

+      vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);

     } else {

       x->fwd_txm8x8(&x->block[i].src_diff[0],

                            &x->block[i].coeff[0], 32);

@@ -219,7 +219,7 @@

     BLOCK *b = &x->block[i];

     tx_type = get_tx_type_8x8(xd, &xd->block[i]);

     if (tx_type != DCT_DCT) {

-      vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 32, tx_type);

+      vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);

     } else {

       x->fwd_txm8x8(&x->block[i].src_diff[0],

                            &x->block[i + 2].coeff[0], 32);

@@ -247,7 +247,7 @@

   TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);

   vp9_clear_system_state();

   if (tx_type != DCT_DCT) {

-    vp9_short_fht16x16(b->src_diff, b->coeff, 32, tx_type);

+    vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);

   } else {

     x->fwd_txm16x16(&x->block[0].src_diff[0],

                            &x->block[0].coeff[0], 32);

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -1013,7 +1013,7 @@

     b->bmi.as_mode.first = mode;

     tx_type = get_tx_type_4x4(xd, b);

     if (tx_type != DCT_DCT) {

-      vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);

+      vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);

       vp9_ht_quantize_b_4x4(be, b, tx_type);

     } else {

       x->fwd_txm4x4(be->src_diff, be->coeff, 32);

@@ -1046,7 +1046,7 @@

   // inverse transform

   if (best_tx_type != DCT_DCT)

-    vp9_short_iht4x4(best_dqcoeff, b->diff, 32, best_tx_type);

+    vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type);

   else

     xd->inv_txm4x4(best_dqcoeff, b->diff, 32);

@@ -1309,7 +1309,7 @@

     if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {

       TX_TYPE tx_type = get_tx_type_8x8(xd, b);

       if (tx_type != DCT_DCT)

-        vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type);

+        vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);

       else

         x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);

       x->quantize_b_8x8(x->block + idx, xd->block + idx);

@@ -1342,7 +1342,7 @@

         be = &x->block[ib + iblock[i]];

         tx_type = get_tx_type_4x4(xd, b);

         if (tx_type != DCT_DCT) {

-          vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);

+          vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);

           vp9_ht_quantize_b_4x4(be, b, tx_type);

         } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {

           x->fwd_txm8x4(be->src_diff, be->coeff, 32);

--

⑨