shithub: libvpx

--- a/vp8/common/arm/dequantize_arm.c

+++ b/vp8/common/arm/dequantize_arm.c

@@ -23,11 +23,10 @@

 #if HAVE_ARMV7

-void vp8_dequantize_b_neon(BLOCKD *d)

+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)

     short *DQ  = d->dqcoeff;

     short *Q   = d->qcoeff;

-    short *DQC = d->dequant;

     vp8_dequantize_b_loop_neon(Q, DQC, DQ);

@@ -34,11 +33,10 @@

 #endif

 #if HAVE_ARMV6

-void vp8_dequantize_b_v6(BLOCKD *d)

+void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)

     short *DQ  = d->dqcoeff;

     short *Q   = d->qcoeff;

-    short *DQC = d->dequant;

     vp8_dequantize_b_loop_v6(Q, DQC, DQ);

--- a/vp8/common/blockd.h

+++ b/vp8/common/blockd.h

@@ -209,6 +209,11 @@

     DECLARE_ALIGNED(16, short, dqcoeff[400]);

     DECLARE_ALIGNED(16, char,  eobs[25]);

+    DECLARE_ALIGNED(16, short,  dequant_y1[16]);

+    DECLARE_ALIGNED(16, short,  dequant_y1_dc[16]);

+    DECLARE_ALIGNED(16, short,  dequant_y2[16]);

+    DECLARE_ALIGNED(16, short,  dequant_uv[16]);

     /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */

     BLOCKD block[25];

     int fullpixel_mask;

--- a/vp8/common/dequantize.c

+++ b/vp8/common/dequantize.c

@@ -14,12 +14,11 @@

 #include "vp8/common/idct.h"

 #include "vpx_mem/vpx_mem.h"

-void vp8_dequantize_b_c(BLOCKD *d)

+void vp8_dequantize_b_c(BLOCKD *d, short *DQC)

     int i;

     short *DQ  = d->dqcoeff;

     short *Q   = d->qcoeff;

-    short *DQC = d->dequant;

     for (i = 0; i < 16; i++)

--- a/vp8/common/dequantize.h

+++ b/vp8/common/dequantize.h

@@ -14,7 +14,7 @@

 #include "vp8/common/blockd.h"

 #define prototype_dequant_block(sym) \

-    void sym(BLOCKD *x)

+    void sym(BLOCKD *x, short *DQC)

 #define prototype_dequant_idct_add(sym) \

     void sym(short *input, short *dq, \

--- a/vp8/common/invtrans.h

+++ b/vp8/common/invtrans.h

@@ -36,14 +36,8 @@

 static void vp8_inverse_transform_mby(MACROBLOCKD *xd,

                                       const VP8_COMMON_RTCD *rtcd)

-    short *DQC = xd->block[0].dequant;

-    /* save the dc dequant constant in case it is overridden */

-    short dc_dequant_temp = DQC[0];

+    short *DQC = xd->dequant_y1;

-#if CONFIG_MULTITHREAD

-    DECLARE_ALIGNED(16, short, local_dequant[16]);

-#endif

     if (xd->mode_info_context->mbmi.mode != SPLITMV)

         /* do 2nd order transform on the dc block */

@@ -59,22 +53,11 @@

         eob_adjust(xd->eobs, xd->qcoeff);

-#if CONFIG_MULTITHREAD

-        DQC = local_dequant;

-        vpx_memcpy(DQC, xd->block[0].dequant,

-                   sizeof(local_dequant));

-#endif

-        /* override the dc dequant constant */

-        DQC[0] = 1;

+        DQC = xd->dequant_y1_dc;

     DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)

                     (xd->qcoeff, DQC,

                      xd->dst.y_buffer,

                      xd->dst.y_stride, xd->eobs);

-    /* restore the dc dequant constant */

-    DQC[0] = dc_dequant_temp;

 #endif

--- a/vp8/common/onyxc_int.h

+++ b/vp8/common/onyxc_int.h

@@ -93,9 +93,9 @@

     struct vpx_internal_error_info  error;

-    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);

-    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);

-    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);

+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);

+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);

+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);

     int Width;

     int Height;

--- a/vp8/common/x86/idct_blk_mmx.c

+++ b/vp8/common/x86/idct_blk_mmx.c

@@ -14,12 +14,12 @@

 extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);

-void vp8_dequantize_b_mmx(BLOCKD *d)

+void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)

     short *sq = (short *) d->qcoeff;

     short *dq = (short *) d->dqcoeff;

-    short *q = (short *) d->dequant;

-    vp8_dequantize_b_impl_mmx(sq, dq, q);

+    vp8_dequantize_b_impl_mmx(sq, dq, DQC);

 void vp8_dequant_idct_add_y_block_mmx

--- a/vp8/decoder/decodframe.c

+++ b/vp8/decoder/decodframe.c

@@ -42,7 +42,6 @@

 void vp8cx_init_de_quantizer(VP8D_COMP *pbi)

-    int i;

     int Q;

     VP8_COMMON *const pc = & pbi->common;

@@ -52,15 +51,9 @@

         pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);

         pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);

-        /* all the ac values = ; */

-        for (i = 1; i < 16; i++)

-        {

-            int rc = vp8_default_zig_zag1d[i];

-            pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);

-            pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);

-            pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);

-        }

+        pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q);

+        pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);

+        pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);

@@ -88,19 +81,19 @@

     else

         QIndex = pc->base_qindex;

-    /* Set up the block level dequant pointers */

-    for (i = 0; i < 16; i++)

-    {

-        xd->block[i].dequant = pc->Y1dequant[QIndex];

-    }

+    /* Set up the macroblock dequant constants */

+    xd->dequant_y1_dc[0] = 1;

+    xd->dequant_y1[0] = pc->Y1dequant[QIndex][0];

+    xd->dequant_y2[0] = pc->Y2dequant[QIndex][0];

+    xd->dequant_uv[0] = pc->UVdequant[QIndex][0];

-    for (i = 16; i < 24; i++)

+    for (i = 1; i < 16; i++)

-        xd->block[i].dequant = pc->UVdequant[QIndex];

+        xd->dequant_y1_dc[i] =

+        xd->dequant_y1[i] = pc->Y1dequant[QIndex][1];

+        xd->dequant_y2[i] = pc->Y2dequant[QIndex][1];

+        xd->dequant_uv[i] = pc->UVdequant[QIndex][1];

-    xd->block[24].dequant = pc->Y2dequant[QIndex];

 #if CONFIG_RUNTIME_CPU_DETECT

@@ -180,6 +173,8 @@

         else

+            short *DQC = xd->dequant_y1;

             /* clear out residual eob info */

             if(xd->mode_info_context->mbmi.mb_skip_coeff)

                 vpx_memset(xd->eobs, 0, 25);

@@ -200,13 +195,13 @@

                     if (xd->eobs[i] > 1)

                         DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)

-                            (b->qcoeff, b->dequant,

+                            (b->qcoeff, DQC,

                             *(b->base_dst) + b->dst, b->dst_stride);

                     else

                         IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)

-                            (b->qcoeff[0] * b->dequant[0],

+                            (b->qcoeff[0] * DQC[0],

                             *(b->base_dst) + b->dst, b->dst_stride,

                             *(b->base_dst) + b->dst, b->dst_stride);

                         ((int *)b->qcoeff)[0] = 0;

@@ -233,11 +228,8 @@

         /* dequantization and idct */

         if (mode != B_PRED)

-            short *DQC = xd->block[0].dequant;

+            short *DQC = xd->dequant_y1;

-            /* save the dc dequant constant in case it is overridden */

-            short dc_dequant_temp = DQC[0];

             if (mode != SPLITMV)

                 BLOCKD *b = &xd->block[24];

@@ -245,7 +237,8 @@

                 /* do 2nd order transform on the dc block */

                 if (xd->eobs[24] > 1)

-                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);

+                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b,

+                        xd->dequant_y2);

                     IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],

                         xd->qcoeff);

@@ -260,7 +253,7 @@

                 else

-                    b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];

+                    b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];

                     IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],

                         xd->qcoeff);

                     ((int *)b->qcoeff)[0] = 0;

@@ -269,20 +262,17 @@

                 /* override the dc dequant constant in order to preserve the

                  * dc components

*/

-                DQC[0] = 1;

+                DQC = xd->dequant_y1_dc;

             DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)

-                            (xd->qcoeff, xd->block[0].dequant,

+                            (xd->qcoeff, DQC,

                              xd->dst.y_buffer,

                              xd->dst.y_stride, xd->eobs);

-            /* restore the dc dequant constant */

-            DQC[0] = dc_dequant_temp;

         DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)

-                        (xd->qcoeff+16*16, xd->block[16].dequant,

+                        (xd->qcoeff+16*16, xd->dequant_uv,

                          xd->dst.u_buffer, xd->dst.v_buffer,

                          xd->dst.uv_stride, xd->eobs+16);

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -37,7 +37,7 @@

 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)

     VP8_COMMON *const pc = & pbi->common;

-    int i, j;

+    int i;

     for (i = 0; i < count; i++)

@@ -77,10 +77,10 @@

         mbd->current_bc = &pbi->bc2;

-        for (j = 0; j < 25; j++)

-        {

-            mbd->block[j].dequant = xd->block[j].dequant;

-        }

+        vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));

+        vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));

+        vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));

+        vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

         mbd->fullpixel_mask = 0xffffffff;

         if(pc->full_pixel)

@@ -177,6 +177,8 @@

     /* dequantization and idct */

     if (xd->mode_info_context->mbmi.mode == B_PRED)

+        short *DQC = xd->dequant_y1;

         for (i = 0; i < 16; i++)

             BLOCKD *b = &xd->block[i];

@@ -190,13 +192,13 @@

                 if (xd->eobs[i] > 1)

                     DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)

-                        (b->qcoeff, b->dequant,

+                        (b->qcoeff, DQC,

                         *(b->base_dst) + b->dst, b->dst_stride);

                 else

                     IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)

-                        (b->qcoeff[0] * b->dequant[0],

+                        (b->qcoeff[0] * DQC[0],

                         *(b->base_dst) + b->dst, b->dst_stride,

                         *(b->base_dst) + b->dst, b->dst_stride);

                     ((int *)b->qcoeff)[0] = 0;

@@ -206,10 +208,8 @@

     else

-        short *DQC = xd->block[0].dequant;

+        short *DQC = xd->dequant_y1;

-        DECLARE_ALIGNED(16, short, local_dequant[16]);

         if (xd->mode_info_context->mbmi.mode != SPLITMV)

             BLOCKD *b = &xd->block[24];

@@ -217,7 +217,7 @@

             /* do 2nd order transform on the dc block */

             if (xd->eobs[24] > 1)

-                DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);

+                DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, xd->dequant_y2);

                 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],

                     xd->qcoeff);

@@ -232,20 +232,13 @@

             else

-                b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];

+                b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];

                 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff);

                 ((int *)b->qcoeff)[0] = 0;

-            /* make a local copy of the dequant constants */

-            vpx_memcpy(local_dequant, xd->block[0].dequant,

-                       sizeof(local_dequant));

             /* override the dc dequant constant */

-            local_dequant[0] = 1;

-            /* use the new dequant constants */

-            DQC = local_dequant;

+            DQC = xd->dequant_y1_dc;

         DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)

@@ -255,7 +248,7 @@

     DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)

-                    (xd->qcoeff+16*16, xd->block[16].dequant,

+                    (xd->qcoeff+16*16, xd->dequant_uv,

                      xd->dst.u_buffer, xd->dst.v_buffer,

                      xd->dst.uv_stride, xd->eobs+16);

--- a/vp8/encoder/encodeframe.c

+++ b/vp8/encoder/encodeframe.c

@@ -1120,7 +1120,7 @@

         vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));

     DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)

-                    (xd->qcoeff+16*16, xd->block[16].dequant,

+                    (xd->qcoeff+16*16, xd->dequant_uv,

                      xd->dst.u_buffer, xd->dst.v_buffer,

                      xd->dst.uv_stride, xd->eobs+16);

     return rate;

@@ -1305,7 +1305,7 @@

             vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));

         DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)

-                        (xd->qcoeff+16*16, xd->block[16].dequant,

+                        (xd->qcoeff+16*16, xd->dequant_uv,

                          xd->dst.u_buffer, xd->dst.v_buffer,

                          xd->dst.uv_stride, xd->eobs+16);

--- a/vp8/encoder/ethreading.c

+++ b/vp8/encoder/ethreading.c

@@ -384,10 +384,22 @@

         zd->mb_segement_abs_delta      = xd->mb_segement_abs_delta;

         vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

-        for (i = 0; i < 25; i++)

-        {

-            zd->block[i].dequant = xd->block[i].dequant;

-        }

+        vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));

+        vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));

+        vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));

+        vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

+#if 1

+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until

+         * the quantizer code uses a passed in pointer to the dequant constants.

+         * This will also require modifications to the x86 and neon assembly.

+         * */

+        for (i = 0; i < 16; i++)

+            zd->block[i].dequant = zd->dequant_y1;

+        for (i = 16; i < 24; i++)

+            zd->block[i].dequant = zd->dequant_uv;

+        zd->block[24].dequant = zd->dequant_y2;

+#endif

--- a/vp8/encoder/quantize.c

+++ b/vp8/encoder/quantize.c

@@ -504,7 +504,6 @@

             cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];

             cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];

             cpi->Y1round[Q][i] = cpi->Y1round[Q][1];

-            cpi->common.Y1dequant[Q][i] = cpi->common.Y1dequant[Q][1];

             cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *

                                              zbin_boost[i]) >> 7;

@@ -513,7 +512,6 @@

             cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];

             cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];

             cpi->Y2round[Q][i] = cpi->Y2round[Q][1];

-            cpi->common.Y2dequant[Q][i] = cpi->common.Y2dequant[Q][1];

             cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *

                                              zbin_boost[i]) >> 7;

@@ -522,7 +520,6 @@

             cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];

             cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];

             cpi->UVround[Q][i] = cpi->UVround[Q][1];

-            cpi->common.UVdequant[Q][i] = cpi->common.UVdequant[Q][1];

             cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *

                                              zbin_boost[i]) >> 7;

@@ -641,6 +638,31 @@

*/

     if (!ok_to_skip || QIndex != x->q_index)

+        xd->dequant_y1_dc[0] = 1;

+        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];

+        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];

+        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];

+        for (i = 1; i < 16; i++)

+        {

+            xd->dequant_y1_dc[i] =

+            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];

+            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];

+            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];

+        }

+#if 1

+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until

+         * the quantizer code uses a passed in pointer to the dequant constants.

+         * This will also require modifications to the x86 and neon assembly.

+         * */

+        for (i = 0; i < 16; i++)

+            x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex];

+        for (i = 16; i < 24; i++)

+            x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex];

+        x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex];

+#endif

         // Y

         zbin_extra = ZBIN_EXTRA_Y;

@@ -651,7 +673,6 @@

             x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];

             x->block[i].zbin = cpi->Y1zbin[QIndex];

             x->block[i].round = cpi->Y1round[QIndex];

-            x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];

             x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];

             x->block[i].zbin_extra = (short)zbin_extra;

@@ -666,7 +687,6 @@

             x->block[i].quant_shift = cpi->UVquant_shift[QIndex];

             x->block[i].zbin = cpi->UVzbin[QIndex];

             x->block[i].round = cpi->UVround[QIndex];

-            x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];

             x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];

             x->block[i].zbin_extra = (short)zbin_extra;

@@ -679,7 +699,6 @@

         x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];

         x->block[24].zbin = cpi->Y2zbin[QIndex];

         x->block[24].round = cpi->Y2round[QIndex];

-        x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];

         x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];

         x->block[24].zbin_extra = (short)zbin_extra;

@@ -689,6 +708,9 @@

         cpi->last_zbin_over_quant = cpi->zbin_over_quant;

         cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;

         x->last_act_zbin_adj = x->act_zbin_adj;

     else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant

             || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost

--

⑨