shithub: libvpx

Download patch

ref: 5f25d4c1752a40bafec3334bd0e4a1a0047c1d9d
parent: 5bfa29b6c54e31578cc377d49add31bddedc83a4
author: Scott LaVarnway <slavarnway@google.com>
date: Wed Jan 4 06:56:50 EST 2012

Reduced the size of Y1Dequant and friends to [128][2]

This patch removes the local copies of the dequantize
constants and implements John's idea as described
in the "Make a local copy of the dequantized data" commit.

Change-Id: Ic6b7d681f00bf63263f71ff1e39ab2f80729e8b2

--- a/vp8/common/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c
@@ -23,11 +23,10 @@
 
 #if HAVE_ARMV7
 
-void vp8_dequantize_b_neon(BLOCKD *d)
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_neon(Q, DQC, DQ);
 }
@@ -34,11 +33,10 @@
 #endif
 
 #if HAVE_ARMV6
-void vp8_dequantize_b_v6(BLOCKD *d)
+void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_v6(Q, DQC, DQ);
 }
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -209,6 +209,11 @@
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
     DECLARE_ALIGNED(16, char,  eobs[25]);
 
+    DECLARE_ALIGNED(16, short,  dequant_y1[16]);
+    DECLARE_ALIGNED(16, short,  dequant_y1_dc[16]);
+    DECLARE_ALIGNED(16, short,  dequant_y2[16]);
+    DECLARE_ALIGNED(16, short,  dequant_uv[16]);
+
     /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
     BLOCKD block[25];
     int fullpixel_mask;
--- a/vp8/common/dequantize.c
+++ b/vp8/common/dequantize.c
@@ -14,12 +14,11 @@
 #include "vp8/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
 
-void vp8_dequantize_b_c(BLOCKD *d)
+void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
 {
     int i;
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = d->dequant;
 
     for (i = 0; i < 16; i++)
     {
--- a/vp8/common/dequantize.h
+++ b/vp8/common/dequantize.h
@@ -14,7 +14,7 @@
 #include "vp8/common/blockd.h"
 
 #define prototype_dequant_block(sym) \
-    void sym(BLOCKD *x)
+    void sym(BLOCKD *x, short *DQC)
 
 #define prototype_dequant_idct_add(sym) \
     void sym(short *input, short *dq, \
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -36,14 +36,8 @@
 static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
                                       const VP8_COMMON_RTCD *rtcd)
 {
-    short *DQC = xd->block[0].dequant;
-    /* save the dc dequant constant in case it is overridden */
-    short dc_dequant_temp = DQC[0];
+    short *DQC = xd->dequant_y1;
 
-#if CONFIG_MULTITHREAD
-    DECLARE_ALIGNED(16, short, local_dequant[16]);
-#endif
-
     if (xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         /* do 2nd order transform on the dc block */
@@ -59,22 +53,11 @@
         }
         eob_adjust(xd->eobs, xd->qcoeff);
 
-#if CONFIG_MULTITHREAD
-        DQC = local_dequant;
-
-        vpx_memcpy(DQC, xd->block[0].dequant,
-                   sizeof(local_dequant));
-#endif
-
-        /* override the dc dequant constant */
-        DQC[0] = 1;
+        DQC = xd->dequant_y1_dc;
     }
     DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
                     (xd->qcoeff, DQC,
                      xd->dst.y_buffer,
                      xd->dst.y_stride, xd->eobs);
-
-    /* restore the dc dequant constant */
-    DQC[0] = dc_dequant_temp;
 }
 #endif
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -93,9 +93,9 @@
 {
     struct vpx_internal_error_info  error;
 
-    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
-    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
-    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
 
     int Width;
     int Height;
--- a/vp8/common/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -14,12 +14,12 @@
 
 extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
 
-void vp8_dequantize_b_mmx(BLOCKD *d)
+void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
 {
     short *sq = (short *) d->qcoeff;
     short *dq = (short *) d->dqcoeff;
-    short *q = (short *) d->dequant;
-    vp8_dequantize_b_impl_mmx(sq, dq, q);
+
+    vp8_dequantize_b_impl_mmx(sq, dq, DQC);
 }
 
 void vp8_dequant_idct_add_y_block_mmx
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -42,7 +42,6 @@
 
 void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
 {
-    int i;
     int Q;
     VP8_COMMON *const pc = & pbi->common;
 
@@ -52,15 +51,9 @@
         pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
         pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
 
-        /* all the ac values = ; */
-        for (i = 1; i < 16; i++)
-        {
-            int rc = vp8_default_zig_zag1d[i];
-
-            pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
-            pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
-            pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
-        }
+        pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q);
+        pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+        pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
     }
 }
 
@@ -88,19 +81,19 @@
     else
         QIndex = pc->base_qindex;
 
-    /* Set up the block level dequant pointers */
-    for (i = 0; i < 16; i++)
-    {
-        xd->block[i].dequant = pc->Y1dequant[QIndex];
-    }
+    /* Set up the macroblock dequant constants */
+    xd->dequant_y1_dc[0] = 1;
+    xd->dequant_y1[0] = pc->Y1dequant[QIndex][0];
+    xd->dequant_y2[0] = pc->Y2dequant[QIndex][0];
+    xd->dequant_uv[0] = pc->UVdequant[QIndex][0];
 
-    for (i = 16; i < 24; i++)
+    for (i = 1; i < 16; i++)
     {
-        xd->block[i].dequant = pc->UVdequant[QIndex];
+        xd->dequant_y1_dc[i] =
+        xd->dequant_y1[i] = pc->Y1dequant[QIndex][1];
+        xd->dequant_y2[i] = pc->Y2dequant[QIndex][1];
+        xd->dequant_uv[i] = pc->UVdequant[QIndex][1];
     }
-
-    xd->block[24].dequant = pc->Y2dequant[QIndex];
-
 }
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -180,6 +173,8 @@
         }
         else
         {
+            short *DQC = xd->dequant_y1;
+
             /* clear out residual eob info */
             if(xd->mode_info_context->mbmi.mb_skip_coeff)
                 vpx_memset(xd->eobs, 0, 25);
@@ -200,13 +195,13 @@
                     if (xd->eobs[i] > 1)
                     {
                         DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
-                            (b->qcoeff, b->dequant,
+                            (b->qcoeff, DQC,
                             *(b->base_dst) + b->dst, b->dst_stride);
                     }
                     else
                     {
                         IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                            (b->qcoeff[0] * b->dequant[0],
+                            (b->qcoeff[0] * DQC[0],
                             *(b->base_dst) + b->dst, b->dst_stride,
                             *(b->base_dst) + b->dst, b->dst_stride);
                         ((int *)b->qcoeff)[0] = 0;
@@ -233,11 +228,8 @@
         /* dequantization and idct */
         if (mode != B_PRED)
         {
-            short *DQC = xd->block[0].dequant;
+            short *DQC = xd->dequant_y1;
 
-            /* save the dc dequant constant in case it is overridden */
-            short dc_dequant_temp = DQC[0];
-
             if (mode != SPLITMV)
             {
                 BLOCKD *b = &xd->block[24];
@@ -245,7 +237,8 @@
                 /* do 2nd order transform on the dc block */
                 if (xd->eobs[24] > 1)
                 {
-                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
+                    DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b,
+                        xd->dequant_y2);
 
                     IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
                         xd->qcoeff);
@@ -260,7 +253,7 @@
                 }
                 else
                 {
-                    b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
+                    b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
                     IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],
                         xd->qcoeff);
                     ((int *)b->qcoeff)[0] = 0;
@@ -269,20 +262,17 @@
                 /* override the dc dequant constant in order to preserve the
                  * dc components
                  */
-                DQC[0] = 1;
+                DQC = xd->dequant_y1_dc;
             }
 
             DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
-                            (xd->qcoeff, xd->block[0].dequant,
+                            (xd->qcoeff, DQC,
                              xd->dst.y_buffer,
                              xd->dst.y_stride, xd->eobs);
-
-            /* restore the dc dequant constant */
-            DQC[0] = dc_dequant_temp;
         }
 
         DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
-                        (xd->qcoeff+16*16, xd->block[16].dequant,
+                        (xd->qcoeff+16*16, xd->dequant_uv,
                          xd->dst.u_buffer, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->eobs+16);
     }
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -37,7 +37,7 @@
 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
 {
     VP8_COMMON *const pc = & pbi->common;
-    int i, j;
+    int i;
 
     for (i = 0; i < count; i++)
     {
@@ -77,10 +77,10 @@
 
         mbd->current_bc = &pbi->bc2;
 
-        for (j = 0; j < 25; j++)
-        {
-            mbd->block[j].dequant = xd->block[j].dequant;
-        }
+        vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+        vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
+        vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
+        vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
 
         mbd->fullpixel_mask = 0xffffffff;
         if(pc->full_pixel)
@@ -177,6 +177,8 @@
     /* dequantization and idct */
     if (xd->mode_info_context->mbmi.mode == B_PRED)
     {
+        short *DQC = xd->dequant_y1;
+
         for (i = 0; i < 16; i++)
         {
             BLOCKD *b = &xd->block[i];
@@ -190,13 +192,13 @@
                 if (xd->eobs[i] > 1)
                 {
                     DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
-                        (b->qcoeff, b->dequant,
+                        (b->qcoeff, DQC,
                         *(b->base_dst) + b->dst, b->dst_stride);
                 }
                 else
                 {
                     IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                        (b->qcoeff[0] * b->dequant[0],
+                        (b->qcoeff[0] * DQC[0],
                         *(b->base_dst) + b->dst, b->dst_stride,
                         *(b->base_dst) + b->dst, b->dst_stride);
                     ((int *)b->qcoeff)[0] = 0;
@@ -206,10 +208,8 @@
     }
     else
     {
-        short *DQC = xd->block[0].dequant;
+        short *DQC = xd->dequant_y1;
 
-        DECLARE_ALIGNED(16, short, local_dequant[16]);
-
         if (xd->mode_info_context->mbmi.mode != SPLITMV)
         {
             BLOCKD *b = &xd->block[24];
@@ -217,7 +217,7 @@
             /* do 2nd order transform on the dc block */
             if (xd->eobs[24] > 1)
             {
-                DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
+                DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, xd->dequant_y2);
 
                 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
                     xd->qcoeff);
@@ -232,20 +232,13 @@
             }
             else
             {
-                b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
+                b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
                 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff);
                 ((int *)b->qcoeff)[0] = 0;
             }
 
-            /* make a local copy of the dequant constants */
-            vpx_memcpy(local_dequant, xd->block[0].dequant,
-                       sizeof(local_dequant));
-
             /* override the dc dequant constant */
-            local_dequant[0] = 1;
-
-            /* use the new dequant constants */
-            DQC = local_dequant;
+            DQC = xd->dequant_y1_dc;
         }
 
         DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
@@ -255,7 +248,7 @@
     }
 
     DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, xd->block[16].dequant,
+                    (xd->qcoeff+16*16, xd->dequant_uv,
                      xd->dst.u_buffer, xd->dst.v_buffer,
                      xd->dst.uv_stride, xd->eobs+16);
 }
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1120,7 +1120,7 @@
         vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
 
     DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, xd->block[16].dequant,
+                    (xd->qcoeff+16*16, xd->dequant_uv,
                      xd->dst.u_buffer, xd->dst.v_buffer,
                      xd->dst.uv_stride, xd->eobs+16);
     return rate;
@@ -1305,7 +1305,7 @@
             vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
 
         DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
-                        (xd->qcoeff+16*16, xd->block[16].dequant,
+                        (xd->qcoeff+16*16, xd->dequant_uv,
                          xd->dst.u_buffer, xd->dst.v_buffer,
                          xd->dst.uv_stride, xd->eobs+16);
     }
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -384,10 +384,22 @@
         zd->mb_segement_abs_delta      = xd->mb_segement_abs_delta;
         vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        for (i = 0; i < 25; i++)
-        {
-            zd->block[i].dequant = xd->block[i].dequant;
-        }
+        vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+        vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
+        vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
+        vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
+
+#if 1
+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until
+         * the quantizer code uses a passed in pointer to the dequant constants.
+         * This will also require modifications to the x86 and neon assembly.
+         * */
+        for (i = 0; i < 16; i++)
+            zd->block[i].dequant = zd->dequant_y1;
+        for (i = 16; i < 24; i++)
+            zd->block[i].dequant = zd->dequant_uv;
+        zd->block[24].dequant = zd->dequant_y2;
+#endif
     }
 }
 
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -504,7 +504,6 @@
             cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
             cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
             cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
-            cpi->common.Y1dequant[Q][i] = cpi->common.Y1dequant[Q][1];
             cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
                                              zbin_boost[i]) >> 7;
 
@@ -513,7 +512,6 @@
             cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
             cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
             cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
-            cpi->common.Y2dequant[Q][i] = cpi->common.Y2dequant[Q][1];
             cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
                                              zbin_boost[i]) >> 7;
 
@@ -522,7 +520,6 @@
             cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
             cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
             cpi->UVround[Q][i] = cpi->UVround[Q][1];
-            cpi->common.UVdequant[Q][i] = cpi->common.UVdequant[Q][1];
             cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
                                              zbin_boost[i]) >> 7;
         }
@@ -641,6 +638,31 @@
      */
     if (!ok_to_skip || QIndex != x->q_index)
     {
+
+        xd->dequant_y1_dc[0] = 1;
+        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
+        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
+        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
+
+        for (i = 1; i < 16; i++)
+        {
+            xd->dequant_y1_dc[i] =
+            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
+            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
+            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
+        }
+#if 1
+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until
+         * the quantizer code uses a passed in pointer to the dequant constants.
+         * This will also require modifications to the x86 and neon assembly.
+         * */
+        for (i = 0; i < 16; i++)
+            x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex];
+        for (i = 16; i < 24; i++)
+            x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex];
+        x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex];
+#endif
+
         // Y
         zbin_extra = ZBIN_EXTRA_Y;
 
@@ -651,7 +673,6 @@
             x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
             x->block[i].zbin = cpi->Y1zbin[QIndex];
             x->block[i].round = cpi->Y1round[QIndex];
-            x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
             x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
             x->block[i].zbin_extra = (short)zbin_extra;
         }
@@ -666,7 +687,6 @@
             x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
             x->block[i].zbin = cpi->UVzbin[QIndex];
             x->block[i].round = cpi->UVround[QIndex];
-            x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
             x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
             x->block[i].zbin_extra = (short)zbin_extra;
         }
@@ -679,7 +699,6 @@
         x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
         x->block[24].zbin = cpi->Y2zbin[QIndex];
         x->block[24].round = cpi->Y2round[QIndex];
-        x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
         x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
         x->block[24].zbin_extra = (short)zbin_extra;
 
@@ -689,6 +708,9 @@
         cpi->last_zbin_over_quant = cpi->zbin_over_quant;
         cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;
         x->last_act_zbin_adj = x->act_zbin_adj;
+
+
+
     }
     else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant
             || cpi->last_zbin_mode_boost != cpi->zbin_mode_boost