shithub: libvpx

--- a/vp8/encoder/arm/csystemdependent.c

+++ b/vp8/encoder/arm/csystemdependent.c

@@ -63,7 +63,7 @@

     cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;

     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;

-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;

+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/

 #elif HAVE_ARMV6

     cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;

     cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;

--- a/vp8/encoder/block.h

+++ b/vp8/encoder/block.h

@@ -33,6 +33,7 @@

     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries

     short(*quant)[4];

+    short(*quant_shift)[4];

     short(*zbin)[4];

     short(*zrun_zbin_boost);

     short(*round)[4];

--- a/vp8/encoder/encodeframe.c

+++ b/vp8/encoder/encodeframe.c

@@ -103,6 +103,18 @@

80,

};

+static void vp8cx_invert_quant(short *quant, short *shift, short d) /* Turn divisor d into a multiplier (*quant) and a shift count (*shift). */

+{

+    unsigned t;

+    int l;

+    t = d; /* NOTE(review): assumes d >= 1; d == 0 would divide by zero below -- confirm callers. */

+    for(l = 0; t > 1; l++) /* Count the halvings needed to bring t down to 1, i.e. l = floor(log2(d)) for d >= 1. */

+        t>>=1;

+    t = 1 + (1<<(16+l))/d; /* t = 1 + floor(2^(16+l) / d). */

+    *quant = (short)(t - (1<<16)); /* Store t - 2^16; the quantize.c hunks of this patch compute (((x * quant) >> 16) + x) >> shift, whose "+ x" term restores the 2^16 part. */

+    *shift = l; /* Shift count the quantizer applies after the multiply-and-add. */

+}

 void vp8cx_init_quantizer(VP8_COMP *cpi)

     int r, c;

@@ -116,7 +128,8 @@

         // dc values

         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);

-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;

+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,

+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);

         cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

         cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;

         cpi->common.Y1dequant[Q][0][0] = quant_val;

@@ -123,7 +136,8 @@

         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);

-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;

+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,

+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);

         cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

         cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;

         cpi->common.Y2dequant[Q][0][0] = quant_val;

@@ -130,7 +144,8 @@

         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);

-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;

+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,

+                           cpi->UVquant_shift[Q][0] + 0, quant_val);

         cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;

         cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;

         cpi->common.UVdequant[Q][0][0] = quant_val;

@@ -144,7 +159,8 @@

             c = (rc & 3);

             quant_val = vp8_ac_yquant(Q);

-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;

+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,

+                               cpi->Y1quant_shift[Q][r] + c, quant_val);

             cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

             cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;

             cpi->common.Y1dequant[Q][r][c] = quant_val;

@@ -151,7 +167,8 @@

             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);

-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;

+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,

+                               cpi->Y2quant_shift[Q][r] + c, quant_val);

             cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

             cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;

             cpi->common.Y2dequant[Q][r][c] = quant_val;

@@ -158,7 +175,8 @@

             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);

-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;

+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,

+                               cpi->UVquant_shift[Q][r] + c, quant_val);

             cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

             cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;

             cpi->common.UVdequant[Q][r][c] = quant_val;

@@ -198,6 +216,7 @@

     for (i = 0; i < 16; i++)

         x->block[i].quant = cpi->Y1quant[QIndex];

+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];

         x->block[i].zbin = cpi->Y1zbin[QIndex];

         x->block[i].round = cpi->Y1round[QIndex];

         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];

@@ -211,6 +230,7 @@

     for (i = 16; i < 24; i++)

         x->block[i].quant = cpi->UVquant[QIndex];

+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];

         x->block[i].zbin = cpi->UVzbin[QIndex];

         x->block[i].round = cpi->UVround[QIndex];

         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];

@@ -221,6 +241,7 @@

     // Y2

     zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;

     x->block[24].quant = cpi->Y2quant[QIndex];

+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];

     x->block[24].zbin = cpi->Y2zbin[QIndex];

     x->block[24].round = cpi->Y2round[QIndex];

     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];

--- a/vp8/encoder/ethreading.c

+++ b/vp8/encoder/ethreading.c

@@ -286,6 +286,7 @@

     for (i = 0; i < 25; i++)

         z->block[i].quant           = x->block[i].quant;

+        z->block[i].quant_shift     = x->block[i].quant_shift;

         z->block[i].zbin            = x->block[i].zbin;

         z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;

         z->block[i].round           = x->block[i].round;

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -234,14 +234,17 @@

     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);

+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);

+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);

+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);

--- a/vp8/encoder/quantize.c

+++ b/vp8/encoder/quantize.c

@@ -25,6 +25,7 @@

     short *zbin_ptr   = &b->zbin[0][0];

     short *round_ptr  = &b->round[0][0];

     short *quant_ptr  = &b->quant[0][0];

+    short *quant_shift_ptr = &b->quant_shift[0][0];

     short *qcoeff_ptr = d->qcoeff;

     short *dqcoeff_ptr = d->dqcoeff;

     short *dequant_ptr = &d->dequant[0][0];

@@ -45,7 +46,9 @@

         if (x >= zbin)

-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)

+            x += round_ptr[rc];

+            y  = (((x * quant_ptr[rc]) >> 16) + x)

+                 >> quant_shift_ptr[rc];                // quantize (x)

             x  = (y ^ sz) - sz;                         // get the sign back

             qcoeff_ptr[rc] = x;                          // write to destination

             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value

@@ -69,6 +72,7 @@

     short *zbin_ptr   = &b->zbin[0][0];

     short *round_ptr  = &b->round[0][0];

     short *quant_ptr  = &b->quant[0][0];

+    short *quant_shift_ptr = &b->quant_shift[0][0];

     short *qcoeff_ptr = d->qcoeff;

     short *dqcoeff_ptr = d->dqcoeff;

     short *dequant_ptr = &d->dequant[0][0];

@@ -95,7 +99,9 @@

         if (x >= zbin)

-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)

+            x += round_ptr[rc];

+            y  = (((x * quant_ptr[rc]) >> 16) + x)

+                 >> quant_shift_ptr[rc];                // quantize (x)

             x  = (y ^ sz) - sz;                         // get the sign back

             qcoeff_ptr[rc]  = x;                         // write to destination

             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value

--- a/vp8/encoder/x86/x86_csystemdependent.c

+++ b/vp8/encoder/x86/x86_csystemdependent.c

@@ -238,7 +238,7 @@

         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;

         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;

-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;

+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/

 #endif

@@ -285,8 +285,8 @@

         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;

         /* cpi->rtcd.encodemb.sub* not implemented for wmt */

-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;

-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;

+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;

+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/

 #endif

--

⑨