shithub: libvpx

--- a/vp8/encoder/encodeintra.c

+++ b/vp8/encoder/encodeintra.c

@@ -12,7 +12,7 @@

 #include "vpx_config.h"

 #include "vp8_rtcd.h"

 #include "./vpx_dsp_rtcd.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vp8/common/reconintra4x4.h"

 #include "encodemb.h"

 #include "vp8/common/invtrans.h"

--- a/vp8/encoder/encodemb.c

+++ b/vp8/encoder/encodemb.c

@@ -14,7 +14,7 @@

 #include "vp8_rtcd.h"

 #include "encodemb.h"

 #include "vp8/common/reconinter.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "tokenize.h"

 #include "vp8/common/invtrans.h"

 #include "vpx_mem/vpx_mem.h"

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -17,7 +17,7 @@

 #include "vp8/common/blockd.h"

 #include "onyx_int.h"

 #include "vp8/common/systemdependent.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vp8/common/alloccommon.h"

 #include "mcomp.h"

 #include "firstpass.h"

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -20,7 +20,7 @@

 #include "vp8/common/onyxc_int.h"

 #include "vpx_dsp/variance.h"

 #include "encodemb.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vp8/common/entropy.h"

 #include "vp8/common/threading.h"

 #include "vpx_ports/mem.h"

--- a/vp8/encoder/picklpf.c

+++ b/vp8/encoder/picklpf.c

@@ -13,7 +13,7 @@

 #include "./vpx_scale_rtcd.h"

 #include "vp8/common/onyxc_int.h"

 #include "onyx_int.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vpx_scale/vpx_scale.h"

 #include "vp8/common/alloccommon.h"

--- a/vp8/encoder/quantize.c

+++ /dev/null

@@ -1,583 +1,0 @@

-/*

- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include <math.h>

-#include "vpx_mem/vpx_mem.h"

-#include "onyx_int.h"

-#include "quantize.h"

-#include "vp8/common/quant_common.h"

-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)

-{

-    int i, rc, eob;

-    int x, y, z, sz;

-    short *coeff_ptr   = b->coeff;

-    short *round_ptr   = b->round;

-    short *quant_ptr   = b->quant_fast;

-    short *qcoeff_ptr  = d->qcoeff;

-    short *dqcoeff_ptr = d->dqcoeff;

-    short *dequant_ptr = d->dequant;

-    eob = -1;

-    for (i = 0; i < 16; i++)

-    {

-        rc   = vp8_default_zig_zag1d[i];

-        z    = coeff_ptr[rc];

-        sz = (z >> 31);                              /* sign of z */

-        x  = (z ^ sz) - sz;                          /* x = abs(z) */

-        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */

-        x  = (y ^ sz) - sz;                          /* get the sign back */

-        qcoeff_ptr[rc] = x;                          /* write to destination */

-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];       /* dequantized value */

-        if (y)

-        {

-            eob = i;                                 /* last nonzero coeffs */

-        }

-    }

-    *d->eob = (char)(eob + 1);

-}

-void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)

-{

-    int i, rc, eob;

-    int zbin;

-    int x, y, z, sz;

-    short *zbin_boost_ptr  = b->zrun_zbin_boost;

-    short *coeff_ptr       = b->coeff;

-    short *zbin_ptr        = b->zbin;

-    short *round_ptr       = b->round;

-    short *quant_ptr       = b->quant;

-    short *quant_shift_ptr = b->quant_shift;

-    short *qcoeff_ptr      = d->qcoeff;

-    short *dqcoeff_ptr     = d->dqcoeff;

-    short *dequant_ptr     = d->dequant;

-    short zbin_oq_value    = b->zbin_extra;

-    memset(qcoeff_ptr, 0, 32);

-    memset(dqcoeff_ptr, 0, 32);

-    eob = -1;

-    for (i = 0; i < 16; i++)

-    {

-        rc   = vp8_default_zig_zag1d[i];

-        z    = coeff_ptr[rc];

-        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;

-        zbin_boost_ptr ++;

-        sz = (z >> 31);                              /* sign of z */

-        x  = (z ^ sz) - sz;                          /* x = abs(z) */

-        if (x >= zbin)

-        {

-            x += round_ptr[rc];

-            y  = ((((x * quant_ptr[rc]) >> 16) + x)

-                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */

-            x  = (y ^ sz) - sz;                      /* get the sign back */

-            qcoeff_ptr[rc]  = x;                     /* write to destination */

-            dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */

-            if (y)

-            {

-                eob = i;                             /* last nonzero coeffs */

-                zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */

-            }

-        }

-    }

-    *d->eob = (char)(eob + 1);

-}

-void vp8_quantize_mby(MACROBLOCK *x)

-{

-    int i;

-    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED

-        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);

-    for (i = 0; i < 16; i++)

-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

-    if(has_2nd_order)

-        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);

-}

-void vp8_quantize_mb(MACROBLOCK *x)

-{

-    int i;

-    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED

-        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);

-    for (i = 0; i < 24+has_2nd_order; i++)

-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

-}

-void vp8_quantize_mbuv(MACROBLOCK *x)

-{

-    int i;

-    for (i = 16; i < 24; i++)

-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

-}

-static const int qrounding_factors[129] =

-{

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48

-};

-static const int qzbin_factors[129] =

-{

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80

-};

-static const int qrounding_factors_y2[129] =

-{

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48, 48, 48, 48, 48, 48, 48, 48,

-    48

-};

-static const int qzbin_factors_y2[129] =

-{

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    84, 84, 84, 84, 84, 84, 84, 84,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80, 80, 80, 80, 80, 80, 80, 80,

-    80

-};

-static void invert_quant(int improved_quant, short *quant,

-                         short *shift, short d)

-{

-    if(improved_quant)

-    {

-        unsigned t;

-        int l;

-        t = d;

-        for(l = 0; t > 1; l++)

-            t>>=1;

-        t = 1 + (1<<(16+l))/d;

-        *quant = (short)(t - (1<<16));

-        *shift = l;

-        /* use multiplication and constant shift by 16 */

-        *shift = 1 << (16 - *shift);

-    }

-    else

-    {

-        *quant = (1 << 16) / d;

-        *shift = 0;

-        /* use multiplication and constant shift by 16 */

-        *shift = 1 << (16 - *shift);

-    }

-}

-void vp8cx_init_quantizer(VP8_COMP *cpi)

-{

-    int i;

-    int quant_val;

-    int Q;

-    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,

-                          44, 44};

-    for (Q = 0; Q < QINDEX_RANGE; Q++)

-    {

-        /* dc values */

-        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);

-        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,

-                     cpi->Y1quant_shift[Q] + 0, quant_val);

-        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

-        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;

-        cpi->common.Y1dequant[Q][0] = quant_val;

-        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

-        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);

-        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,

-                     cpi->Y2quant_shift[Q] + 0, quant_val);

-        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;

-        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;

-        cpi->common.Y2dequant[Q][0] = quant_val;

-        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

-        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);

-        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,

-                     cpi->UVquant_shift[Q] + 0, quant_val);

-        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;

-        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;

-        cpi->common.UVdequant[Q][0] = quant_val;

-        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

-        /* all the ac values = ; */

-        quant_val = vp8_ac_yquant(Q);

-        cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,

-                     cpi->Y1quant_shift[Q] + 1, quant_val);

-        cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

-        cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;

-        cpi->common.Y1dequant[Q][1] = quant_val;

-        cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;

-        quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);

-        cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,

-                     cpi->Y2quant_shift[Q] + 1, quant_val);

-        cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;

-        cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;

-        cpi->common.Y2dequant[Q][1] = quant_val;

-        cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;

-        quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);

-        cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;

-        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,

-                     cpi->UVquant_shift[Q] + 1, quant_val);

-        cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

-        cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;

-        cpi->common.UVdequant[Q][1] = quant_val;

-        cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;

-        for (i = 2; i < 16; i++)

-        {

-            cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];

-            cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];

-            cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];

-            cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];

-            cpi->Y1round[Q][i] = cpi->Y1round[Q][1];

-            cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *

-                                             zbin_boost[i]) >> 7;

-            cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];

-            cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];

-            cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];

-            cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];

-            cpi->Y2round[Q][i] = cpi->Y2round[Q][1];

-            cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *

-                                             zbin_boost[i]) >> 7;

-            cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];

-            cpi->UVquant[Q][i] = cpi->UVquant[Q][1];

-            cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];

-            cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];

-            cpi->UVround[Q][i] = cpi->UVround[Q][1];

-            cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *

-                                             zbin_boost[i]) >> 7;

-        }

-    }

-}

-#define ZBIN_EXTRA_Y \

-    (( cpi->common.Y1dequant[QIndex][1] *  \

-    ( x->zbin_over_quant +  \

-      x->zbin_mode_boost +  \

-      x->act_zbin_adj ) ) >> 7)

-#define ZBIN_EXTRA_UV \

-    (( cpi->common.UVdequant[QIndex][1] *  \

-    ( x->zbin_over_quant +  \

-      x->zbin_mode_boost +  \

-      x->act_zbin_adj ) ) >> 7)

-#define ZBIN_EXTRA_Y2 \

-    (( cpi->common.Y2dequant[QIndex][1] *  \

-    ( (x->zbin_over_quant / 2) +  \

-       x->zbin_mode_boost +  \

-       x->act_zbin_adj ) ) >> 7)

-void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)

-{

-    int i;

-    int QIndex;

-    MACROBLOCKD *xd = &x->e_mbd;

-    int zbin_extra;

-    /* Select the baseline MB Q index. */

-    if (xd->segmentation_enabled)

-    {

-        /* Abs Value */

-        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)

-            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];

-        /* Delta Value */

-        else

-        {

-            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];

-            /* Clamp to valid range */

-            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;

-        }

-    }

-    else

-        QIndex = cpi->common.base_qindex;

-    /* This initialization should be called at least once. Use ok_to_skip to

-     * decide if it is ok to skip.

-     * Before encoding a frame, this function is always called with ok_to_skip

-     * =0, which means no skiping of calculations. The "last" values are

-     * initialized at that time.

-     */

-    if (!ok_to_skip || QIndex != x->q_index)

-    {

-        xd->dequant_y1_dc[0] = 1;

-        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];

-        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];

-        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];

-        for (i = 1; i < 16; i++)

-        {

-            xd->dequant_y1_dc[i] =

-            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];

-            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];

-            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];

-        }

-#if 1

-        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until

-         * the quantizer code uses a passed in pointer to the dequant constants.

-         * This will also require modifications to the x86 and neon assembly.

-         * */

-        for (i = 0; i < 16; i++)

-            x->e_mbd.block[i].dequant = xd->dequant_y1;

-        for (i = 16; i < 24; i++)

-            x->e_mbd.block[i].dequant = xd->dequant_uv;

-        x->e_mbd.block[24].dequant = xd->dequant_y2;

-#endif

-        /* Y */

-        zbin_extra = ZBIN_EXTRA_Y;

-        for (i = 0; i < 16; i++)

-        {

-            x->block[i].quant = cpi->Y1quant[QIndex];

-            x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];

-            x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];

-            x->block[i].zbin = cpi->Y1zbin[QIndex];

-            x->block[i].round = cpi->Y1round[QIndex];

-            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];

-            x->block[i].zbin_extra = (short)zbin_extra;

-        }

-        /* UV */

-        zbin_extra = ZBIN_EXTRA_UV;

-        for (i = 16; i < 24; i++)

-        {

-            x->block[i].quant = cpi->UVquant[QIndex];

-            x->block[i].quant_fast = cpi->UVquant_fast[QIndex];

-            x->block[i].quant_shift = cpi->UVquant_shift[QIndex];

-            x->block[i].zbin = cpi->UVzbin[QIndex];

-            x->block[i].round = cpi->UVround[QIndex];

-            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];

-            x->block[i].zbin_extra = (short)zbin_extra;

-        }

-        /* Y2 */

-        zbin_extra = ZBIN_EXTRA_Y2;

-        x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];

-        x->block[24].quant = cpi->Y2quant[QIndex];

-        x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];

-        x->block[24].zbin = cpi->Y2zbin[QIndex];

-        x->block[24].round = cpi->Y2round[QIndex];

-        x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];

-        x->block[24].zbin_extra = (short)zbin_extra;

-        /* save this macroblock QIndex for vp8_update_zbin_extra() */

-        x->q_index = QIndex;

-        x->last_zbin_over_quant = x->zbin_over_quant;

-        x->last_zbin_mode_boost = x->zbin_mode_boost;

-        x->last_act_zbin_adj = x->act_zbin_adj;

-    }

-    else if(x->last_zbin_over_quant != x->zbin_over_quant

-            || x->last_zbin_mode_boost != x->zbin_mode_boost

-            || x->last_act_zbin_adj != x->act_zbin_adj)

-    {

-        /* Y */

-        zbin_extra = ZBIN_EXTRA_Y;

-        for (i = 0; i < 16; i++)

-            x->block[i].zbin_extra = (short)zbin_extra;

-        /* UV */

-        zbin_extra = ZBIN_EXTRA_UV;

-        for (i = 16; i < 24; i++)

-            x->block[i].zbin_extra = (short)zbin_extra;

-        /* Y2 */

-        zbin_extra = ZBIN_EXTRA_Y2;

-        x->block[24].zbin_extra = (short)zbin_extra;

-        x->last_zbin_over_quant = x->zbin_over_quant;

-        x->last_zbin_mode_boost = x->zbin_mode_boost;

-        x->last_act_zbin_adj = x->act_zbin_adj;

-    }

-}

-void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)

-{

-    int i;

-    int QIndex = x->q_index;

-    int zbin_extra;

-    /* Y */

-    zbin_extra = ZBIN_EXTRA_Y;

-    for (i = 0; i < 16; i++)

-        x->block[i].zbin_extra = (short)zbin_extra;

-    /* UV */

-    zbin_extra = ZBIN_EXTRA_UV;

-    for (i = 16; i < 24; i++)

-        x->block[i].zbin_extra = (short)zbin_extra;

-    /* Y2 */

-    zbin_extra = ZBIN_EXTRA_Y2;

-    x->block[24].zbin_extra = (short)zbin_extra;

-}

-#undef ZBIN_EXTRA_Y

-#undef ZBIN_EXTRA_UV

-#undef ZBIN_EXTRA_Y2

-void vp8cx_frame_init_quantizer(VP8_COMP *cpi)

-{

-    /* Clear Zbin mode boost for default case */

-    cpi->mb.zbin_mode_boost = 0;

-    /* MB level quantizer setup */

-    vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);

-}

-void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)

-{

-    VP8_COMMON *cm = &cpi->common;

-    MACROBLOCKD *mbd = &cpi->mb.e_mbd;

-    int update = 0;

-    int new_delta_q;

-    int new_uv_delta_q;

-    cm->base_qindex = Q;

-    /* if any of the delta_q values are changing update flag has to be set */

-    /* currently only y2dc_delta_q may change */

-    cm->y1dc_delta_q = 0;

-    cm->y2ac_delta_q = 0;

-    if (Q < 4)

-    {

-        new_delta_q = 4-Q;

-    }

-    else

-        new_delta_q = 0;

-    update |= cm->y2dc_delta_q != new_delta_q;

-    cm->y2dc_delta_q = new_delta_q;

-    new_uv_delta_q = 0;

-    // For screen content, lower the q value for UV channel. For now, select

-    // conservative delta; same delta for dc and ac, and decrease it with lower

-    // Q, and set to 0 below some threshold. May want to condition this in

-    // future on the variance/energy in UV channel.

-    if (cpi->oxcf.screen_content_mode && Q > 40) {

-      new_uv_delta_q = -(int)(0.15 * Q);

-      // Check range: magnitude of delta is 4 bits.

-      if (new_uv_delta_q < -15) {

-        new_uv_delta_q = -15;

-      }

-    }

-    update |= cm->uvdc_delta_q != new_uv_delta_q;

-    cm->uvdc_delta_q = new_uv_delta_q;

-    cm->uvac_delta_q = new_uv_delta_q;

-    /* Set Segment specific quatizers */

-    mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];

-    mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];

-    mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];

-    mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];

-    /* quantizer has to be reinitialized for any delta_q changes */

-    if(update)

-        vp8cx_init_quantizer(cpi);

-}

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -28,7 +28,7 @@

 #include "vp8/common/findnearmv.h"

 #include "vp8/common/quant_common.h"

 #include "encodemb.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vpx_dsp/variance.h"

 #include "mcomp.h"

 #include "rdopt.h"

--- a/vp8/encoder/temporal_filter.c

+++ b/vp8/encoder/temporal_filter.c

@@ -12,7 +12,7 @@

 #include "vp8/common/onyxc_int.h"

 #include "onyx_int.h"

 #include "vp8/common/systemdependent.h"

-#include "quantize.h"

+#include "vp8/encoder/quantize.h"

 #include "vp8/common/alloccommon.h"

 #include "mcomp.h"

 #include "firstpass.h"

--- /dev/null

+++ b/vp8/encoder/vp8_quantize.c

@@ -1,0 +1,583 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <math.h>

+#include "vpx_mem/vpx_mem.h"

+#include "onyx_int.h"

+#include "vp8/encoder/quantize.h"

+#include "vp8/common/quant_common.h"

+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)

+{

+    int i, rc, eob;

+    int x, y, z, sz;

+    short *coeff_ptr   = b->coeff;

+    short *round_ptr   = b->round;

+    short *quant_ptr   = b->quant_fast;

+    short *qcoeff_ptr  = d->qcoeff;

+    short *dqcoeff_ptr = d->dqcoeff;

+    short *dequant_ptr = d->dequant;

+    eob = -1;

+    for (i = 0; i < 16; i++)

+    {

+        rc   = vp8_default_zig_zag1d[i];

+        z    = coeff_ptr[rc];

+        sz = (z >> 31);                              /* sign of z */

+        x  = (z ^ sz) - sz;                          /* x = abs(z) */

+        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */

+        x  = (y ^ sz) - sz;                          /* get the sign back */

+        qcoeff_ptr[rc] = x;                          /* write to destination */

+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];       /* dequantized value */

+        if (y)

+        {

+            eob = i;                                 /* last nonzero coeffs */

+        }

+    }

+    *d->eob = (char)(eob + 1);

+}

+void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)

+{

+    int i, rc, eob;

+    int zbin;

+    int x, y, z, sz;

+    short *zbin_boost_ptr  = b->zrun_zbin_boost;

+    short *coeff_ptr       = b->coeff;

+    short *zbin_ptr        = b->zbin;

+    short *round_ptr       = b->round;

+    short *quant_ptr       = b->quant;

+    short *quant_shift_ptr = b->quant_shift;

+    short *qcoeff_ptr      = d->qcoeff;

+    short *dqcoeff_ptr     = d->dqcoeff;

+    short *dequant_ptr     = d->dequant;

+    short zbin_oq_value    = b->zbin_extra;

+    memset(qcoeff_ptr, 0, 32);

+    memset(dqcoeff_ptr, 0, 32);

+    eob = -1;

+    for (i = 0; i < 16; i++)

+    {

+        rc   = vp8_default_zig_zag1d[i];

+        z    = coeff_ptr[rc];

+        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;

+        zbin_boost_ptr ++;

+        sz = (z >> 31);                              /* sign of z */

+        x  = (z ^ sz) - sz;                          /* x = abs(z) */

+        if (x >= zbin)

+        {

+            x += round_ptr[rc];

+            y  = ((((x * quant_ptr[rc]) >> 16) + x)

+                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */

+            x  = (y ^ sz) - sz;                      /* get the sign back */

+            qcoeff_ptr[rc]  = x;                     /* write to destination */

+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */

+            if (y)

+            {

+                eob = i;                             /* last nonzero coeffs */

+                zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */

+            }

+        }

+    }

+    *d->eob = (char)(eob + 1);

+}

+void vp8_quantize_mby(MACROBLOCK *x)

+{

+    int i;

+    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED

+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);

+    for (i = 0; i < 16; i++)

+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

+    if(has_2nd_order)

+        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);

+}

+void vp8_quantize_mb(MACROBLOCK *x)

+{

+    int i;

+    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED

+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);

+    for (i = 0; i < 24+has_2nd_order; i++)

+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

+}

+void vp8_quantize_mbuv(MACROBLOCK *x)

+{

+    int i;

+    for (i = 16; i < 24; i++)

+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);

+}

+static const int qrounding_factors[129] =

+{

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48

+};

+static const int qzbin_factors[129] =

+{

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80

+};

+static const int qrounding_factors_y2[129] =

+{

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48, 48, 48, 48, 48, 48, 48, 48,

+    48

+};

+static const int qzbin_factors_y2[129] =

+{

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    84, 84, 84, 84, 84, 84, 84, 84,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80, 80, 80, 80, 80, 80, 80, 80,

+    80

+};

+static void invert_quant(int improved_quant, short *quant,

+                         short *shift, short d)

+{

+    if(improved_quant)

+    {

+        unsigned t;

+        int l;

+        t = d;

+        for(l = 0; t > 1; l++)

+            t>>=1;

+        t = 1 + (1<<(16+l))/d;

+        *quant = (short)(t - (1<<16));

+        *shift = l;

+        /* use multiplication and constant shift by 16 */

+        *shift = 1 << (16 - *shift);

+    }

+    else

+    {

+        *quant = (1 << 16) / d;

+        *shift = 0;

+        /* use multiplication and constant shift by 16 */

+        *shift = 1 << (16 - *shift);

+    }

+}

+void vp8cx_init_quantizer(VP8_COMP *cpi)

+{

+    int i;

+    int quant_val;

+    int Q;

+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,

+                          44, 44};

+    for (Q = 0; Q < QINDEX_RANGE; Q++)

+    {

+        /* dc values */

+        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);

+        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,

+                     cpi->Y1quant_shift[Q] + 0, quant_val);

+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;

+        cpi->common.Y1dequant[Q][0] = quant_val;

+        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

+        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);

+        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,

+                     cpi->Y2quant_shift[Q] + 0, quant_val);

+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;

+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;

+        cpi->common.Y2dequant[Q][0] = quant_val;

+        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

+        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);

+        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,

+                     cpi->UVquant_shift[Q] + 0, quant_val);

+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;

+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;

+        cpi->common.UVdequant[Q][0] = quant_val;

+        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

+        /* all the ac values */

+        quant_val = vp8_ac_yquant(Q);

+        cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,

+                     cpi->Y1quant_shift[Q] + 1, quant_val);

+        cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

+        cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;

+        cpi->common.Y1dequant[Q][1] = quant_val;

+        cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;

+        quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);

+        cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,

+                     cpi->Y2quant_shift[Q] + 1, quant_val);

+        cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;

+        cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;

+        cpi->common.Y2dequant[Q][1] = quant_val;

+        cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;

+        quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);

+        cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;

+        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,

+                     cpi->UVquant_shift[Q] + 1, quant_val);

+        cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;

+        cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;

+        cpi->common.UVdequant[Q][1] = quant_val;

+        cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;

+        for (i = 2; i < 16; i++)

+        {

+            cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];

+            cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];

+            cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];

+            cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];

+            cpi->Y1round[Q][i] = cpi->Y1round[Q][1];

+            cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *

+                                             zbin_boost[i]) >> 7;

+            cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];

+            cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];

+            cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];

+            cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];

+            cpi->Y2round[Q][i] = cpi->Y2round[Q][1];

+            cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *

+                                             zbin_boost[i]) >> 7;

+            cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];

+            cpi->UVquant[Q][i] = cpi->UVquant[Q][1];

+            cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];

+            cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];

+            cpi->UVround[Q][i] = cpi->UVround[Q][1];

+            cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *

+                                             zbin_boost[i]) >> 7;

+        }

+    }

+}

+#define ZBIN_EXTRA_Y \

+    (( cpi->common.Y1dequant[QIndex][1] *  \

+    ( x->zbin_over_quant +  \

+      x->zbin_mode_boost +  \

+      x->act_zbin_adj ) ) >> 7)

+#define ZBIN_EXTRA_UV \

+    (( cpi->common.UVdequant[QIndex][1] *  \

+    ( x->zbin_over_quant +  \

+      x->zbin_mode_boost +  \

+      x->act_zbin_adj ) ) >> 7)

+#define ZBIN_EXTRA_Y2 \

+    (( cpi->common.Y2dequant[QIndex][1] *  \

+    ( (x->zbin_over_quant / 2) +  \

+       x->zbin_mode_boost +  \

+       x->act_zbin_adj ) ) >> 7)

+void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)

+{

+    int i;

+    int QIndex;

+    MACROBLOCKD *xd = &x->e_mbd;

+    int zbin_extra;

+    /* Select the baseline MB Q index. */

+    if (xd->segmentation_enabled)

+    {

+        /* Abs Value */

+        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)

+            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];

+        /* Delta Value */

+        else

+        {

+            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];

+            /* Clamp to valid range */

+            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;

+        }

+    }

+    else

+        QIndex = cpi->common.base_qindex;

+    /* This initialization should be called at least once. Use ok_to_skip to

+     * decide if it is ok to skip.

+     * Before encoding a frame, this function is always called with ok_to_skip

+     * =0, which means no skipping of calculations. The "last" values are

+     * initialized at that time.

+     */

+    if (!ok_to_skip || QIndex != x->q_index)

+    {

+        xd->dequant_y1_dc[0] = 1;

+        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];

+        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];

+        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];

+        for (i = 1; i < 16; i++)

+        {

+            xd->dequant_y1_dc[i] =

+            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];

+            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];

+            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];

+        }

+#if 1

+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until

+         * the quantizer code uses a passed in pointer to the dequant constants.

+         * This will also require modifications to the x86 and neon assembly.

+         * */

+        for (i = 0; i < 16; i++)

+            x->e_mbd.block[i].dequant = xd->dequant_y1;

+        for (i = 16; i < 24; i++)

+            x->e_mbd.block[i].dequant = xd->dequant_uv;

+        x->e_mbd.block[24].dequant = xd->dequant_y2;

+#endif

+        /* Y */

+        zbin_extra = ZBIN_EXTRA_Y;

+        for (i = 0; i < 16; i++)

+        {

+            x->block[i].quant = cpi->Y1quant[QIndex];

+            x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];

+            x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];

+            x->block[i].zbin = cpi->Y1zbin[QIndex];

+            x->block[i].round = cpi->Y1round[QIndex];

+            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];

+            x->block[i].zbin_extra = (short)zbin_extra;

+        }

+        /* UV */

+        zbin_extra = ZBIN_EXTRA_UV;

+        for (i = 16; i < 24; i++)

+        {

+            x->block[i].quant = cpi->UVquant[QIndex];

+            x->block[i].quant_fast = cpi->UVquant_fast[QIndex];

+            x->block[i].quant_shift = cpi->UVquant_shift[QIndex];

+            x->block[i].zbin = cpi->UVzbin[QIndex];

+            x->block[i].round = cpi->UVround[QIndex];

+            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];

+            x->block[i].zbin_extra = (short)zbin_extra;

+        }

+        /* Y2 */

+        zbin_extra = ZBIN_EXTRA_Y2;

+        x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];

+        x->block[24].quant = cpi->Y2quant[QIndex];

+        x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];

+        x->block[24].zbin = cpi->Y2zbin[QIndex];

+        x->block[24].round = cpi->Y2round[QIndex];

+        x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];

+        x->block[24].zbin_extra = (short)zbin_extra;

+        /* save this macroblock QIndex for vp8_update_zbin_extra() */

+        x->q_index = QIndex;

+        x->last_zbin_over_quant = x->zbin_over_quant;

+        x->last_zbin_mode_boost = x->zbin_mode_boost;

+        x->last_act_zbin_adj = x->act_zbin_adj;

+    }

+    else if(x->last_zbin_over_quant != x->zbin_over_quant

+            || x->last_zbin_mode_boost != x->zbin_mode_boost

+            || x->last_act_zbin_adj != x->act_zbin_adj)

+    {

+        /* Y */

+        zbin_extra = ZBIN_EXTRA_Y;

+        for (i = 0; i < 16; i++)

+            x->block[i].zbin_extra = (short)zbin_extra;

+        /* UV */

+        zbin_extra = ZBIN_EXTRA_UV;

+        for (i = 16; i < 24; i++)

+            x->block[i].zbin_extra = (short)zbin_extra;

+        /* Y2 */

+        zbin_extra = ZBIN_EXTRA_Y2;

+        x->block[24].zbin_extra = (short)zbin_extra;

+        x->last_zbin_over_quant = x->zbin_over_quant;

+        x->last_zbin_mode_boost = x->zbin_mode_boost;

+        x->last_act_zbin_adj = x->act_zbin_adj;

+    }

+}

+void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)

+{

+    int i;

+    int QIndex = x->q_index;

+    int zbin_extra;

+    /* Y */

+    zbin_extra = ZBIN_EXTRA_Y;

+    for (i = 0; i < 16; i++)

+        x->block[i].zbin_extra = (short)zbin_extra;

+    /* UV */

+    zbin_extra = ZBIN_EXTRA_UV;

+    for (i = 16; i < 24; i++)

+        x->block[i].zbin_extra = (short)zbin_extra;

+    /* Y2 */

+    zbin_extra = ZBIN_EXTRA_Y2;

+    x->block[24].zbin_extra = (short)zbin_extra;

+}

+#undef ZBIN_EXTRA_Y

+#undef ZBIN_EXTRA_UV

+#undef ZBIN_EXTRA_Y2

+void vp8cx_frame_init_quantizer(VP8_COMP *cpi)

+{

+    /* Clear Zbin mode boost for default case */

+    cpi->mb.zbin_mode_boost = 0;

+    /* MB level quantizer setup */

+    vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);

+}

+void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)

+{

+    VP8_COMMON *cm = &cpi->common;

+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;

+    int update = 0;

+    int new_delta_q;

+    int new_uv_delta_q;

+    cm->base_qindex = Q;

+    /* if any of the delta_q values are changing update flag has to be set */

+    /* currently y2dc_delta_q and the uv dc/ac delta_q values may change */

+    cm->y1dc_delta_q = 0;

+    cm->y2ac_delta_q = 0;

+    if (Q < 4)

+    {

+        new_delta_q = 4-Q;

+    }

+    else

+        new_delta_q = 0;

+    update |= cm->y2dc_delta_q != new_delta_q;

+    cm->y2dc_delta_q = new_delta_q;

+    new_uv_delta_q = 0;

+    // For screen content, lower the q value for UV channel. For now, select

+    // conservative delta; same delta for dc and ac, and decrease it with lower

+    // Q, and set to 0 below some threshold. May want to condition this in

+    // future on the variance/energy in UV channel.

+    if (cpi->oxcf.screen_content_mode && Q > 40) {

+      new_uv_delta_q = -(int)(0.15 * Q);

+      // Check range: magnitude of delta is 4 bits.

+      if (new_uv_delta_q < -15) {

+        new_uv_delta_q = -15;

+      }

+    }

+    update |= cm->uvdc_delta_q != new_uv_delta_q;

+    cm->uvdc_delta_q = new_uv_delta_q;

+    cm->uvac_delta_q = new_uv_delta_q;

+    /* Set Segment specific quantizers */

+    mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];

+    mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];

+    mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];

+    mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];

+    /* quantizer has to be reinitialized for any delta_q changes */

+    if(update)

+        vp8cx_init_quantizer(cpi);

+}

--- a/vp8/encoder/x86/quantize_sse2.c

+++ /dev/null

@@ -1,228 +1,0 @@

-/*

- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include "vpx_config.h"

-#include "vp8_rtcd.h"

-#include "vpx_ports/x86.h"

-#include "vpx_mem/vpx_mem.h"

-#include "vp8/encoder/block.h"

-#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */

-#include <mmintrin.h> /* MMX */

-#include <xmmintrin.h> /* SSE */

-#include <emmintrin.h> /* SSE2 */

-#define SELECT_EOB(i, z) \

-    do { \

-        short boost = *zbin_boost_ptr; \

-        int cmp = (x[z] < boost) | (y[z] == 0); \

-        zbin_boost_ptr++; \

-        if (cmp) \

-            break; \

-        qcoeff_ptr[z] = y[z]; \

-        eob = i; \

-        zbin_boost_ptr = b->zrun_zbin_boost; \

-    } while (0)

-void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)

-{

-    char eob = 0;

-    short *zbin_boost_ptr;

-    short *qcoeff_ptr      = d->qcoeff;

-    DECLARE_ALIGNED(16, short, x[16]);

-    DECLARE_ALIGNED(16, short, y[16]);

-    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;

-    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));

-    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));

-    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));

-    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));

-    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);

-    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));

-    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));

-    __m128i round0 = _mm_load_si128((__m128i *)(b->round));

-    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));

-    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));

-    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));

-    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));

-    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

-    memset(qcoeff_ptr, 0, 32);

-    /* Duplicate to all lanes. */

-    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);

-    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);

-    /* Sign of z: z >> 15 */

-    sz0 = _mm_srai_epi16(z0, 15);

-    sz1 = _mm_srai_epi16(z1, 15);

-    /* x = abs(z): (z ^ sz) - sz */

-    x0 = _mm_xor_si128(z0, sz0);

-    x1 = _mm_xor_si128(z1, sz1);

-    x0 = _mm_sub_epi16(x0, sz0);

-    x1 = _mm_sub_epi16(x1, sz1);

-    /* zbin[] + zbin_extra */

-    zbin0 = _mm_add_epi16(zbin0, zbin_extra);

-    zbin1 = _mm_add_epi16(zbin1, zbin_extra);

-    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance

-     * the equation because boost is the only value which can change:

-     * x - (zbin[] + extra) >= boost */

-    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);

-    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);

-    _mm_store_si128((__m128i *)(x), x_minus_zbin0);

-    _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);

-    /* All the remaining calculations are valid whether they are done now with

-     * simd or later inside the loop one at a time. */

-    x0 = _mm_add_epi16(x0, round0);

-    x1 = _mm_add_epi16(x1, round1);

-    y0 = _mm_mulhi_epi16(x0, quant0);

-    y1 = _mm_mulhi_epi16(x1, quant1);

-    y0 = _mm_add_epi16(y0, x0);

-    y1 = _mm_add_epi16(y1, x1);

-    /* Instead of shifting each value independently we convert the scaling

-     * factor with 1 << (16 - shift) so we can use multiply/return high half. */

-    y0 = _mm_mulhi_epi16(y0, quant_shift0);

-    y1 = _mm_mulhi_epi16(y1, quant_shift1);

-    /* Return the sign: (y ^ sz) - sz */

-    y0 = _mm_xor_si128(y0, sz0);

-    y1 = _mm_xor_si128(y1, sz1);

-    y0 = _mm_sub_epi16(y0, sz0);

-    y1 = _mm_sub_epi16(y1, sz1);

-    _mm_store_si128((__m128i *)(y), y0);

-    _mm_store_si128((__m128i *)(y + 8), y1);

-    zbin_boost_ptr = b->zrun_zbin_boost;

-    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */

-    SELECT_EOB(1, 0);

-    SELECT_EOB(2, 1);

-    SELECT_EOB(3, 4);

-    SELECT_EOB(4, 8);

-    SELECT_EOB(5, 5);

-    SELECT_EOB(6, 2);

-    SELECT_EOB(7, 3);

-    SELECT_EOB(8, 6);

-    SELECT_EOB(9, 9);

-    SELECT_EOB(10, 12);

-    SELECT_EOB(11, 13);

-    SELECT_EOB(12, 10);

-    SELECT_EOB(13, 7);

-    SELECT_EOB(14, 11);

-    SELECT_EOB(15, 14);

-    SELECT_EOB(16, 15);

-    y0 = _mm_load_si128((__m128i *)(d->qcoeff));

-    y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));

-    /* dqcoeff = qcoeff * dequant */

-    y0 = _mm_mullo_epi16(y0, dequant0);

-    y1 = _mm_mullo_epi16(y1, dequant1);

-    _mm_store_si128((__m128i *)(d->dqcoeff), y0);

-    _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);

-    *d->eob = eob;

-}

-void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)

-{

-  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));

-  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));

-  __m128i round0 = _mm_load_si128((__m128i *)(b->round));

-  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));

-  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));

-  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));

-  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));

-  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

-  __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));

-  __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));

-  __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;

-  /* sign of z: z >> 15 */

-  sz0 = _mm_srai_epi16(z0, 15);

-  sz1 = _mm_srai_epi16(z1, 15);

-  /* x = abs(z): (z ^ sz) - sz */

-  x0 = _mm_xor_si128(z0, sz0);

-  x1 = _mm_xor_si128(z1, sz1);

-  x0 = _mm_sub_epi16(x0, sz0);

-  x1 = _mm_sub_epi16(x1, sz1);

-  /* x += round */

-  x0 = _mm_add_epi16(x0, round0);

-  x1 = _mm_add_epi16(x1, round1);

-  /* y = (x * quant) >> 16 */

-  y0 = _mm_mulhi_epi16(x0, quant_fast0);

-  y1 = _mm_mulhi_epi16(x1, quant_fast1);

-  /* x = abs(y) = (y ^ sz) - sz */

-  y0 = _mm_xor_si128(y0, sz0);

-  y1 = _mm_xor_si128(y1, sz1);

-  x0 = _mm_sub_epi16(y0, sz0);

-  x1 = _mm_sub_epi16(y1, sz1);

-  /* qcoeff = x */

-  _mm_store_si128((__m128i *)(d->qcoeff), x0);

-  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);

-  /* x * dequant */

-  xdq0 = _mm_mullo_epi16(x0, dequant0);

-  xdq1 = _mm_mullo_epi16(x1, dequant1);

-  /* dqcoeff = x * dequant */

-  _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);

-  _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);

-  /* build a mask for the zig zag */

-  zeros = _mm_setzero_si128();

-  x0 = _mm_cmpeq_epi16(x0, zeros);

-  x1 = _mm_cmpeq_epi16(x1, zeros);

-  ones = _mm_cmpeq_epi16(zeros, zeros);

-  x0 = _mm_xor_si128(x0, ones);

-  x1 = _mm_xor_si128(x1, ones);

-  x0 = _mm_and_si128(x0, inv_zig_zag0);

-  x1 = _mm_and_si128(x1, inv_zig_zag1);

-  x0 = _mm_max_epi16(x0, x1);

-  /* now down to 8 */

-  x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110

-  x0 = _mm_max_epi16(x0, x1);

-  /* only 4 left */

-  x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110

-  x0 = _mm_max_epi16(x0, x1);

-  /* okay, just 2! */

-  x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001

-  x0 = _mm_max_epi16(x0, x1);

-  *d->eob = 0xFF & _mm_cvtsi128_si32(x0);

-}

--- /dev/null

+++ b/vp8/encoder/x86/vp8_quantize_sse2.c

@@ -1,0 +1,228 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vpx_config.h"

+#include "vp8_rtcd.h"

+#include "vpx_ports/x86.h"

+#include "vpx_mem/vpx_mem.h"

+#include "vp8/encoder/block.h"

+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */

+#include <mmintrin.h> /* MMX */

+#include <xmmintrin.h> /* SSE */

+#include <emmintrin.h> /* SSE2 */

+#define SELECT_EOB(i, z) \

+    do { \

+        short boost = *zbin_boost_ptr; \

+        int cmp = (x[z] < boost) | (y[z] == 0); \

+        zbin_boost_ptr++; \

+        if (cmp) \

+            break; \

+        qcoeff_ptr[z] = y[z]; \

+        eob = i; \

+        zbin_boost_ptr = b->zrun_zbin_boost; \

+    } while (0)

+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)

+{

+    char eob = 0;

+    short *zbin_boost_ptr;

+    short *qcoeff_ptr      = d->qcoeff;

+    DECLARE_ALIGNED(16, short, x[16]);

+    DECLARE_ALIGNED(16, short, y[16]);

+    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;

+    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));

+    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));

+    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));

+    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));

+    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);

+    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));

+    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));

+    __m128i round0 = _mm_load_si128((__m128i *)(b->round));

+    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));

+    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));

+    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));

+    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));

+    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

+    memset(qcoeff_ptr, 0, 32);

+    /* Duplicate to all lanes. */

+    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);

+    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);

+    /* Sign of z: z >> 15 */

+    sz0 = _mm_srai_epi16(z0, 15);

+    sz1 = _mm_srai_epi16(z1, 15);

+    /* x = abs(z): (z ^ sz) - sz */

+    x0 = _mm_xor_si128(z0, sz0);

+    x1 = _mm_xor_si128(z1, sz1);

+    x0 = _mm_sub_epi16(x0, sz0);

+    x1 = _mm_sub_epi16(x1, sz1);

+    /* zbin[] + zbin_extra */

+    zbin0 = _mm_add_epi16(zbin0, zbin_extra);

+    zbin1 = _mm_add_epi16(zbin1, zbin_extra);

+    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance

+     * the equation because boost is the only value which can change:

+     * x - (zbin[] + extra) >= boost */

+    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);

+    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);

+    _mm_store_si128((__m128i *)(x), x_minus_zbin0);

+    _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);

+    /* All the remaining calculations are valid whether they are done now with

+     * simd or later inside the loop one at a time. */

+    x0 = _mm_add_epi16(x0, round0);

+    x1 = _mm_add_epi16(x1, round1);

+    y0 = _mm_mulhi_epi16(x0, quant0);

+    y1 = _mm_mulhi_epi16(x1, quant1);

+    y0 = _mm_add_epi16(y0, x0);

+    y1 = _mm_add_epi16(y1, x1);

+    /* Instead of shifting each value independently we convert the scaling

+     * factor with 1 << (16 - shift) so we can use multiply/return high half. */

+    y0 = _mm_mulhi_epi16(y0, quant_shift0);

+    y1 = _mm_mulhi_epi16(y1, quant_shift1);

+    /* Return the sign: (y ^ sz) - sz */

+    y0 = _mm_xor_si128(y0, sz0);

+    y1 = _mm_xor_si128(y1, sz1);

+    y0 = _mm_sub_epi16(y0, sz0);

+    y1 = _mm_sub_epi16(y1, sz1);

+    _mm_store_si128((__m128i *)(y), y0);

+    _mm_store_si128((__m128i *)(y + 8), y1);

+    zbin_boost_ptr = b->zrun_zbin_boost;

+    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */

+    SELECT_EOB(1, 0);

+    SELECT_EOB(2, 1);

+    SELECT_EOB(3, 4);

+    SELECT_EOB(4, 8);

+    SELECT_EOB(5, 5);

+    SELECT_EOB(6, 2);

+    SELECT_EOB(7, 3);

+    SELECT_EOB(8, 6);

+    SELECT_EOB(9, 9);

+    SELECT_EOB(10, 12);

+    SELECT_EOB(11, 13);

+    SELECT_EOB(12, 10);

+    SELECT_EOB(13, 7);

+    SELECT_EOB(14, 11);

+    SELECT_EOB(15, 14);

+    SELECT_EOB(16, 15);

+    y0 = _mm_load_si128((__m128i *)(d->qcoeff));

+    y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));

+    /* dqcoeff = qcoeff * dequant */

+    y0 = _mm_mullo_epi16(y0, dequant0);

+    y1 = _mm_mullo_epi16(y1, dequant1);

+    _mm_store_si128((__m128i *)(d->dqcoeff), y0);

+    _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);

+    *d->eob = eob;

+}

+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)

+{

+  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));

+  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));

+  __m128i round0 = _mm_load_si128((__m128i *)(b->round));

+  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));

+  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));

+  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));

+  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));

+  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));

+  __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));

+  __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));

+  __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;

+  /* sign of z: z >> 15 */

+  sz0 = _mm_srai_epi16(z0, 15);

+  sz1 = _mm_srai_epi16(z1, 15);

+  /* x = abs(z): (z ^ sz) - sz */

+  x0 = _mm_xor_si128(z0, sz0);

+  x1 = _mm_xor_si128(z1, sz1);

+  x0 = _mm_sub_epi16(x0, sz0);

+  x1 = _mm_sub_epi16(x1, sz1);

+  /* x += round */

+  x0 = _mm_add_epi16(x0, round0);

+  x1 = _mm_add_epi16(x1, round1);

+  /* y = (x * quant) >> 16 */

+  y0 = _mm_mulhi_epi16(x0, quant_fast0);

+  y1 = _mm_mulhi_epi16(x1, quant_fast1);

+  /* restore the sign: x = (y ^ sz) - sz */

+  y0 = _mm_xor_si128(y0, sz0);

+  y1 = _mm_xor_si128(y1, sz1);

+  x0 = _mm_sub_epi16(y0, sz0);

+  x1 = _mm_sub_epi16(y1, sz1);

+  /* qcoeff = x */

+  _mm_store_si128((__m128i *)(d->qcoeff), x0);

+  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);

+  /* x * dequant */

+  xdq0 = _mm_mullo_epi16(x0, dequant0);

+  xdq1 = _mm_mullo_epi16(x1, dequant1);

+  /* dqcoeff = x * dequant */

+  _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);

+  _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);

+  /* build a mask for the zig zag */

+  zeros = _mm_setzero_si128();

+  x0 = _mm_cmpeq_epi16(x0, zeros);

+  x1 = _mm_cmpeq_epi16(x1, zeros);

+  ones = _mm_cmpeq_epi16(zeros, zeros);

+  x0 = _mm_xor_si128(x0, ones);

+  x1 = _mm_xor_si128(x1, ones);

+  x0 = _mm_and_si128(x0, inv_zig_zag0);

+  x1 = _mm_and_si128(x1, inv_zig_zag1);

+  x0 = _mm_max_epi16(x0, x1);

+  /* now down to 8 */

+  x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110

+  x0 = _mm_max_epi16(x0, x1);

+  /* only 4 left */

+  x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110

+  x0 = _mm_max_epi16(x0, x1);

+  /* okay, just 2! */

+  x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001

+  x0 = _mm_max_epi16(x0, x1);

+  *d->eob = 0xFF & _mm_cvtsi128_si32(x0);

+}

--- a/vp8/vp8cx.mk

+++ b/vp8/vp8cx.mk

@@ -60,7 +60,7 @@

 VP8_CX_SRCS-yes += encoder/onyx_if.c

 VP8_CX_SRCS-yes += encoder/pickinter.c

 VP8_CX_SRCS-yes += encoder/picklpf.c

-VP8_CX_SRCS-yes += encoder/quantize.c

+VP8_CX_SRCS-yes += encoder/vp8_quantize.c

 VP8_CX_SRCS-yes += encoder/ratectrl.c

 VP8_CX_SRCS-yes += encoder/rdopt.c

 VP8_CX_SRCS-yes += encoder/segmentation.c

@@ -84,7 +84,7 @@

 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm

 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm

-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c

+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_quantize_sse2.c

 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c

 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c

--

⑨