shithub: libvpx

Download patch

ref: 798b54a006bf971e206555a563e218f0b57dcb5a
parent: 0092960d2746fa25a41235532df9c68fe2fe093a
parent: 79a00d71bd3619e9d51aeb4472856026b73c3d1b
author: Johann Koenig <johannkoenig@google.com>
date: Tue Aug 18 19:08:44 EDT 2015

Merge "Rename vp8 quantize.c"

--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -12,7 +12,7 @@
 #include "vpx_config.h"
 #include "vp8_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
 #include "vp8/common/invtrans.h"
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -14,7 +14,7 @@
 #include "vp8_rtcd.h"
 #include "encodemb.h"
 #include "vp8/common/reconinter.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "tokenize.h"
 #include "vp8/common/invtrans.h"
 #include "vpx_mem/vpx_mem.h"
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -17,7 +17,7 @@
 #include "vp8/common/blockd.h"
 #include "onyx_int.h"
 #include "vp8/common/systemdependent.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vp8/common/alloccommon.h"
 #include "mcomp.h"
 #include "firstpass.h"
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -20,7 +20,7 @@
 #include "vp8/common/onyxc_int.h"
 #include "vpx_dsp/variance.h"
 #include "encodemb.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vp8/common/entropy.h"
 #include "vp8/common/threading.h"
 #include "vpx_ports/mem.h"
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -13,7 +13,7 @@
 #include "./vpx_scale_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/vpx_scale.h"
 #include "vp8/common/alloccommon.h"
--- a/vp8/encoder/quantize.c
+++ /dev/null
@@ -1,583 +1,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <math.h>
-#include "vpx_mem/vpx_mem.h"
-
-#include "onyx_int.h"
-#include "quantize.h"
-#include "vp8/common/quant_common.h"
-
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int x, y, z, sz;
-    short *coeff_ptr   = b->coeff;
-    short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant_fast;
-    short *qcoeff_ptr  = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = d->dequant;
-
-    eob = -1;
-    for (i = 0; i < 16; i++)
-    {
-        rc   = vp8_default_zig_zag1d[i];
-        z    = coeff_ptr[rc];
-
-        sz = (z >> 31);                              /* sign of z */
-        x  = (z ^ sz) - sz;                          /* x = abs(z) */
-
-        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
-        x  = (y ^ sz) - sz;                          /* get the sign back */
-        qcoeff_ptr[rc] = x;                          /* write to destination */
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];       /* dequantized value */
-
-        if (y)
-        {
-            eob = i;                                 /* last nonzero coeffs */
-        }
-    }
-    *d->eob = (char)(eob + 1);
-}
-
-void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *zbin_boost_ptr  = b->zrun_zbin_boost;
-    short *coeff_ptr       = b->coeff;
-    short *zbin_ptr        = b->zbin;
-    short *round_ptr       = b->round;
-    short *quant_ptr       = b->quant;
-    short *quant_shift_ptr = b->quant_shift;
-    short *qcoeff_ptr      = d->qcoeff;
-    short *dqcoeff_ptr     = d->dqcoeff;
-    short *dequant_ptr     = d->dequant;
-    short zbin_oq_value    = b->zbin_extra;
-
-    memset(qcoeff_ptr, 0, 32);
-    memset(dqcoeff_ptr, 0, 32);
-
-    eob = -1;
-
-    for (i = 0; i < 16; i++)
-    {
-        rc   = vp8_default_zig_zag1d[i];
-        z    = coeff_ptr[rc];
-
-        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-
-        zbin_boost_ptr ++;
-        sz = (z >> 31);                              /* sign of z */
-        x  = (z ^ sz) - sz;                          /* x = abs(z) */
-
-        if (x >= zbin)
-        {
-            x += round_ptr[rc];
-            y  = ((((x * quant_ptr[rc]) >> 16) + x)
-                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */
-            x  = (y ^ sz) - sz;                      /* get the sign back */
-            qcoeff_ptr[rc]  = x;                     /* write to destination */
-            dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */
-
-            if (y)
-            {
-                eob = i;                             /* last nonzero coeffs */
-                zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */
-            }
-        }
-    }
-
-    *d->eob = (char)(eob + 1);
-}
-
-void vp8_quantize_mby(MACROBLOCK *x)
-{
-    int i;
-    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
-        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
-
-    for (i = 0; i < 16; i++)
-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-
-    if(has_2nd_order)
-        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-}
-
-void vp8_quantize_mb(MACROBLOCK *x)
-{
-    int i;
-    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
-        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
-
-    for (i = 0; i < 24+has_2nd_order; i++)
-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-}
-
-
-void vp8_quantize_mbuv(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i++)
-        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-}
-
-static const int qrounding_factors[129] =
-{
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48
-};
-
-
-static const int qzbin_factors[129] =
-{
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80
-};
-
-
-static const int qrounding_factors_y2[129] =
-{
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48, 48, 48, 48, 48, 48, 48, 48,
-    48
-};
-
-
-static const int qzbin_factors_y2[129] =
-{
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    84, 84, 84, 84, 84, 84, 84, 84,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80
-};
-
-
-static void invert_quant(int improved_quant, short *quant,
-                         short *shift, short d)
-{
-    if(improved_quant)
-    {
-        unsigned t;
-        int l;
-        t = d;
-        for(l = 0; t > 1; l++)
-            t>>=1;
-        t = 1 + (1<<(16+l))/d;
-        *quant = (short)(t - (1<<16));
-        *shift = l;
-        /* use multiplication and constant shift by 16 */
-        *shift = 1 << (16 - *shift);
-    }
-    else
-    {
-        *quant = (1 << 16) / d;
-        *shift = 0;
-        /* use multiplication and constant shift by 16 */
-        *shift = 1 << (16 - *shift);
-    }
-}
-
-
-void vp8cx_init_quantizer(VP8_COMP *cpi)
-{
-    int i;
-    int quant_val;
-    int Q;
-
-    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
-                          44, 44};
-
-    for (Q = 0; Q < QINDEX_RANGE; Q++)
-    {
-        /* dc values */
-        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
-                     cpi->Y1quant_shift[Q] + 0, quant_val);
-        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0] = quant_val;
-        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
-                     cpi->Y2quant_shift[Q] + 0, quant_val);
-        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0] = quant_val;
-        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
-                     cpi->UVquant_shift[Q] + 0, quant_val);
-        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0] = quant_val;
-        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        /* all the ac values = ; */
-        quant_val = vp8_ac_yquant(Q);
-        cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
-                     cpi->Y1quant_shift[Q] + 1, quant_val);
-        cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][1] = quant_val;
-        cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
-        quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-        cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
-                     cpi->Y2quant_shift[Q] + 1, quant_val);
-        cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][1] = quant_val;
-        cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
-        quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-        cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
-        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
-                     cpi->UVquant_shift[Q] + 1, quant_val);
-        cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][1] = quant_val;
-        cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
-
-        for (i = 2; i < 16; i++)
-        {
-            cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
-            cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
-            cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
-            cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
-            cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
-            cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
-                                             zbin_boost[i]) >> 7;
-
-            cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
-            cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
-            cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
-            cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
-            cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
-            cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
-                                             zbin_boost[i]) >> 7;
-
-            cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
-            cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
-            cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
-            cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
-            cpi->UVround[Q][i] = cpi->UVround[Q][1];
-            cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
-                                             zbin_boost[i]) >> 7;
-        }
-    }
-}
-
-#define ZBIN_EXTRA_Y \
-    (( cpi->common.Y1dequant[QIndex][1] *  \
-    ( x->zbin_over_quant +  \
-      x->zbin_mode_boost +  \
-      x->act_zbin_adj ) ) >> 7)
-
-#define ZBIN_EXTRA_UV \
-    (( cpi->common.UVdequant[QIndex][1] *  \
-    ( x->zbin_over_quant +  \
-      x->zbin_mode_boost +  \
-      x->act_zbin_adj ) ) >> 7)
-
-#define ZBIN_EXTRA_Y2 \
-    (( cpi->common.Y2dequant[QIndex][1] *  \
-    ( (x->zbin_over_quant / 2) +  \
-       x->zbin_mode_boost +  \
-       x->act_zbin_adj ) ) >> 7)
-
-void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
-{
-    int i;
-    int QIndex;
-    MACROBLOCKD *xd = &x->e_mbd;
-    int zbin_extra;
-
-    /* Select the baseline MB Q index. */
-    if (xd->segmentation_enabled)
-    {
-        /* Abs Value */
-        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
-        /* Delta Value */
-        else
-        {
-            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
-            /* Clamp to valid range */
-            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
-        }
-    }
-    else
-        QIndex = cpi->common.base_qindex;
-
-    /* This initialization should be called at least once. Use ok_to_skip to
-     * decide if it is ok to skip.
-     * Before encoding a frame, this function is always called with ok_to_skip
-     * =0, which means no skiping of calculations. The "last" values are
-     * initialized at that time.
-     */
-    if (!ok_to_skip || QIndex != x->q_index)
-    {
-
-        xd->dequant_y1_dc[0] = 1;
-        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
-        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
-        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
-
-        for (i = 1; i < 16; i++)
-        {
-            xd->dequant_y1_dc[i] =
-            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
-            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
-            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
-        }
-#if 1
-        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until
-         * the quantizer code uses a passed in pointer to the dequant constants.
-         * This will also require modifications to the x86 and neon assembly.
-         * */
-        for (i = 0; i < 16; i++)
-            x->e_mbd.block[i].dequant = xd->dequant_y1;
-        for (i = 16; i < 24; i++)
-            x->e_mbd.block[i].dequant = xd->dequant_uv;
-        x->e_mbd.block[24].dequant = xd->dequant_y2;
-#endif
-
-        /* Y */
-        zbin_extra = ZBIN_EXTRA_Y;
-
-        for (i = 0; i < 16; i++)
-        {
-            x->block[i].quant = cpi->Y1quant[QIndex];
-            x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
-            x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
-            x->block[i].zbin = cpi->Y1zbin[QIndex];
-            x->block[i].round = cpi->Y1round[QIndex];
-            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
-            x->block[i].zbin_extra = (short)zbin_extra;
-        }
-
-        /* UV */
-        zbin_extra = ZBIN_EXTRA_UV;
-
-        for (i = 16; i < 24; i++)
-        {
-            x->block[i].quant = cpi->UVquant[QIndex];
-            x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
-            x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
-            x->block[i].zbin = cpi->UVzbin[QIndex];
-            x->block[i].round = cpi->UVround[QIndex];
-            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
-            x->block[i].zbin_extra = (short)zbin_extra;
-        }
-
-        /* Y2 */
-        zbin_extra = ZBIN_EXTRA_Y2;
-
-        x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
-        x->block[24].quant = cpi->Y2quant[QIndex];
-        x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
-        x->block[24].zbin = cpi->Y2zbin[QIndex];
-        x->block[24].round = cpi->Y2round[QIndex];
-        x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
-        x->block[24].zbin_extra = (short)zbin_extra;
-
-        /* save this macroblock QIndex for vp8_update_zbin_extra() */
-        x->q_index = QIndex;
-
-        x->last_zbin_over_quant = x->zbin_over_quant;
-        x->last_zbin_mode_boost = x->zbin_mode_boost;
-        x->last_act_zbin_adj = x->act_zbin_adj;
-
-
-
-    }
-    else if(x->last_zbin_over_quant != x->zbin_over_quant
-            || x->last_zbin_mode_boost != x->zbin_mode_boost
-            || x->last_act_zbin_adj != x->act_zbin_adj)
-    {
-        /* Y */
-        zbin_extra = ZBIN_EXTRA_Y;
-
-        for (i = 0; i < 16; i++)
-            x->block[i].zbin_extra = (short)zbin_extra;
-
-        /* UV */
-        zbin_extra = ZBIN_EXTRA_UV;
-
-        for (i = 16; i < 24; i++)
-            x->block[i].zbin_extra = (short)zbin_extra;
-
-        /* Y2 */
-        zbin_extra = ZBIN_EXTRA_Y2;
-        x->block[24].zbin_extra = (short)zbin_extra;
-
-        x->last_zbin_over_quant = x->zbin_over_quant;
-        x->last_zbin_mode_boost = x->zbin_mode_boost;
-        x->last_act_zbin_adj = x->act_zbin_adj;
-    }
-}
-
-void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
-{
-    int i;
-    int QIndex = x->q_index;
-    int zbin_extra;
-
-    /* Y */
-    zbin_extra = ZBIN_EXTRA_Y;
-
-    for (i = 0; i < 16; i++)
-        x->block[i].zbin_extra = (short)zbin_extra;
-
-    /* UV */
-    zbin_extra = ZBIN_EXTRA_UV;
-
-    for (i = 16; i < 24; i++)
-        x->block[i].zbin_extra = (short)zbin_extra;
-
-    /* Y2 */
-    zbin_extra = ZBIN_EXTRA_Y2;
-    x->block[24].zbin_extra = (short)zbin_extra;
-}
-#undef ZBIN_EXTRA_Y
-#undef ZBIN_EXTRA_UV
-#undef ZBIN_EXTRA_Y2
-
-void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
-{
-    /* Clear Zbin mode boost for default case */
-    cpi->mb.zbin_mode_boost = 0;
-
-    /* MB level quantizer setup */
-    vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);
-}
-
-
-void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
-{
-    VP8_COMMON *cm = &cpi->common;
-    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
-    int update = 0;
-    int new_delta_q;
-    int new_uv_delta_q;
-    cm->base_qindex = Q;
-
-    /* if any of the delta_q values are changing update flag has to be set */
-    /* currently only y2dc_delta_q may change */
-
-    cm->y1dc_delta_q = 0;
-    cm->y2ac_delta_q = 0;
-
-    if (Q < 4)
-    {
-        new_delta_q = 4-Q;
-    }
-    else
-        new_delta_q = 0;
-
-    update |= cm->y2dc_delta_q != new_delta_q;
-    cm->y2dc_delta_q = new_delta_q;
-
-    new_uv_delta_q = 0;
-    // For screen content, lower the q value for UV channel. For now, select
-    // conservative delta; same delta for dc and ac, and decrease it with lower
-    // Q, and set to 0 below some threshold. May want to condition this in
-    // future on the variance/energy in UV channel.
-    if (cpi->oxcf.screen_content_mode && Q > 40) {
-      new_uv_delta_q = -(int)(0.15 * Q);
-      // Check range: magnitude of delta is 4 bits.
-      if (new_uv_delta_q < -15) {
-        new_uv_delta_q = -15;
-      }
-    }
-    update |= cm->uvdc_delta_q != new_uv_delta_q;
-    cm->uvdc_delta_q = new_uv_delta_q;
-    cm->uvac_delta_q = new_uv_delta_q;
-
-    /* Set Segment specific quatizers */
-    mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
-    mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
-    mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
-    mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];
-
-    /* quantizer has to be reinitialized for any delta_q changes */
-    if(update)
-        vp8cx_init_quantizer(cpi);
-
-}
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -28,7 +28,7 @@
 #include "vp8/common/findnearmv.h"
 #include "vp8/common/quant_common.h"
 #include "encodemb.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vpx_dsp/variance.h"
 #include "mcomp.h"
 #include "rdopt.h"
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -12,7 +12,7 @@
 #include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
 #include "vp8/common/systemdependent.h"
-#include "quantize.h"
+#include "vp8/encoder/quantize.h"
 #include "vp8/common/alloccommon.h"
 #include "mcomp.h"
 #include "firstpass.h"
--- /dev/null
+++ b/vp8/encoder/vp8_quantize.c
@@ -1,0 +1,583 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <math.h>
+#include "vpx_mem/vpx_mem.h"
+
+#include "onyx_int.h"
+#include "vp8/encoder/quantize.h"
+#include "vp8/common/quant_common.h"
+
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int x, y, z, sz;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant_fast;
+    short *qcoeff_ptr  = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+
+    eob = -1;
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        sz = (z >> 31);                              /* sign of z */
+        x  = (z ^ sz) - sz;                          /* x = abs(z) */
+
+        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */
+        x  = (y ^ sz) - sz;                          /* get the sign back */
+        qcoeff_ptr[rc] = x;                          /* write to destination */
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];       /* dequantized value */
+
+        if (y)
+        {
+            eob = i;                                 /* last nonzero coeffs */
+        }
+    }
+    *d->eob = (char)(eob + 1);
+}
+
+void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *zbin_boost_ptr  = b->zrun_zbin_boost;
+    short *coeff_ptr       = b->coeff;
+    short *zbin_ptr        = b->zbin;
+    short *round_ptr       = b->round;
+    short *quant_ptr       = b->quant;
+    short *quant_shift_ptr = b->quant_shift;
+    short *qcoeff_ptr      = d->qcoeff;
+    short *dqcoeff_ptr     = d->dqcoeff;
+    short *dequant_ptr     = d->dequant;
+    short zbin_oq_value    = b->zbin_extra;
+
+    memset(qcoeff_ptr, 0, 32);
+    memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+        zbin_boost_ptr ++;
+        sz = (z >> 31);                              /* sign of z */
+        x  = (z ^ sz) - sz;                          /* x = abs(z) */
+
+        if (x >= zbin)
+        {
+            x += round_ptr[rc];
+            y  = ((((x * quant_ptr[rc]) >> 16) + x)
+                 * quant_shift_ptr[rc]) >> 16;       /* quantize (x) */
+            x  = (y ^ sz) - sz;                      /* get the sign back */
+            qcoeff_ptr[rc]  = x;                     /* write to destination */
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];   /* dequantized value */
+
+            if (y)
+            {
+                eob = i;                             /* last nonzero coeffs */
+                zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */
+            }
+        }
+    }
+
+    *d->eob = (char)(eob + 1);
+}
+
+void vp8_quantize_mby(MACROBLOCK *x)
+{
+    int i;
+    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+
+    for (i = 0; i < 16; i++)
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+
+    if(has_2nd_order)
+        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
+}
+
+void vp8_quantize_mb(MACROBLOCK *x)
+{
+    int i;
+    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+
+    for (i = 0; i < 24+has_2nd_order; i++)
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+}
+
+
+void vp8_quantize_mbuv(MACROBLOCK *x)
+{
+    int i;
+
+    for (i = 16; i < 24; i++)
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+}
+
+static const int qrounding_factors[129] =
+{
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48
+};
+
+
+static const int qzbin_factors[129] =
+{
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80
+};
+
+
+static const int qrounding_factors_y2[129] =
+{
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48
+};
+
+
+static const int qzbin_factors_y2[129] =
+{
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80
+};
+
+
+static void invert_quant(int improved_quant, short *quant,
+                         short *shift, short d)
+{
+    if(improved_quant)
+    {
+        unsigned t;
+        int l;
+        t = d;
+        for(l = 0; t > 1; l++)
+            t>>=1;
+        t = 1 + (1<<(16+l))/d;
+        *quant = (short)(t - (1<<16));
+        *shift = l;
+        /* use multiplication and constant shift by 16 */
+        *shift = 1 << (16 - *shift);
+    }
+    else
+    {
+        *quant = (1 << 16) / d;
+        *shift = 0;
+        /* use multiplication and constant shift by 16 */
+        *shift = 1 << (16 - *shift);
+    }
+}
+
+
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+    int i;
+    int quant_val;
+    int Q;
+
+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44,
+                          44, 44};
+
+    for (Q = 0; Q < QINDEX_RANGE; Q++)
+    {
+        /* dc values */
+        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
+                     cpi->Y1quant_shift[Q] + 0, quant_val);
+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
+                     cpi->Y2quant_shift[Q] + 0, quant_val);
+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
+                     cpi->UVquant_shift[Q] + 0, quant_val);
+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        /* all the ac values = ; */
+        quant_val = vp8_ac_yquant(Q);
+        cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1,
+                     cpi->Y1quant_shift[Q] + 1, quant_val);
+        cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+        cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1,
+                     cpi->Y2quant_shift[Q] + 1, quant_val);
+        cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+        cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val;
+        invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1,
+                     cpi->UVquant_shift[Q] + 1, quant_val);
+        cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][1] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7;
+
+        for (i = 2; i < 16; i++)
+        {
+            cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1];
+            cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1];
+            cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
+            cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
+            cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
+            cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
+
+            cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1];
+            cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1];
+            cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
+            cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
+            cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
+            cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
+
+            cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1];
+            cpi->UVquant[Q][i] = cpi->UVquant[Q][1];
+            cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
+            cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
+            cpi->UVround[Q][i] = cpi->UVround[Q][1];
+            cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
+                                             zbin_boost[i]) >> 7;
+        }
+    }
+}
+
+#define ZBIN_EXTRA_Y \
+    (( cpi->common.Y1dequant[QIndex][1] *  \
+    ( x->zbin_over_quant +  \
+      x->zbin_mode_boost +  \
+      x->act_zbin_adj ) ) >> 7)
+
+#define ZBIN_EXTRA_UV \
+    (( cpi->common.UVdequant[QIndex][1] *  \
+    ( x->zbin_over_quant +  \
+      x->zbin_mode_boost +  \
+      x->act_zbin_adj ) ) >> 7)
+
+#define ZBIN_EXTRA_Y2 \
+    (( cpi->common.Y2dequant[QIndex][1] *  \
+    ( (x->zbin_over_quant / 2) +  \
+       x->zbin_mode_boost +  \
+       x->act_zbin_adj ) ) >> 7)
+
+void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
+{
+    int i;
+    int QIndex;
+    MACROBLOCKD *xd = &x->e_mbd;
+    int zbin_extra;
+
+    /* Select the baseline MB Q index. */
+    if (xd->segmentation_enabled)
+    {
+        /* Abs Value */
+        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
+        /* Delta Value */
+        else
+        {
+            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
+            /* Clamp to valid range */
+            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
+        }
+    }
+    else
+        QIndex = cpi->common.base_qindex;
+
+    /* This initialization should be called at least once. Use ok_to_skip to
+     * decide if it is ok to skip.
+     * Before encoding a frame, this function is always called with ok_to_skip
+     * =0, which means no skiping of calculations. The "last" values are
+     * initialized at that time.
+     */
+    if (!ok_to_skip || QIndex != x->q_index)
+    {
+
+        xd->dequant_y1_dc[0] = 1;
+        xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
+        xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
+        xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
+
+        for (i = 1; i < 16; i++)
+        {
+            xd->dequant_y1_dc[i] =
+            xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
+            xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
+            xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
+        }
+#if 1
+        /*TODO:  Remove dequant from BLOCKD.  This is a temporary solution until
+         * the quantizer code uses a passed in pointer to the dequant constants.
+         * This will also require modifications to the x86 and neon assembly.
+         * */
+        for (i = 0; i < 16; i++)
+            x->e_mbd.block[i].dequant = xd->dequant_y1;
+        for (i = 16; i < 24; i++)
+            x->e_mbd.block[i].dequant = xd->dequant_uv;
+        x->e_mbd.block[24].dequant = xd->dequant_y2;
+#endif
+
+        /* Y */
+        zbin_extra = ZBIN_EXTRA_Y;
+
+        for (i = 0; i < 16; i++)
+        {
+            x->block[i].quant = cpi->Y1quant[QIndex];
+            x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
+            x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
+            x->block[i].zbin = cpi->Y1zbin[QIndex];
+            x->block[i].round = cpi->Y1round[QIndex];
+            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
+            x->block[i].zbin_extra = (short)zbin_extra;
+        }
+
+        /* UV */
+        zbin_extra = ZBIN_EXTRA_UV;
+
+        for (i = 16; i < 24; i++)
+        {
+            x->block[i].quant = cpi->UVquant[QIndex];
+            x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
+            x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
+            x->block[i].zbin = cpi->UVzbin[QIndex];
+            x->block[i].round = cpi->UVround[QIndex];
+            x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
+            x->block[i].zbin_extra = (short)zbin_extra;
+        }
+
+        /* Y2 */
+        zbin_extra = ZBIN_EXTRA_Y2;
+
+        x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
+        x->block[24].quant = cpi->Y2quant[QIndex];
+        x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
+        x->block[24].zbin = cpi->Y2zbin[QIndex];
+        x->block[24].round = cpi->Y2round[QIndex];
+        x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
+        x->block[24].zbin_extra = (short)zbin_extra;
+
+        /* save this macroblock QIndex for vp8_update_zbin_extra() */
+        x->q_index = QIndex;
+
+        x->last_zbin_over_quant = x->zbin_over_quant;
+        x->last_zbin_mode_boost = x->zbin_mode_boost;
+        x->last_act_zbin_adj = x->act_zbin_adj;
+
+
+
+    }
+    else if(x->last_zbin_over_quant != x->zbin_over_quant
+            || x->last_zbin_mode_boost != x->zbin_mode_boost
+            || x->last_act_zbin_adj != x->act_zbin_adj)
+    {
+        /* Y */
+        zbin_extra = ZBIN_EXTRA_Y;
+
+        for (i = 0; i < 16; i++)
+            x->block[i].zbin_extra = (short)zbin_extra;
+
+        /* UV */
+        zbin_extra = ZBIN_EXTRA_UV;
+
+        for (i = 16; i < 24; i++)
+            x->block[i].zbin_extra = (short)zbin_extra;
+
+        /* Y2 */
+        zbin_extra = ZBIN_EXTRA_Y2;
+        x->block[24].zbin_extra = (short)zbin_extra;
+
+        x->last_zbin_over_quant = x->zbin_over_quant;
+        x->last_zbin_mode_boost = x->zbin_mode_boost;
+        x->last_act_zbin_adj = x->act_zbin_adj;
+    }
+}
+
+void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    int i;
+    int QIndex = x->q_index;
+    int zbin_extra;
+
+    /* Y */
+    zbin_extra = ZBIN_EXTRA_Y;
+
+    for (i = 0; i < 16; i++)
+        x->block[i].zbin_extra = (short)zbin_extra;
+
+    /* UV */
+    zbin_extra = ZBIN_EXTRA_UV;
+
+    for (i = 16; i < 24; i++)
+        x->block[i].zbin_extra = (short)zbin_extra;
+
+    /* Y2 */
+    zbin_extra = ZBIN_EXTRA_Y2;
+    x->block[24].zbin_extra = (short)zbin_extra;
+}
+#undef ZBIN_EXTRA_Y
+#undef ZBIN_EXTRA_UV
+#undef ZBIN_EXTRA_Y2
+
+void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
+{
+    /* Clear Zbin mode boost for default case */
+    cpi->mb.zbin_mode_boost = 0;
+
+    /* MB level quantizer setup */
+    vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0);
+}
+
+
+void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
+{
+    VP8_COMMON *cm = &cpi->common;
+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+    int update = 0;
+    int new_delta_q;
+    int new_uv_delta_q;
+    cm->base_qindex = Q;
+
+    /* if any of the delta_q values are changing update flag has to be set */
+    /* currently only y2dc_delta_q may change */
+
+    cm->y1dc_delta_q = 0;
+    cm->y2ac_delta_q = 0;
+
+    if (Q < 4)
+    {
+        new_delta_q = 4-Q;
+    }
+    else
+        new_delta_q = 0;
+
+    update |= cm->y2dc_delta_q != new_delta_q;
+    cm->y2dc_delta_q = new_delta_q;
+
+    new_uv_delta_q = 0;
+    // For screen content, lower the q value for UV channel. For now, select
+    // conservative delta; same delta for dc and ac, and decrease it with lower
+    // Q, and set to 0 below some threshold. May want to condition this in
+    // future on the variance/energy in UV channel.
+    if (cpi->oxcf.screen_content_mode && Q > 40) {
+      new_uv_delta_q = -(int)(0.15 * Q);
+      // Check range: magnitude of delta is 4 bits.
+      if (new_uv_delta_q < -15) {
+        new_uv_delta_q = -15;
+      }
+    }
+    update |= cm->uvdc_delta_q != new_uv_delta_q;
+    cm->uvdc_delta_q = new_uv_delta_q;
+    cm->uvac_delta_q = new_uv_delta_q;
+
+    /* Set Segment specific quatizers */
+    mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
+    mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
+    mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
+    mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];
+
+    /* quantizer has to be reinitialized for any delta_q changes */
+    if(update)
+        vp8cx_init_quantizer(cpi);
+
+}
--- a/vp8/encoder/x86/quantize_sse2.c
+++ /dev/null
@@ -1,228 +1,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "vpx_ports/x86.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/encoder/block.h"
-#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
-
-#include <mmintrin.h> /* MMX */
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#define SELECT_EOB(i, z) \
-    do { \
-        short boost = *zbin_boost_ptr; \
-        int cmp = (x[z] < boost) | (y[z] == 0); \
-        zbin_boost_ptr++; \
-        if (cmp) \
-            break; \
-        qcoeff_ptr[z] = y[z]; \
-        eob = i; \
-        zbin_boost_ptr = b->zrun_zbin_boost; \
-    } while (0)
-
-void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
-    char eob = 0;
-    short *zbin_boost_ptr;
-    short *qcoeff_ptr      = d->qcoeff;
-    DECLARE_ALIGNED(16, short, x[16]);
-    DECLARE_ALIGNED(16, short, y[16]);
-
-    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
-    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
-    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
-    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
-    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
-    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
-    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
-    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
-    __m128i round0 = _mm_load_si128((__m128i *)(b->round));
-    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
-    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
-    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
-    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
-    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
-
-    memset(qcoeff_ptr, 0, 32);
-
-    /* Duplicate to all lanes. */
-    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
-    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
-
-    /* Sign of z: z >> 15 */
-    sz0 = _mm_srai_epi16(z0, 15);
-    sz1 = _mm_srai_epi16(z1, 15);
-
-    /* x = abs(z): (z ^ sz) - sz */
-    x0 = _mm_xor_si128(z0, sz0);
-    x1 = _mm_xor_si128(z1, sz1);
-    x0 = _mm_sub_epi16(x0, sz0);
-    x1 = _mm_sub_epi16(x1, sz1);
-
-    /* zbin[] + zbin_extra */
-    zbin0 = _mm_add_epi16(zbin0, zbin_extra);
-    zbin1 = _mm_add_epi16(zbin1, zbin_extra);
-
-    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
-     * the equation because boost is the only value which can change:
-     * x - (zbin[] + extra) >= boost */
-    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
-    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
-
-    _mm_store_si128((__m128i *)(x), x_minus_zbin0);
-    _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
-
-    /* All the remaining calculations are valid whether they are done now with
-     * simd or later inside the loop one at a time. */
-    x0 = _mm_add_epi16(x0, round0);
-    x1 = _mm_add_epi16(x1, round1);
-
-    y0 = _mm_mulhi_epi16(x0, quant0);
-    y1 = _mm_mulhi_epi16(x1, quant1);
-
-    y0 = _mm_add_epi16(y0, x0);
-    y1 = _mm_add_epi16(y1, x1);
-
-    /* Instead of shifting each value independently we convert the scaling
-     * factor with 1 << (16 - shift) so we can use multiply/return high half. */
-    y0 = _mm_mulhi_epi16(y0, quant_shift0);
-    y1 = _mm_mulhi_epi16(y1, quant_shift1);
-
-    /* Return the sign: (y ^ sz) - sz */
-    y0 = _mm_xor_si128(y0, sz0);
-    y1 = _mm_xor_si128(y1, sz1);
-    y0 = _mm_sub_epi16(y0, sz0);
-    y1 = _mm_sub_epi16(y1, sz1);
-
-    _mm_store_si128((__m128i *)(y), y0);
-    _mm_store_si128((__m128i *)(y + 8), y1);
-
-    zbin_boost_ptr = b->zrun_zbin_boost;
-
-    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
-    SELECT_EOB(1, 0);
-    SELECT_EOB(2, 1);
-    SELECT_EOB(3, 4);
-    SELECT_EOB(4, 8);
-    SELECT_EOB(5, 5);
-    SELECT_EOB(6, 2);
-    SELECT_EOB(7, 3);
-    SELECT_EOB(8, 6);
-    SELECT_EOB(9, 9);
-    SELECT_EOB(10, 12);
-    SELECT_EOB(11, 13);
-    SELECT_EOB(12, 10);
-    SELECT_EOB(13, 7);
-    SELECT_EOB(14, 11);
-    SELECT_EOB(15, 14);
-    SELECT_EOB(16, 15);
-
-    y0 = _mm_load_si128((__m128i *)(d->qcoeff));
-    y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
-
-    /* dqcoeff = qcoeff * dequant */
-    y0 = _mm_mullo_epi16(y0, dequant0);
-    y1 = _mm_mullo_epi16(y1, dequant1);
-
-    _mm_store_si128((__m128i *)(d->dqcoeff), y0);
-    _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
-
-    *d->eob = eob;
-}
-
-void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
-  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
-  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
-  __m128i round0 = _mm_load_si128((__m128i *)(b->round));
-  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
-  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
-  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
-  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
-  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
-  __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
-  __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
-
-  __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
-
-  /* sign of z: z >> 15 */
-  sz0 = _mm_srai_epi16(z0, 15);
-  sz1 = _mm_srai_epi16(z1, 15);
-
-  /* x = abs(z): (z ^ sz) - sz */
-  x0 = _mm_xor_si128(z0, sz0);
-  x1 = _mm_xor_si128(z1, sz1);
-  x0 = _mm_sub_epi16(x0, sz0);
-  x1 = _mm_sub_epi16(x1, sz1);
-
-  /* x += round */
-  x0 = _mm_add_epi16(x0, round0);
-  x1 = _mm_add_epi16(x1, round1);
-
-  /* y = (x * quant) >> 16 */
-  y0 = _mm_mulhi_epi16(x0, quant_fast0);
-  y1 = _mm_mulhi_epi16(x1, quant_fast1);
-
-  /* x = abs(y) = (y ^ sz) - sz */
-  y0 = _mm_xor_si128(y0, sz0);
-  y1 = _mm_xor_si128(y1, sz1);
-  x0 = _mm_sub_epi16(y0, sz0);
-  x1 = _mm_sub_epi16(y1, sz1);
-
-  /* qcoeff = x */
-  _mm_store_si128((__m128i *)(d->qcoeff), x0);
-  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
-
-  /* x * dequant */
-  xdq0 = _mm_mullo_epi16(x0, dequant0);
-  xdq1 = _mm_mullo_epi16(x1, dequant1);
-
-  /* dqcoeff = x * dequant */
-  _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
-  _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
-
-  /* build a mask for the zig zag */
-  zeros = _mm_setzero_si128();
-
-  x0 = _mm_cmpeq_epi16(x0, zeros);
-  x1 = _mm_cmpeq_epi16(x1, zeros);
-
-  ones = _mm_cmpeq_epi16(zeros, zeros);
-
-  x0 = _mm_xor_si128(x0, ones);
-  x1 = _mm_xor_si128(x1, ones);
-
-  x0 = _mm_and_si128(x0, inv_zig_zag0);
-  x1 = _mm_and_si128(x1, inv_zig_zag1);
-
-  x0 = _mm_max_epi16(x0, x1);
-
-  /* now down to 8 */
-  x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
-
-  x0 = _mm_max_epi16(x0, x1);
-
-  /* only 4 left */
-  x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
-
-  x0 = _mm_max_epi16(x0, x1);
-
-  /* okay, just 2! */
-  x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
-
-  x0 = _mm_max_epi16(x0, x1);
-
-  *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
-}
--- /dev/null
+++ b/vp8/encoder/x86/vp8_quantize_sse2.c
@@ -1,0 +1,228 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/x86.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp8/encoder/block.h"
+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
+
+#include <mmintrin.h> /* MMX */
+#include <xmmintrin.h> /* SSE */
+#include <emmintrin.h> /* SSE2 */
+
+#define SELECT_EOB(i, z) \
+    do { \
+        short boost = *zbin_boost_ptr; \
+        int cmp = (x[z] < boost) | (y[z] == 0); \
+        zbin_boost_ptr++; \
+        if (cmp) \
+            break; \
+        qcoeff_ptr[z] = y[z]; \
+        eob = i; \
+        zbin_boost_ptr = b->zrun_zbin_boost; \
+    } while (0)
+
+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+    char eob = 0;
+    short *zbin_boost_ptr;
+    short *qcoeff_ptr      = d->qcoeff;
+    DECLARE_ALIGNED(16, short, x[16]);
+    DECLARE_ALIGNED(16, short, y[16]);
+
+    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
+    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
+    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
+    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
+    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
+    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
+    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
+    __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
+    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
+    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+    memset(qcoeff_ptr, 0, 32);
+
+    /* Duplicate to all lanes. */
+    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
+    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
+
+    /* Sign of z: z >> 15 */
+    sz0 = _mm_srai_epi16(z0, 15);
+    sz1 = _mm_srai_epi16(z1, 15);
+
+    /* x = abs(z): (z ^ sz) - sz */
+    x0 = _mm_xor_si128(z0, sz0);
+    x1 = _mm_xor_si128(z1, sz1);
+    x0 = _mm_sub_epi16(x0, sz0);
+    x1 = _mm_sub_epi16(x1, sz1);
+
+    /* zbin[] + zbin_extra */
+    zbin0 = _mm_add_epi16(zbin0, zbin_extra);
+    zbin1 = _mm_add_epi16(zbin1, zbin_extra);
+
+    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
+     * the equation because boost is the only value which can change:
+     * x - (zbin[] + extra) >= boost */
+    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
+    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
+
+    _mm_store_si128((__m128i *)(x), x_minus_zbin0);
+    _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
+
+    /* All the remaining calculations are valid whether they are done now with
+     * simd or later inside the loop one at a time. */
+    x0 = _mm_add_epi16(x0, round0);
+    x1 = _mm_add_epi16(x1, round1);
+
+    y0 = _mm_mulhi_epi16(x0, quant0);
+    y1 = _mm_mulhi_epi16(x1, quant1);
+
+    y0 = _mm_add_epi16(y0, x0);
+    y1 = _mm_add_epi16(y1, x1);
+
+    /* Instead of shifting each value independently we convert the scaling
+     * factor with 1 << (16 - shift) so we can use multiply/return high half. */
+    y0 = _mm_mulhi_epi16(y0, quant_shift0);
+    y1 = _mm_mulhi_epi16(y1, quant_shift1);
+
+    /* Return the sign: (y ^ sz) - sz */
+    y0 = _mm_xor_si128(y0, sz0);
+    y1 = _mm_xor_si128(y1, sz1);
+    y0 = _mm_sub_epi16(y0, sz0);
+    y1 = _mm_sub_epi16(y1, sz1);
+
+    _mm_store_si128((__m128i *)(y), y0);
+    _mm_store_si128((__m128i *)(y + 8), y1);
+
+    zbin_boost_ptr = b->zrun_zbin_boost;
+
+    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
+    SELECT_EOB(1, 0);
+    SELECT_EOB(2, 1);
+    SELECT_EOB(3, 4);
+    SELECT_EOB(4, 8);
+    SELECT_EOB(5, 5);
+    SELECT_EOB(6, 2);
+    SELECT_EOB(7, 3);
+    SELECT_EOB(8, 6);
+    SELECT_EOB(9, 9);
+    SELECT_EOB(10, 12);
+    SELECT_EOB(11, 13);
+    SELECT_EOB(12, 10);
+    SELECT_EOB(13, 7);
+    SELECT_EOB(14, 11);
+    SELECT_EOB(15, 14);
+    SELECT_EOB(16, 15);
+
+    y0 = _mm_load_si128((__m128i *)(d->qcoeff));
+    y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
+
+    /* dqcoeff = qcoeff * dequant */
+    y0 = _mm_mullo_epi16(y0, dequant0);
+    y1 = _mm_mullo_epi16(y1, dequant1);
+
+    _mm_store_si128((__m128i *)(d->dqcoeff), y0);
+    _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
+
+    *d->eob = eob;
+}
+
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+  __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+  __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+  __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+  __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+  __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+  __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+  __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+  __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+  __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
+  __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
+
+  __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
+
+  /* sign of z: z >> 15 */
+  sz0 = _mm_srai_epi16(z0, 15);
+  sz1 = _mm_srai_epi16(z1, 15);
+
+  /* x = abs(z): (z ^ sz) - sz */
+  x0 = _mm_xor_si128(z0, sz0);
+  x1 = _mm_xor_si128(z1, sz1);
+  x0 = _mm_sub_epi16(x0, sz0);
+  x1 = _mm_sub_epi16(x1, sz1);
+
+  /* x += round */
+  x0 = _mm_add_epi16(x0, round0);
+  x1 = _mm_add_epi16(x1, round1);
+
+  /* y = (x * quant) >> 16 */
+  y0 = _mm_mulhi_epi16(x0, quant_fast0);
+  y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+  /* x = abs(y) = (y ^ sz) - sz */
+  y0 = _mm_xor_si128(y0, sz0);
+  y1 = _mm_xor_si128(y1, sz1);
+  x0 = _mm_sub_epi16(y0, sz0);
+  x1 = _mm_sub_epi16(y1, sz1);
+
+  /* qcoeff = x */
+  _mm_store_si128((__m128i *)(d->qcoeff), x0);
+  _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+  /* x * dequant */
+  xdq0 = _mm_mullo_epi16(x0, dequant0);
+  xdq1 = _mm_mullo_epi16(x1, dequant1);
+
+  /* dqcoeff = x * dequant */
+  _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
+  _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
+
+  /* build a mask for the zig zag */
+  zeros = _mm_setzero_si128();
+
+  x0 = _mm_cmpeq_epi16(x0, zeros);
+  x1 = _mm_cmpeq_epi16(x1, zeros);
+
+  ones = _mm_cmpeq_epi16(zeros, zeros);
+
+  x0 = _mm_xor_si128(x0, ones);
+  x1 = _mm_xor_si128(x1, ones);
+
+  x0 = _mm_and_si128(x0, inv_zig_zag0);
+  x1 = _mm_and_si128(x1, inv_zig_zag1);
+
+  x0 = _mm_max_epi16(x0, x1);
+
+  /* now down to 8 */
+  x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
+
+  x0 = _mm_max_epi16(x0, x1);
+
+  /* only 4 left */
+  x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
+
+  x0 = _mm_max_epi16(x0, x1);
+
+  /* okay, just 2! */
+  x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
+
+  x0 = _mm_max_epi16(x0, x1);
+
+  *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
+}
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -60,7 +60,7 @@
 VP8_CX_SRCS-yes += encoder/onyx_if.c
 VP8_CX_SRCS-yes += encoder/pickinter.c
 VP8_CX_SRCS-yes += encoder/picklpf.c
-VP8_CX_SRCS-yes += encoder/quantize.c
+VP8_CX_SRCS-yes += encoder/vp8_quantize.c
 VP8_CX_SRCS-yes += encoder/ratectrl.c
 VP8_CX_SRCS-yes += encoder/rdopt.c
 VP8_CX_SRCS-yes += encoder/segmentation.c
@@ -84,7 +84,7 @@
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_quantize_sse2.c
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c