shithub: libvpx

Download patch

ref: 15255eef8251eb645b9b89f6e211663e6f22563d
parent: 4bd0f4f6464bdf46fe0040557f39110bb41561cc
author: John Koleszar <jkoleszar@google.com>
date: Wed Apr 24 10:48:17 EDT 2013

Move dequant from BLOCKD to per-plane MACROBLOCKD

This data can vary per-plane, but not per-block.

Change-Id: I1971b0b2c2e697d2118e38b54ef446e52f63c65a

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -280,7 +280,6 @@
 
 typedef struct blockd {
   int16_t *diff;
-  int16_t *dequant;
 
   /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
   uint8_t **base_pre;
@@ -335,6 +334,7 @@
   int subsampling_y;
   struct buf_2d dst;
   struct buf_2d pre[2];
+  int16_t *dequant;
 };
 
 #define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -206,12 +206,10 @@
   const int qindex = get_qindex(mb, segment_id, pc->base_qindex);
   mb->q_index = qindex;
 
-  for (i = 0; i < 16; i++)
-    mb->block[i].dequant = pc->y_dequant[qindex];
+  mb->plane[0].dequant = pc->y_dequant[qindex];
+  for (i = 1; i < MAX_MB_PLANE; i++)
+    mb->plane[i].dequant = pc->uv_dequant[qindex];
 
-  for (i = 16; i < 24; i++)
-    mb->block[i].dequant = pc->uv_dequant[qindex];
-
   if (mb->lossless) {
     assert(qindex == 0);
     mb->inv_txm4x4_1      = vp9_short_iwalsh4x4_1;
@@ -354,7 +352,8 @@
       xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
           vp9_find_bpred_context(xd, b);
       if (!xd->mode_info_context->mbmi.mb_skip_coeff)
-        vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i, b->dequant);
+        vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i,
+                             xd->plane[0].dequant);
 #endif
       vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
                            b->dst_stride);
@@ -363,7 +362,7 @@
     }
 #if CONFIG_NEWBINTRAMODES
     if (!xd->mode_info_context->mbmi.mb_skip_coeff)
-      vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->block[16].dequant);
+      vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->plane[1].dequant);
 #endif
     vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
     xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf,
@@ -596,7 +595,7 @@
       mb_init_dequantizer(pbi, xd);
 
     // dequantization and idct
-    eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->block[0].dequant);
+    eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->plane[0].dequant);
     if (eobtotal == 0) {  // skip loopfilter
       for (n = 0; n < bw * bh; n++) {
         const int x_idx = n & (bw - 1), y_idx = n >> bwl;
@@ -671,7 +670,7 @@
     if (mode != I4X4_PRED)
 #endif
       eobtotal = vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16,
-                                   xd->block[0].dequant);
+                                   xd->plane[0].dequant);
     }
   }
 
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ /dev/null
@@ -1,29 +1,0 @@
-/*
- *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/asm_offsets.h"
-#include "vpx_config.h"
-#include "vp9/encoder/vp9_block.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_treewriter.h"
-#include "vp9/encoder/vp9_tokenize.h"
-
-BEGIN
-
-/* regular quantize */
-DEFINE(vp9_blockd_dequant,                      offsetof(BLOCKD, dequant));
-
-END
-
-/* add asserts for any offset that is not supported by assembly code
- * add asserts for any size that is not supported by assembly code
- */
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -509,7 +509,7 @@
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
-    optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh);
   }
 }
@@ -532,7 +532,7 @@
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
-    optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh);
   }
 }
@@ -560,7 +560,7 @@
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
-    optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh);
   }
 }
@@ -585,7 +585,7 @@
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
-    optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_4X4, bh * bw);
   }
 }
@@ -599,7 +599,7 @@
 
   assert(bsize == BLOCK_SIZE_SB64X64);
   for (b = 256; b < 384; b += 64) {
-    const int cidx = b >= 320 ? 20 : 16;
+    const int plane = 1 + (b >= 320);
     a = ta + vp9_block2above_sb64[TX_32X32][b];
     l = tl + vp9_block2left_sb64[TX_32X32][b];
     a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
@@ -610,7 +610,7 @@
     l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
     a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
     l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
-    optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+    optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant,
                &a_ec, &l_ec, TX_32X32, 256);
   }
 }
@@ -638,11 +638,10 @@
   }
 
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_16X16, bh * bw * 64);
     }
@@ -671,11 +670,10 @@
   }
 
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_8X8, bh * bw * 16);
     }
@@ -708,11 +706,10 @@
   }
 
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_4X4, bh * bw * 4);
     }
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -28,7 +28,6 @@
 
 void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
   MACROBLOCKD *const xd = &mb->e_mbd;
-  BLOCKD *const d = &xd->block[0];
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
@@ -41,7 +40,7 @@
   int16_t *round_ptr       = mb->plane[0].round;
   int16_t *quant_ptr       = mb->plane[0].quant;
   uint8_t *quant_shift_ptr = mb->plane[0].quant_shift;
-  int16_t *dequant_ptr     = d->dequant;
+  int16_t *dequant_ptr     = xd->plane[0].dequant;
   int zbin_oq_value        = mb->plane[0].zbin_extra;
   const int *pt_scan = get_scan_4x4(tx_type);
 
@@ -84,7 +83,6 @@
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
@@ -99,7 +97,7 @@
   int16_t *round_ptr       = mb->plane[pb_idx.plane].round;
   int16_t *quant_ptr       = mb->plane[pb_idx.plane].quant;
   uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-  int16_t *dequant_ptr     = d->dequant;
+  int16_t *dequant_ptr     = xd->plane[pb_idx.plane].dequant;
   int zbin_oq_value        = mb->plane[pb_idx.plane].zbin_extra;
 
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -152,7 +150,6 @@
                                       pb_idx.block, 16);
   int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
                                     pb_idx.block, 16);
-  BLOCKD *const d = &xd->block[c_idx];
   const int *pt_scan = get_scan_8x8(tx_type);
 
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -171,7 +168,7 @@
     int16_t *round_ptr  = mb->plane[pb_idx.plane].round;
     int16_t *quant_ptr  = mb->plane[pb_idx.plane].quant;
     uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-    int16_t *dequant_ptr = d->dequant;
+    int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
     int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
 
     eob = -1;
@@ -286,7 +283,6 @@
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
   const int *pt_scan = get_scan_16x16(tx_type);
 
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -301,7 +297,7 @@
            mb->plane[pb_idx.plane].quant_shift,
            BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
            BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
-           d->dequant,
+           xd->plane[pb_idx.plane].dequant,
            mb->plane[pb_idx.plane].zbin_extra,
            &xd->plane[pb_idx.plane].eobs[pb_idx.block],
            pt_scan, 1);
@@ -311,7 +307,6 @@
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
 
   if (c_idx == 0) assert(pb_idx.plane == 0);
   if (c_idx == 16) assert(pb_idx.plane == 1);
@@ -325,7 +320,7 @@
            mb->plane[pb_idx.plane].quant_shift,
            BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
            BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
-           d->dequant,
+           xd->plane[pb_idx.plane].dequant,
            mb->plane[pb_idx.plane].zbin_extra,
            &xd->plane[pb_idx.plane].eobs[pb_idx.block],
            vp9_default_zig_zag1d_32x32, 2);
@@ -528,8 +523,7 @@
   x->plane[0].round = cpi->Y1round[qindex];
   x->plane[0].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[qindex];
   x->plane[0].zbin_extra = (int16_t)zbin_extra;
-  for (i = 0; i < 16; i++)
-    x->e_mbd.block[i].dequant = cpi->common.y_dequant[qindex];
+  x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
 
   // UV
   zbin_extra = (cpi->common.uv_dequant[qindex][1] *
@@ -542,9 +536,8 @@
     x->plane[i].round = cpi->UVround[qindex];
     x->plane[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex];
     x->plane[i].zbin_extra = (int16_t)zbin_extra;
+    x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
   }
-  for (i = 16; i < 24; i++)
-    x->e_mbd.block[i].dequant = cpi->common.uv_dequant[qindex];
 
   x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
 
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3050,7 +3050,7 @@
         // Hence quantizer step is also 8 times. To get effective quantizer
         // we need to divide by 8 before sending to modeling function.
         model_rd_from_var_lapndz(var, 16 * bw * 16 * bh,
-                                 xd->block[0].dequant[1] >> 3,
+                                 xd->plane[0].dequant[1] >> 3,
                                  &tmp_rate_y, &tmp_dist_y);
         var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
                                             x->plane[1].src.stride,
@@ -3058,7 +3058,7 @@
                                             xd->plane[1].dst.stride,
                                             &sse);
         model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
-                                 xd->block[16].dequant[1] >> 3,
+                                 xd->plane[1].dequant[1] >> 3,
                                  &tmp_rate_u, &tmp_dist_u);
         var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
                                             x->plane[1].src.stride,
@@ -3066,7 +3066,7 @@
                                             xd->plane[1].dst.stride,
                                             &sse);
         model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
-                                 xd->block[20].dequant[1] >> 3,
+                                 xd->plane[2].dequant[1] >> 3,
                                  &tmp_rate_v, &tmp_dist_v);
         rd = RDCOST(x->rdmult, x->rddiv,
                     rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3138,17 +3138,17 @@
         // Note our transform coeffs are 8 times an orthogonal transform.
         // Hence quantizer step is also 8 times. To get effective quantizer
         // we need to divide by 8 before sending to modeling function.
-        model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
+        model_rd_from_var_lapndz(var, 16 * 16, xd->plane[0].dequant[1] >> 3,
                                  &tmp_rate_y, &tmp_dist_y);
         var = vp9_variance8x8(x->plane[1].src.buf, x->plane[1].src.stride,
                               xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                               &sse);
-        model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
+        model_rd_from_var_lapndz(var, 8 * 8, xd->plane[1].dequant[1] >> 3,
                                  &tmp_rate_u, &tmp_dist_u);
         var = vp9_variance8x8(x->plane[2].src.buf, x->plane[1].src.stride,
                               xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                               &sse);
-        model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
+        model_rd_from_var_lapndz(var, 8 * 8, xd->plane[2].dequant[1] >> 3,
                                  &tmp_rate_v, &tmp_dist_v);
         rd = RDCOST(x->rdmult, x->rddiv,
                     rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3225,8 +3225,8 @@
     x->skip = 1;
   else if (x->encode_breakout) {
     unsigned int var, sse;
-    int threshold = (xd->block[0].dequant[1]
-                     * xd->block[0].dequant[1] >> 4);
+    int threshold = (xd->plane[0].dequant[1]
+                     * xd->plane[0].dequant[1] >> 4);
 
     if (threshold < x->encode_breakout)
       threshold = x->encode_breakout;
@@ -3244,7 +3244,7 @@
     }
 
     if ((int)sse < threshold) {
-      unsigned int q2dc = xd->block[0].dequant[0];
+      unsigned int q2dc = xd->plane[0].dequant[0];
       /* If there is no codeable 2nd order dc
          or a very small uniform pixel change change */
       if ((sse - var < q2dc * q2dc >> 4) ||
--- a/vp9/encoder/x86/vp9_quantize_sse2.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse2.asm
@@ -9,7 +9,6 @@
 
 
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 
 
 ; void vp9_regular_quantize_b_sse2 | arg
--- a/vp9/encoder/x86/vp9_quantize_sse4.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse4.asm
@@ -9,7 +9,6 @@
 
 
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 
 
 ; void vp9_regular_quantize_b_sse4 | arg
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -9,7 +9,6 @@
 
 
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 
 
 ; void vp9_fast_quantize_b_ssse3 | arg
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -26,7 +26,6 @@
 #INCLUDES += common
 #INCLUDES += encoder
 
-VP9_CX_SRCS-yes += encoder/vp9_asm_enc_offsets.c
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
 VP9_CX_SRCS-yes += encoder/vp9_dct.c
@@ -117,6 +116,3 @@
 
 
 VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
-
-$(eval $(call asm_offsets_template,\
-         vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/vp9_asm_enc_offsets.c))