ref: 1470789927fa0b2ad1680443460168da97ca8c1b
parent: 584c72992a972ba124a3f05ba9fc683da8190680
parent: f00d157c125a5362b48037a2c65d2f0eee4f5f84
author: Dmitry Kovalev <dkovalev@google.com>
date: Wed Dec 4 05:58:02 EST 2013
Merge "Moving eob array to the encoder."
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -199,7 +199,6 @@
struct macroblockd_plane {
int16_t *dqcoeff;
- uint16_t *eobs;
PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -42,7 +42,6 @@
vp9_reader bit_reader;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
- DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
} TileWorkerData;
static int read_be32(const uint8_t *p) {
@@ -238,9 +237,9 @@
}
static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
- TX_SIZE tx_size, uint8_t *dst, int stride) {
+ TX_SIZE tx_size, uint8_t *dst, int stride,
+ int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int eob = pd->eobs[block];
if (eob > 0) {
TX_TYPE tx_type;
const int plane_type = pd->plane_type;
@@ -313,9 +312,11 @@
dst, pd->dst.stride, dst, pd->dst.stride);
if (!mi->mbmi.skip_coeff) {
- vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
- args->r);
- inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride);
+ const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
+ plane_bsize, x, y, tx_size,
+ args->r);
+ inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
+ eob);
}
}
@@ -333,14 +334,14 @@
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
- int x, y;
+ int x, y, eob;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
- *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
- plane_bsize, x, y, tx_size,
- args->r);
+ eob = vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y,
+ tx_size, args->r);
inverse_transform_block(xd, plane, block, tx_size,
&pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
- pd->dst.stride);
+ pd->dst.stride, eob);
+ *args->eobtotal += eob;
}
static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -925,7 +926,6 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
pd[i].dqcoeff = tile_data->dqcoeff[i];
- pd[i].eobs = tile_data->eobs[i];
vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
}
}
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -213,7 +213,6 @@
BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
pd->dequant, pt);
set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
- pd->eobs[block] = eob;
return eob;
}
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -112,10 +112,8 @@
struct macroblockd_plane *const pd = xd->plane;
int i;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (i = 0; i < MAX_MB_PLANE; ++i)
pd[i].dqcoeff = pbi->dqcoeff[i];
- pd[i].eobs = pbi->eobs[i];
- }
}
VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -23,7 +23,6 @@
DECLARE_ALIGNED(16, VP9_COMMON, common);
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
- DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
VP9D_CONFIG oxcf;
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -71,6 +71,7 @@
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
int16_t *qcoeff;
int16_t *coeff;
+ uint16_t *eobs;
struct buf_2d src;
// Quantizer setings
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -441,7 +441,7 @@
p[i].coeff = ctx->coeff_pbuf[i][1];
p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
- pd[i].eobs = ctx->eobs_pbuf[i][1];
+ p[i].eobs = ctx->eobs_pbuf[i][1];
}
for (i = max_plane; i < MAX_MB_PLANE; ++i) {
@@ -448,7 +448,7 @@
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
- pd[i].eobs = ctx->eobs_pbuf[i][2];
+ p[i].eobs = ctx->eobs_pbuf[i][2];
}
// Restore the coding context of the MB to that that was in place
@@ -677,7 +677,7 @@
p[i].coeff = ctx->coeff_pbuf[i][0];
p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
- pd[i].eobs = ctx->eobs_pbuf[i][0];
+ p[i].eobs = ctx->eobs_pbuf[i][0];
}
ctx->is_coded = 0;
x->skip_recode = 0;
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -144,7 +144,7 @@
const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
int16_t *qcoeff_ptr;
int16_t *dqcoeff_ptr;
- int eob = pd->eobs[block], final_eob, sz = 0;
+ int eob = p->eobs[block], final_eob, sz = 0;
const int i0 = 0;
int rc, x, next, i;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
@@ -334,7 +334,7 @@
}
final_eob++;
- xd->plane[plane].eobs[block] = final_eob;
+ mb->plane[plane].eobs[block] = final_eob;
*a = *l = (final_eob > 0);
}
@@ -372,7 +372,7 @@
int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const scan_order *so;
- uint16_t *eob = &pd->eobs[block];
+ uint16_t *eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
int16_t *src_diff;
@@ -423,6 +423,7 @@
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx *const ctx = args->ctx;
+ struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
@@ -433,7 +434,7 @@
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
- pd->eobs[block] = 0;
+ p->eobs[block] = 0;
ctx->ta[plane][i] = 0;
ctx->tl[plane][j] = 0;
return;
@@ -445,28 +446,28 @@
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
- ctx->ta[plane][i] = pd->eobs[block] > 0;
- ctx->tl[plane][j] = pd->eobs[block] > 0;
+ ctx->ta[plane][i] = p->eobs[block] > 0;
+ ctx->tl[plane][j] = p->eobs[block] > 0;
}
- if (x->skip_encode || pd->eobs[block] == 0)
+ if (x->skip_encode || p->eobs[block] == 0)
return;
switch (tx_size) {
case TX_32X32:
- vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
case TX_16X16:
- vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
case TX_8X8:
- vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
default:
assert(!"Invalid transform size");
@@ -478,6 +479,7 @@
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
@@ -487,10 +489,10 @@
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
- if (pd->eobs[block] == 0)
+ if (p->eobs[block] == 0)
return;
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -540,7 +542,7 @@
const int diff_stride = 4 * (1 << bwl);
uint8_t *src, *dst;
int16_t *src_diff;
- uint16_t *eob = &pd->eobs[block];
+ uint16_t *eob = &p->eobs[block];
int i, j;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)];
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -545,7 +545,7 @@
p[i].coeff = ctx->coeff_pbuf[i][1];
p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
- pd[i].eobs = ctx->eobs_pbuf[i][1];
+ p[i].eobs = ctx->eobs_pbuf[i][1];
}
x->skip_recode = 0;
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -148,7 +148,7 @@
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block),
- pd->dequant, p->zbin_extra, &pd->eobs[block], scan, iscan);
+ pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -525,7 +525,7 @@
struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
const int16_t *band_count = &band_counts[tx_size][1];
- const int eob = pd->eobs[block];
+ const int eob = p->eobs[block];
const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
@@ -643,7 +643,7 @@
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
if (plane == 0)
- x->zcoeff_blk[tx_size][block] = !xd->plane[plane].eobs[block] ||
+ x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
(rd1 > rd2 && !xd->lossless);
args->this_rate += args->rate;
@@ -739,7 +739,7 @@
*distortion = rd_stack->this_dist;
*rate = rd_stack->this_rate;
*sse = rd_stack->this_sse;
- *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
+ *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
}
}
@@ -1329,7 +1329,7 @@
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
- pd[i].eobs = ctx->eobs_pbuf[i][2];
+ p[i].eobs = ctx->eobs_pbuf[i][2];
ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
@@ -1339,7 +1339,7 @@
ctx->coeff_pbuf[i][0] = p[i].coeff;
ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
- ctx->eobs_pbuf[i][0] = pd[i].eobs;
+ ctx->eobs_pbuf[i][0] = p[i].eobs;
}
}
}
@@ -1630,6 +1630,7 @@
MB_PREDICTION_MODE this_mode;
MODE_INFO *mi = x->e_mbd.mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
+ struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
const int label_count = 4;
int64_t this_segment_rd = 0;
@@ -1958,11 +1959,11 @@
bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
bsi->rdstat[i][mode_idx].brate, 0);
bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
- bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
+ bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
if (num_4x4_blocks_wide > 1)
- bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
+ bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
if (num_4x4_blocks_high > 1)
- bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
+ bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
}
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
@@ -2060,7 +2061,7 @@
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_ref(mbmi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
- xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
+ x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
mi->bmi[i].as_mode = bsi->modes[i];
}
@@ -2070,7 +2071,7 @@
*returntotrate = bsi->r;
*returndistortion = bsi->d;
*returnyrate = bsi->segment_yrate;
- *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
+ *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
*psse = bsi->sse;
mbmi->mode = bsi->modes[3];
@@ -3005,7 +3006,7 @@
p[i].coeff = ctx->coeff_pbuf[i][1];
p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
- pd[i].eobs = ctx->eobs_pbuf[i][1];
+ p[i].eobs = ctx->eobs_pbuf[i][1];
ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
@@ -3015,7 +3016,7 @@
ctx->coeff_pbuf[i][0] = p[i].coeff;
ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
- ctx->eobs_pbuf[i][0] = pd[i].eobs;
+ ctx->eobs_pbuf[i][0] = p[i].eobs;
}
}
@@ -4133,7 +4134,7 @@
tmp_best_mbmode = *mbmi;
for (i = 0; i < 4; i++) {
tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
- x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
+ x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
}
pred_exists = 1;
if (switchable_filter_index == 0 &&
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -168,10 +168,11 @@
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
MACROBLOCKD *const xd = args->xd;
+ struct macroblock_plane *p = &args->cpi->mb.plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
int aoff, loff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
- set_contexts(xd, pd, plane_bsize, tx_size, pd->eobs[block] > 0, aoff, loff);
+ set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, aoff, loff);
}
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -181,16 +182,15 @@
MACROBLOCKD *xd = args->xd;
TOKENEXTRA **tp = args->tp;
uint8_t *token_cache = args->token_cache;
+ struct macroblock_plane *p = &cpi->mb.plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
int pt; /* near block/prev token context index */
int c = 0, rc = 0;
TOKENEXTRA *t = *tp; /* store tokens starting here */
- const int eob = pd->eobs[block];
+ const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
- struct macroblock_plane *p = &cpi->mb.plane[plane];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
-
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
const scan_order *so;
@@ -249,7 +249,7 @@
}
struct is_skippable_args {
- MACROBLOCKD *xd;
+ MACROBLOCK *x;
int *skippable;
};
@@ -257,21 +257,21 @@
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *argv) {
struct is_skippable_args *args = argv;
- args->skippable[0] &= (!args->xd->plane[plane].eobs[block]);
+ args->skippable[0] &= (!args->x->plane[plane].eobs[block]);
}
-int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
+static int sb_is_skippable(MACROBLOCK *x, BLOCK_SIZE bsize) {
int result = 1;
- struct is_skippable_args args = {xd, &result};
- foreach_transformed_block(xd, bsize, is_skippable, &args);
+ struct is_skippable_args args = {x, &result};
+ foreach_transformed_block(&x->e_mbd, bsize, is_skippable, &args);
return result;
}
-int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane) {
+int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
int result = 1;
- struct is_skippable_args args = {xd, &result};
- foreach_transformed_block_in_plane(xd, bsize, plane, is_skippable, &args);
+ struct is_skippable_args args = {x, &result};
+ foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
+ &args);
return result;
}
@@ -286,7 +286,7 @@
SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
- mbmi->skip_coeff = vp9_sb_is_skippable(xd, bsize);
+ mbmi->skip_coeff = sb_is_skippable(&cpi->mb, bsize);
if (mbmi->skip_coeff) {
if (!dry_run)
cm->counts.mbskip[mb_skip_context][1] += skip_inc;
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -32,9 +32,8 @@
extern const vp9_tree_index vp9_coef_con_tree[];
extern struct vp9_token vp9_coef_encodings[];
-int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize);
-int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane);
+int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+
struct VP9_COMP;
void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
--