shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -34,6 +34,7 @@

 typedef struct {

   MODE_INFO mic;

   PARTITION_INFO partition_info;

+  unsigned char zcoeff_blk[256];

   int skip;

   int_mv best_ref_mv;

   int_mv second_best_ref_mv;

@@ -136,6 +137,7 @@

   int mv_row_min;

   int mv_row_max;

+  unsigned char zcoeff_blk[TX_SIZES][256];

   int skip;

   int encode_breakout;

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -390,6 +390,9 @@

   x->skip = ctx->skip;

+  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,

+             sizeof(ctx->zcoeff_blk));

   if (!output_enabled)

     return;

@@ -2743,7 +2746,6 @@

                      &xd->scale_factor[0]);

     setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,

                      &xd->scale_factor[1]);

     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));

--- a/vp9/encoder/vp9_encodemb.c

+++ b/vp9/encoder/vp9_encodemb.c

@@ -482,6 +482,14 @@

   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

   uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,

                                                  pd->dst.buf, pd->dst.stride);

+  // TODO(jingning): per-transform-block zero forcing is only enabled for the

+  // luma component; chroma components will be integrated as well.

+  if (x->zcoeff_blk[tx_size][block] && plane == 0) {

+    pd->eobs[block] = 0;

+    return;

+  }

   vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);

   if (x->optimize)

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -624,7 +624,12 @@

   rate_block(plane, block, plane_bsize, tx_size, args);

   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);

   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);

+  // TODO(jingning): zcoeff_blk flagging is temporarily luma-only (plane 0).

   rd = MIN(rd1, rd2);

+  if (plane == 0)

+    x->zcoeff_blk[tx_size][block] = rd1 > rd2;

   args->this_rate += args->rate[block];

   args->this_dist += args->dist[block];

   args->this_sse  += args->sse[block];

@@ -2234,6 +2239,9 @@

   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];

   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];

+  vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],

+             sizeof(ctx->zcoeff_blk));

   // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()

   // doesn't actually work this way

   memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));

@@ -3153,8 +3161,11 @@

   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;

   const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;

   int best_skip2 = 0;

+  unsigned char best_zcoeff_blk[256] = { 0 };

   x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;

+  vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));

+  vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));

   for (i = 0; i < 4; i++) {

     int j;

@@ -3826,6 +3837,8 @@

         best_mbmode = *mbmi;

         best_skip2 = this_skip2;

         best_partition = *x->partition_info;

+        vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],

+                   sizeof(best_zcoeff_blk));

         if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)

           for (i = 0; i < 4; i++)

@@ -4020,6 +4033,9 @@

     mbmi->mv[0].as_int = xd->this_mi->bmi[3].as_mv[0].as_int;

     mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;

+  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,

+             sizeof(best_zcoeff_blk));

   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {

     if (best_pred_rd[i] == INT64_MAX)

--

⑨