shithub: libvpx

Download patch

ref: 9bcd75056582cce1ee8cd2ebeb2fbace0c9d9629
parent: 24ad6925722ed092ec8b687298a92f5df17c6ced
parent: a517343ca33edebadd963485abdd1a2cacda7df6
author: Jingning Han <jingning@google.com>
date: Tue Sep 24 05:18:17 EDT 2013

Merge "Enable per transformed block zero coeffs forcing"

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -34,6 +34,7 @@
 typedef struct {
   MODE_INFO mic;
   PARTITION_INFO partition_info;
+  unsigned char zcoeff_blk[256];
   int skip;
   int_mv best_ref_mv;
   int_mv second_best_ref_mv;
@@ -136,6 +137,7 @@
   int mv_row_min;
   int mv_row_max;
 
+  unsigned char zcoeff_blk[TX_SIZES][256];
   int skip;
 
   int encode_breakout;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -390,6 +390,9 @@
   }
 
   x->skip = ctx->skip;
+  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+             sizeof(ctx->zcoeff_blk));
+
   if (!output_enabled)
     return;
 
@@ -2743,7 +2746,6 @@
                      &xd->scale_factor[0]);
     setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
                      &xd->scale_factor[1]);
-
 
     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
   }
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -482,6 +482,14 @@
   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
                                                  pd->dst.buf, pd->dst.stride);
+
+  // TODO(jingning): per transformed block zero forcing is only enabled for the
+  // luma component. Chroma components will be integrated as well.
+  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+    pd->eobs[block] = 0;
+    return;
+  }
+
   vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
 
   if (x->optimize)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -624,7 +624,12 @@
   rate_block(plane, block, plane_bsize, tx_size, args);
   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
+
+  // TODO(jingning): zcoeff_blk is temporarily set only for the luma component
   rd = MIN(rd1, rd2);
+  if (plane == 0)
+    x->zcoeff_blk[tx_size][block] = rd1 > rd2;
+
   args->this_rate += args->rate[block];
   args->this_dist += args->dist[block];
   args->this_sse  += args->sse[block];
@@ -2234,6 +2239,9 @@
   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
 
+  vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
+             sizeof(ctx->zcoeff_blk));
+
   // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
   // doesn't actually work this way
   memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
@@ -3153,8 +3161,11 @@
   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
   const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
   int best_skip2 = 0;
+  unsigned char best_zcoeff_blk[256] = { 0 };
 
   x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
+  vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));
+  vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
 
   for (i = 0; i < 4; i++) {
     int j;
@@ -3826,6 +3837,8 @@
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
         best_partition = *x->partition_info;
+        vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
+                   sizeof(best_zcoeff_blk));
 
         if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
           for (i = 0; i < 4; i++)
@@ -4020,6 +4033,9 @@
     mbmi->mv[0].as_int = xd->this_mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
   }
+
+  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
+             sizeof(best_zcoeff_blk));
 
   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
     if (best_pred_rd[i] == INT64_MAX)