shithub: libvpx

Download patch

ref: 3df55cebb25d18f75f7711f84d9786ac2ce33b72
parent: a2d35b234f176316fb978d8c7099a003ddd4e8ab
author: Jingning Han <jingning@google.com>
date: Wed Jun 20 09:15:10 EDT 2018

Exploit the spatial variance in temporal dependency model

Adapt the Lagrangian multipler based on the spatial variance in
the temporal dependency model. The functionality is disabled by
default. To turn on, set enable_tpl_model to 1.

Change-Id: I1b50606d9e2c8eb9c790c49eacc12c00d3d7c211

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -92,6 +92,7 @@
   int sadperbit4;
   int rddiv;
   int rdmult;
+  int cb_rdmult;
   int mb_energy;
 
   // These are set to their default values at the beginning, and then adjusted
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1950,6 +1950,8 @@
       x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
   }
 
+  if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
+
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
   if (frame_is_intra_only(cm)) {
@@ -1976,8 +1978,6 @@
     vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
   }
 
-  x->rdmult = orig_rdmult;
-
   // TODO(jingning) The rate-distortion optimization flow needs to be
   // refactored to provide proper exit/return handle.
   if (rd_cost->rate == INT_MAX)
@@ -1985,6 +1985,8 @@
   else
     rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
 
+  x->rdmult = orig_rdmult;
+
   ctx->rate = rd_cost->rate;
   ctx->dist = rd_cost->dist;
 }
@@ -2110,6 +2112,7 @@
                      PICK_MODE_CONTEXT *ctx) {
   MACROBLOCK *const x = &td->mb;
   set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+  if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
   update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
   encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
@@ -3491,6 +3494,47 @@
 #undef FEATURES
 #undef Q_CTX
 
+int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
+                     int orig_rdmult) {
+  TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+  int tpl_stride = tpl_frame->stride;
+  int64_t intra_cost = 0;
+  int64_t mc_dep_cost = 0;
+  int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+  int mi_high = num_8x8_blocks_high_lookup[bsize];
+  int row, col;
+
+  int dr = 0;
+  int count = 0;
+  double r0, rk, beta;
+
+  r0 = cpi->rd.r0;
+
+  for (row = mi_row; row < mi_row + mi_high; ++row) {
+    for (col = mi_col; col < mi_col + mi_wide; ++col) {
+      TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+
+      if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
+
+      intra_cost += this_stats->intra_cost;
+      mc_dep_cost += this_stats->mc_dep_cost;
+
+      ++count;
+    }
+  }
+
+  rk = (double)intra_cost / (intra_cost + mc_dep_cost);
+  beta = r0 / rk;
+  dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+  dr = VPXMIN(dr, orig_rdmult * 5 / 4);
+  dr = VPXMAX(dr, orig_rdmult * 3 / 4);
+
+  dr = VPXMAX(1, dr);
+  return dr;
+}
+
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
@@ -3540,7 +3584,7 @@
   int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
   int must_split = 0;
 
-  int partition_mul = cpi->rd.RDMULT;
+  int partition_mul = cpi->sf.enable_tpl_model ? x->cb_rdmult : cpi->rd.RDMULT;
 
   (void)*tp_orig;
 
@@ -4056,6 +4100,14 @@
       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, td->pc_root);
     } else {
+      int orig_rdmult = cpi->rd.RDMULT;
+      x->cb_rdmult = orig_rdmult;
+      if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+        int dr =
+            get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+        x->cb_rdmult = dr;
+      }
+
       // If required set upper and lower partition size limits
       if (sf->auto_min_max_partition_size) {
         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
@@ -5265,6 +5317,24 @@
 
     if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
       source_var_based_partition_search_method(cpi);
+  } else if (cpi->twopass.gf_group.index) {
+    TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+    int tpl_stride = tpl_frame->stride;
+    int64_t intra_cost_base = 0;
+    int64_t mc_dep_cost_base = 0;
+    int row, col;
+
+    for (row = 0; row < cm->mi_rows; ++row) {
+      for (col = 0; col < cm->mi_cols; ++col) {
+        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+        intra_cost_base += this_stats->intra_cost;
+        mc_dep_cost_base += this_stats->mc_dep_cost;
+      }
+    }
+
+    cpi->rd.r0 = (double)intra_cost_base / (intra_cost_base + mc_dep_cost_base);
   }
 
   {
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5428,6 +5428,16 @@
   return overlap_area = width * height;
 }
 
+int round_floor(int ref_pos) {
+  int round;
+  if (ref_pos < 0)
+    round = -(1 + (-ref_pos - 1) / MI_SIZE);
+  else
+    round = ref_pos / MI_SIZE;
+
+  return round;
+}
+
 void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                       int mi_row, int mi_col) {
   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
@@ -5440,8 +5450,8 @@
   int ref_pos_col = mi_col * MI_SIZE + mv_col;
 
   // top-left on grid block location
-  int grid_pos_row_base = (ref_pos_row >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
-  int grid_pos_col_base = (ref_pos_col >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
+  int grid_pos_row_base = round_floor(ref_pos_row) * MI_SIZE;
+  int grid_pos_col_base = round_floor(ref_pos_col) * MI_SIZE;
   int block;
 
   for (block = 0; block < 4; ++block) {
@@ -5452,8 +5462,8 @@
         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
       int overlap_area = get_overlap_area(grid_pos_row, grid_pos_col,
                                           ref_pos_row, ref_pos_col, block);
-      int ref_mi_row = grid_pos_row >> MI_SIZE_LOG2;
-      int ref_mi_col = grid_pos_col >> MI_SIZE_LOG2;
+      int ref_mi_row = round_floor(grid_pos_row);
+      int ref_mi_col = round_floor(grid_pos_col);
 
       int64_t mc_flow = tpl_stats->mc_dep_cost -
                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -299,7 +299,7 @@
   int mi_cols;
 } TplDepFrame;
 
-#define TPL_DEP_COST_SCALE_LOG2 16
+#define TPL_DEP_COST_SCALE_LOG2 4
 
 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
 typedef struct TileDataEnc {
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -200,6 +200,38 @@
   return (int)rdmult;
 }
 
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+  const VP9_COMMON *cm = &cpi->common;
+  int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  int64_t rdmult = 0;
+  switch (cpi->common.bit_depth) {
+    case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
+    case VPX_BITS_10:
+      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
+      break;
+    default:
+      assert(cpi->common.bit_depth == VPX_BITS_12);
+      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
+      break;
+  }
+#else
+  int64_t rdmult = (int)((88 * q * q / beta) / 24);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+  }
+  if (rdmult < 1) rdmult = 1;
+  return (int)rdmult;
+}
+
 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
   double q;
 #if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -115,6 +115,7 @@
 #endif
   int RDMULT;
   int RDDIV;
+  double r0;
 } RD_OPT;
 
 typedef struct RD_COST {
@@ -137,6 +138,8 @@
                                             int qindex);
 
 int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+
+int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);
 
 void vp9_initialize_rd_consts(struct VP9_COMP *cpi);