shithub: libvpx

Download patch

ref: 635ae8bdc1f5f1fe9e94c2f14144ad4c8750b627
parent: bf7a02a4cf0ed7f34c92fc3f974d94487cf44c02
author: paulwilkins <paulwilkins@google.com>
date: Thu Aug 18 10:15:25 EDT 2016

Adjust coefficient optimization and tx_domain rd speed features.

Previously Tx domain rd was used in all cases above speed 0.
Coefficient optimization was only enabled for best and speed 0.

This patch selectively sets these features at other speed settings
based on block complexity.

For the Netflix and HD sets in particular the quality gains are
large compared to the speed hit. At speed 1 the average PSNR
gain in the NF set is > 2.5%, with one clip coming in at 18%
and some points almost 30%. Average gains for the lower
resolution test sets are around 1%.

The gains are biggest at low Q so some further optimization
may be possible.

Change-Id: I340376c7b2a78e5389a34b7ebdc41072808d0576

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -71,6 +71,8 @@
   int skip_recode;
   int skip_optimize;
   int q_index;
+  int block_qcoeff_opt;
+  int block_tx_domain;
 
   // The equivalent error at the current rdmult of one whole bit (not one
   // bitcost unit).
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1336,6 +1336,22 @@
   // Save rdmult before it might be changed, so it can be restored later.
   orig_rdmult = x->rdmult;
 
+  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
+    double logvar = vp9_log_block_var(cpi, x, bsize);
+    // Check block complexity as part of decision on using pixel or transform
+    // domain distortion in rd tests.
+    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
+                         (logvar >= cpi->sf.tx_domain_thresh);
+
+    // Check block complexity as part of decision on using quantized
+    // coefficient optimisation inside the rd loop.
+    x->block_qcoeff_opt =
+        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
+  } else {
+    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
+    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
+  }
+
   if (aq_mode == VARIANCE_AQ) {
     const int energy =
         bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -535,7 +535,7 @@
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
 
-  if (cpi->sf.txfm_domain_distortion) {
+  if (x->block_tx_domain) {
     const int ss_txfrm_size = tx_size << 1;
     int64_t this_sse;
     const int shift = tx_size == TX_32X32 ? 0 : 2;
@@ -663,11 +663,11 @@
   if (args->exit_early) return;
 
   if (!is_inter_block(mi)) {
-    struct encode_b_args intra_arg = { x, args->cpi->sf.quant_coeff_opt,
-                                       args->t_above, args->t_left, &mi->skip };
+    struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
+                                       args->t_left, &mi->skip };
     vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                            &intra_arg);
-    if (args->cpi->sf.txfm_domain_distortion) {
+    if (x->block_tx_domain) {
       dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
     } else {
@@ -697,7 +697,7 @@
         SKIP_TXFM_NONE) {
       // full forward transform and quantization
       vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
-      if (args->cpi->sf.quant_coeff_opt)
+      if (x->block_qcoeff_opt)
         vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
       dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
@@ -731,7 +731,7 @@
   } else {
     // full forward transform and quantization
     vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
-    if (args->cpi->sf.quant_coeff_opt)
+    if (x->block_qcoeff_opt)
       vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
     dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                tx_size, &dist, &sse);
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -137,6 +137,9 @@
   }
 }
 
+static double tx_dom_thresholds[6] = { 99.0, 14.0, 12.0, 8.0, 4.0, 0.0 };
+static double qopt_thresholds[6] = { 99.0, 12.0, 10.0, 4.0, 2.0, 0.0 };
+
 static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
                                    SPEED_FEATURES *sf, int speed) {
   const int boosted = frame_is_boosted(cpi);
@@ -151,14 +154,24 @@
   sf->use_square_only_threshold = BLOCK_16X16;
 
   if (speed >= 1) {
-    if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
-        vp9_internal_image_edge(cpi)) {
-      sf->use_square_partition_only = !frame_is_boosted(cpi);
+    if (cpi->oxcf.pass == 2) {
+      TWO_PASS *const twopass = &cpi->twopass;
+      if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
+          vp9_internal_image_edge(cpi)) {
+        sf->use_square_partition_only = !frame_is_boosted(cpi);
+      } else {
+        sf->use_square_partition_only = !frame_is_intra_only(cm);
+      }
     } else {
       sf->use_square_partition_only = !frame_is_intra_only(cm);
     }
-    sf->use_square_only_threshold = BLOCK_4X4;
 
+    sf->allow_txfm_domain_distortion = 1;
+    sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
+    sf->allow_quant_coeff_opt = sf->optimize_coefficients;
+    sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
+
+    sf->use_square_only_threshold = BLOCK_4X4;
     sf->less_rectangular_check = 1;
 
     sf->use_rd_breakout = 1;
@@ -174,8 +187,6 @@
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->txfm_domain_distortion = 1;
-    sf->quant_coeff_opt = 0;
   }
 
   if (speed >= 2) {
@@ -294,8 +305,10 @@
   sf->exhaustive_searches_thresh = INT_MAX;
 
   if (speed >= 1) {
-    sf->txfm_domain_distortion = 1;
-    sf->quant_coeff_opt = 0;
+    sf->allow_txfm_domain_distortion = 1;
+    sf->tx_domain_thresh = 0.0;
+    sf->allow_quant_coeff_opt = 0;
+    sf->quant_opt_thresh = 0.0;
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check = 1;
     sf->tx_size_search_method =
@@ -566,8 +579,10 @@
   sf->disable_filter_search_var_thresh = 0;
   sf->adaptive_interp_filter_search = 0;
   sf->allow_partition_search_skip = 0;
-  sf->txfm_domain_distortion = 0;
-  sf->quant_coeff_opt = sf->optimize_coefficients;
+  sf->allow_txfm_domain_distortion = 0;
+  sf->tx_domain_thresh = 99.0;
+  sf->allow_quant_coeff_opt = sf->optimize_coefficients;
+  sf->quant_opt_thresh = 99.0;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -239,11 +239,13 @@
   int coeff_prob_appx_step;
 
   // Enable uniform quantizer followed by trellis coefficient optimization
-  int quant_coeff_opt;
+  int allow_quant_coeff_opt;
+  double quant_opt_thresh;
 
   // Use transform domain distortion. Use pixel domain distortion in speed 0
   // and certain situations in higher speed to improve the RD model precision.
-  int txfm_domain_distortion;
+  int allow_txfm_domain_distortion;
+  double tx_domain_thresh;
 
   // The threshold is to determine how slow the motino is, it is used when
   // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION