shithub: libvpx

Download patch

ref: 238cf66eb5e376062ee13198ef537e848866b8aa
parent: 3ac2b57015000dde5731ceb9fc824e86747c5945
author: Hui Su <huisu@google.com>
date: Tue Jun 12 07:56:09 EDT 2018

Improve the partition search breakout speed feature

Use a linear model to make partition search breakout decisions.
Currently the model is tuned for large quantizers and small resolutions.
So it is only used when q-index is at least 200 and frame
width/height is smaller than 720. Also, it is not yet supported for high
bit depth.

Tested speed 1 and 2 on lowres and midres. Compression performance is
neutral. At low bitrates, encoding speedup is up to 50% for speed 1;
up to 30% for speed 2.
Some sample numbers:

into_tree_480p, speed 1
QP=60 before:  35.228 dB, 3488b/f, 7.78 fps
      now:     35.217 dB, 3475b/f, 11.57 fps
QP=50 before:  37.492 dB, 7983b/f, 6.24 fps
      now:     37.491 dB, 7974b/f, 7.55 fps

PartyScene_832x480_50, speed 1
QP=60 before:  30.104 dB, 22426b/f, 3.28 fps
      now:     30.109 dB, 22410b/f, 4.43 fps
QP=50 before:  33.016 dB, 46984b/f, 2.78 fps
      now:     33.018 dB, 46998b/f, 3.35 fps

into_tree_480p, speed 2
QP=60 before:  35.175 dB, 3506b/f, 10.96 fps
      now:     35.185 dB, 3510b/f, 13.47 fps
QP=50 before:  37.448 dB, 8016b/f, 9.04 fps
      now:     37.459 dB, 8048b/f, 9.81 fps

PartyScene_832x480_50, speed 2
QP=60 before:  30.060 dB, 22537b/f, 4.42 fps
      now:     30.061 dB, 22541b/f, 5.38 fps
QP=50 before:  32.923 dB, 47134b/f, 3.85 fps
      now:     32.920 dB, 47073b/f, 4.31 fps

Change-Id: I674cba4f027c4c65f7837d5ec9179d6201e6ba86

--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3314,6 +3314,79 @@
   return 0;
 }
 
+#define FEATURES 4  // Inputs: rate, dist, var, ac_q; +1 slot holds the bias.
+static const float partition_breakout_weights_64[FEATURES + 1] = {
+  -0.016673f, -0.001025f, -0.000032f, 0.000833f, 1.94261885f - 2.1f,
+};
+
+static const float partition_breakout_weights_32[FEATURES + 1] = {
+  -0.010554f, -0.003081f, -0.000134f, 0.004491f, 1.68445992f - 3.5f,
+};
+
+static const float partition_breakout_weights_16[FEATURES + 1] = {
+  -0.013154f, -0.002404f, -0.000977f, 0.008450f, 2.57404566f - 5.5f,
+};
+
+static const float partition_breakout_weights_8[FEATURES + 1] = {
+  -0.011807f, -0.009873f, -0.000931f, 0.034768f, 1.32254851f - 2.0f,
+};
+
+// ML-based partition search breakout: returns 1 when the linear score >= 0.
+static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
+                               const MACROBLOCK *const x,
+                               const RD_COST *const rd_cost) {
+  DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
+  float features[FEATURES];
+  const float *linear_weights = NULL;  // FEATURES weights, then the bias.
+  float linear_score = 0.0f;
+
+  switch (bsize) {
+    case BLOCK_64X64: linear_weights = partition_breakout_weights_64; break;
+    case BLOCK_32X32: linear_weights = partition_breakout_weights_32; break;
+    case BLOCK_16X16: linear_weights = partition_breakout_weights_16; break;
+    case BLOCK_8X8: linear_weights = partition_breakout_weights_8; break;
+    default: assert(0 && "Unexpected block size."); return 0;
+  }
+  if (!linear_weights) return 0;
+
+  {  // Generate feature values: scaled rate, dist, source variance, AC quant.
+    const VP9_COMMON *const cm = &cpi->common;
+    const int ac_q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
+    const int num_pels_log2 = num_pels_log2_lookup[bsize];
+    int feature_index = 0;
+    unsigned int var, sse;
+    float rate_f, dist_f;
+
+    var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+                                vp9_64_zeros, 0, &sse);
+    var = var >> num_pels_log2;
+
+    vpx_clear_system_state();
+
+    rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
+    dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
+    rate_f =
+        ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
+        rate_f;
+
+    features[feature_index++] = rate_f;
+    features[feature_index++] = dist_f;
+    features[feature_index++] = (float)var;
+    features[feature_index++] = (float)ac_q;
+    assert(feature_index == FEATURES);
+  }
+
+  {  // Calculate the output score: bias plus weighted sum of the features.
+    int i;
+    linear_score = linear_weights[FEATURES];
+    for (i = 0; i < FEATURES; ++i)
+      linear_score += linear_weights[i] * features[i];
+  }
+
+  return linear_score >= 0;
+}
+#undef FEATURES
+
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
@@ -3499,12 +3572,27 @@
           // If all y, u, v transform blocks in this partition are skippable,
           // and the dist & rate are within the thresholds, the partition search
           // is terminated for current branch of the partition search tree.
-          if (!x->e_mbd.lossless && ctx->skippable &&
-              ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
-               (best_rdc.dist < dist_breakout_thr &&
-                best_rdc.rate < rate_breakout_thr))) {
-            do_split = 0;
-            do_rect = 0;
+          if (!x->e_mbd.lossless && ctx->skippable) {
+            int use_ml_based_breakout =
+                cpi->sf.use_ml_partition_search_breakout &&
+                cm->base_qindex >= 200;
+#if CONFIG_VP9_HIGHBITDEPTH
+            if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+              use_ml_based_breakout = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+            if (use_ml_based_breakout) {
+              if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
+                do_split = 0;
+                do_rect = 0;
+              }
+            } else {
+              if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+                  (best_rdc.dist < dist_breakout_thr &&
+                   best_rdc.rate < rate_breakout_thr)) {
+                do_split = 0;
+                do_rect = 0;
+              }
+            }
           }
         } else {
           // Currently, the machine-learning based partition search early
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -82,6 +82,7 @@
     } else {
       sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
       sf->partition_search_breakout_thr.dist = (1 << 21);
+      sf->use_ml_partition_search_breakout = 1;
     }
   }
 
@@ -112,6 +113,7 @@
   }
 
   if (speed >= 3) {
+    sf->use_ml_partition_search_breakout = 0;
     if (VPXMIN(cm->width, cm->height) >= 720) {
       sf->disable_split_mask = DISABLE_ALL_SPLIT;
       sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
@@ -712,6 +714,7 @@
   sf->partition_search_breakout_thr.dist = (1 << 19);
   sf->partition_search_breakout_thr.rate = 80;
   sf->ml_partition_search_early_termination = 0;
+  sf->use_ml_partition_search_breakout = 0;
 
   if (oxcf->mode == REALTIME) {
     set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -467,6 +467,9 @@
   // Partition search early breakout thresholds.
   PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
 
+  // Use ML-based partition search early breakout.
+  int use_ml_partition_search_breakout;
+
   // Machine-learning based partition search early termination
   int ml_partition_search_early_termination;