shithub: libvpx

Download patch

ref: ada6a428f0b916381c69ee284efaf39fddb7aa60
parent: 538b40699fc851957fd32a201e3b449976ecfb57
parent: 97848890a99af6604df13ec38c094a01aea40f0e
author: Marco Paniconi <marpan@google.com>
date: Thu Aug 9 22:24:43 EDT 2018

Merge "vp9: Allow for overshoot detection for non-screen CBR mode."

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4052,13 +4052,12 @@
     vp9_svc_assert_constraints_pattern(cpi);
   }
 
-  if (!cpi->sf.re_encode_overshoot_rt &&
-      cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+  // Check if this high_source_sad (scene/slide change) frame should be
+  // encoded at high/max QP, and if so, set the q and adjust some rate
+  // control parameters.
+  if (cpi->sf.overshoot_detection_rt == 1 &&
       (cpi->rc.high_source_sad ||
        (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
-    // Check if this high_source_sad (scene/slide change) frame should be
-    // encoded at high/max QP, and if so, set the q and adjust some rate
-    // control parameters.
     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
       vp9_set_quantizer(cm, q);
       vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -4087,10 +4086,11 @@
 
   vp9_encode_frame(cpi);
 
-  // Check if we should drop this frame because of high overshoot.
-  // Only for frames where high temporal-source SAD is detected.
+  // Check if we should re-encode this frame at high Q because of high
+  // overshoot based on the encoded frame size. Only for frames where
+  // high temporal-source SAD is detected.
   // For SVC: all spatial layers are checked for re-encoding.
-  if (cpi->sf.re_encode_overshoot_rt &&
+  if (cpi->sf.overshoot_detection_rt == 2 &&
       (cpi->rc.high_source_sad ||
        (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
     int frame_size = 0;
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2648,10 +2648,8 @@
     float thresh = 8.0f;
     uint32_t thresh_key = 140000;
     if (cpi->oxcf.speed <= 5) thresh_key = 240000;
-    if (cpi->oxcf.rc_mode == VPX_VBR) {
-      min_thresh = 65000;
-      thresh = 2.1f;
-    }
+    if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000;
+    if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f;
     if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) {
       const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2);
       const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2);
@@ -2822,14 +2820,16 @@
   SPEED_FEATURES *const sf = &cpi->sf;
   int thresh_qp = 7 * (rc->worst_quality >> 3);
   int thresh_rate = rc->avg_frame_bandwidth << 3;
-  // Lower rate threshold for video.
+  // Lower thresh_qp for video (more overshoot at lower Q) to be
+  // more conservative for video.
   if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
-    thresh_rate = rc->avg_frame_bandwidth << 2;
+    thresh_qp = rc->worst_quality >> 1;
   // If this decision is not based on an encoded frame size but just on
-  // scene/slide change detection (i.e., re_encode_overshoot_rt = 0), adjust the
-  // qp_thresh and skip the (frame_size > thresh_rate) condition in this case.
-  if (!sf->re_encode_overshoot_rt) thresh_qp = 3 * (rc->worst_quality >> 2);
-  if ((!sf->re_encode_overshoot_rt || frame_size > thresh_rate) &&
+  // scene/slide change detection (i.e., overshoot_detection_rt = 1),
+  // for now skip the (frame_size > thresh_rate) condition in this case.
+  // TODO(marpan): Use a better size/rate condition for this case and
+  // adjust thresholds.
+  if ((sf->overshoot_detection_rt == 1 || frame_size > thresh_rate) &&
       cm->base_qindex < thresh_qp) {
     double rate_correction_factor =
         cpi->rc.rate_correction_factors[INTER_NORMAL];
@@ -2846,7 +2846,7 @@
     // and the encoded frame used alot of Intra modes, then force hybrid_intra
     // encoding for the re-encode on this scene change. hybrid_intra will
     // use rd-based intra mode selection for small blocks.
-    if (sf->re_encode_overshoot_rt && frame_size > (thresh_rate << 1) &&
+    if (sf->overshoot_detection_rt == 2 && frame_size > (thresh_rate << 1) &&
         cpi->svc.spatial_layer_id == 0) {
       MODE_INFO **mi = cm->mi_grid_visible;
       int sum_intra_usage = 0;
@@ -2900,8 +2900,8 @@
         LAYER_CONTEXT *lc = &svc->layer_context[layer];
         RATE_CONTROL *lrc = &lc->rc;
         lrc->avg_frame_qindex[INTER_FRAME] = *q;
-        lrc->buffer_level = rc->optimal_buffer_level;
-        lrc->bits_off_target = rc->optimal_buffer_level;
+        lrc->buffer_level = lrc->optimal_buffer_level;
+        lrc->bits_off_target = lrc->optimal_buffer_level;
         lrc->rc_1_frame = 0;
         lrc->rc_2_frame = 0;
         lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -405,7 +405,7 @@
   sf->use_compound_nonrd_pickmode = 0;
   sf->nonrd_keyframe = 0;
   sf->svc_use_lowres_part = 0;
-  sf->re_encode_overshoot_rt = 0;
+  sf->overshoot_detection_rt = 0;
   sf->disable_16x16part_nonkey = 0;
   sf->disable_golden_ref = 0;
   sf->enable_tpl_model = 0;
@@ -570,11 +570,9 @@
     // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
     // increase in encoding time.
     if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
-    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
-        cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
-        (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
-      sf->re_encode_overshoot_rt = 1;
-    }
+    if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+        cpi->oxcf.rc_mode == VPX_CBR)
+      sf->overshoot_detection_rt = 1;
     if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
         cm->width <= 1280 && cm->height <= 720) {
       sf->use_altref_onepass = 1;
@@ -583,7 +581,6 @@
   }
 
   if (speed >= 6) {
-    sf->re_encode_overshoot_rt = 0;
     if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) {
       sf->use_altref_onepass = 1;
       sf->use_compound_nonrd_pickmode = 1;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -542,9 +542,17 @@
   // For SVC: enables use of partition from lower spatial resolution.
   int svc_use_lowres_part;
 
-  // Enable re-encoding on scene change with potential high overshoot,
-  // for real-time encoding flow.
-  int re_encode_overshoot_rt;
+  // Flag to indicate process for handling overshoot on slide/scene change,
+  // for real-time CBR mode.
+  // 0: no reaction to rate control on a detected slide/scene change
+  // (prior to encoding the frame).
+  // 1: set to larger Q based only on the detected slide/scene change
+  // and current/past Q. No second pass encoding, so faster than option 2.
+  // 2: based on (first pass) encoded frame, if large frame size is detected
+  // then set to higher Q for second encode. This involves 2 pass encoding
+  // on slide change, so slower than 1, but more accurate for detecting
+  // overshoot.
+  int overshoot_detection_rt;
 
   // Disable partitioning of 16x16 blocks.
   int disable_16x16part_nonkey;