shithub: libvpx

Download patch

ref: 1e892e63f915de6864d64335415710eeb31eeb56
parent: c2aa1520a4610fb5cc640f55c117805fb0047502
author: Marco Paniconi <marpan@google.com>
date: Tue Mar 3 10:14:02 EST 2020

vp9-svc: Allow for dynamic resize for single layer SVC

Make internal dynamic resize work for SVC mode
when single layer SVC is running (i.e., other layers
are dropped due to 0 bitrate).

Added unittest.

Change-Id: Icf03e1f276d9c4ba2734c87c927f7881c6b0a116

--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -60,6 +60,7 @@
     memset(bits_total_, 0, sizeof(bits_total_));
     memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
     dynamic_drop_layer_ = false;
+    single_layer_resize_ = false;
     change_bitrate_ = false;
     last_pts_ref_ = 0;
     middle_bitrate_ = 0;
@@ -285,7 +286,7 @@
       encoder->Config(&cfg_);
     }
 
-    if (dynamic_drop_layer_) {
+    if (dynamic_drop_layer_ && !single_layer_resize_) {
       // TODO(jian): Disable AQ Mode for this test for now.
       encoder->Control(VP9E_SET_AQ_MODE, 0);
       if (video->frame() == 0) {
@@ -329,8 +330,27 @@
         cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
         encoder->Config(&cfg_);
       }
+    } else if (dynamic_drop_layer_ && single_layer_resize_) {
+      // Change layer bitrates to set top layers to 0. This will trigger skip
+      // encoding/dropping of top spatial layers.
+      if (video->frame() == 10) {
+        cfg_.rc_target_bitrate -=
+            (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]);
+        middle_bitrate_ = cfg_.layer_target_bitrate[1];
+        top_bitrate_ = cfg_.layer_target_bitrate[2];
+        cfg_.layer_target_bitrate[1] = 0;
+        cfg_.layer_target_bitrate[2] = 0;
+        // Set spatial layer 0 to a very low bitrate to trigger resize.
+        cfg_.layer_target_bitrate[0] = 30;
+        cfg_.rc_target_bitrate = cfg_.layer_target_bitrate[0];
+        encoder->Config(&cfg_);
+      } else if (video->frame() == 300) {
+        // Set base spatial layer to very high to go back up to original size.
+        cfg_.layer_target_bitrate[0] = 300;
+        cfg_.rc_target_bitrate = cfg_.layer_target_bitrate[0];
+        encoder->Config(&cfg_);
+      }
     }
-
     if (force_key_test_ && force_key_) frame_flags_ = VPX_EFLAG_FORCE_KF;
 
     if (insert_layer_sync_) {
@@ -483,13 +503,15 @@
         }
       }
 
-      ASSERT_EQ(pkt->data.frame.width[sl],
-                top_sl_width_ * svc_params_.scaling_factor_num[sl] /
-                    svc_params_.scaling_factor_den[sl]);
+      if (!single_layer_resize_) {
+        ASSERT_EQ(pkt->data.frame.width[sl],
+                  top_sl_width_ * svc_params_.scaling_factor_num[sl] /
+                      svc_params_.scaling_factor_den[sl]);
 
-      ASSERT_EQ(pkt->data.frame.height[sl],
-                top_sl_height_ * svc_params_.scaling_factor_num[sl] /
-                    svc_params_.scaling_factor_den[sl]);
+        ASSERT_EQ(pkt->data.frame.height[sl],
+                  top_sl_height_ * svc_params_.scaling_factor_num[sl] /
+                      svc_params_.scaling_factor_den[sl]);
+      }
     }
   }
 
@@ -525,6 +547,7 @@
   int tune_content_;
   int spatial_layer_id_;
   bool dynamic_drop_layer_;
+  bool single_layer_resize_;
   unsigned int top_sl_width_;
   unsigned int top_sl_height_;
   vpx_svc_ref_frame_config_t ref_frame_config;
@@ -781,6 +804,43 @@
   cfg_.rc_target_bitrate = 800;
   ResetModel();
   dynamic_drop_layer_ = true;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Don't check rate targeting on two top spatial layer since they will be
+  // skipped for part of the sequence.
+  CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+                          0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 2 spatial layers and on
+// the fly switching to 1 spatial layer with dynamic resize enabled.
+// The resizer will resize the single layer down and back up again, as the
+// bitrate goes back up.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL_SingleLayerResize) {
+  SetSvcConfig(2, 1);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 0;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_resize_allowed = 1;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  dynamic_drop_layer_ = true;
+  single_layer_resize_ = true;
   AssignLayerBitrates();
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   // Don't check rate targeting on two top spatial layer since they will be
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2369,6 +2369,35 @@
 
   vp9_rc_set_frame_target(cpi, target);
   if (cm->show_frame) update_buffer_level_svc_preencode(cpi);
+
+  if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && svc->single_layer_svc == 1 &&
+      svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
+    LAYER_CONTEXT *lc = NULL;
+    cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
+    if (cpi->resize_pending) {
+      int tl, width, height;
+      // Apply the same scale to all temporal layers.
+      for (tl = 0; tl < svc->number_temporal_layers; tl++) {
+        lc = &svc->layer_context[svc->spatial_layer_id *
+                                     svc->number_temporal_layers +
+                                 tl];
+        lc->scaling_factor_num_resize =
+            cpi->resize_scale_num * lc->scaling_factor_num;
+        lc->scaling_factor_den_resize =
+            cpi->resize_scale_den * lc->scaling_factor_den;
+      }
+      // Set the size for this current temporal layer.
+      lc = &svc->layer_context[svc->spatial_layer_id *
+                                   svc->number_temporal_layers +
+                               svc->temporal_layer_id];
+      get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
+                           lc->scaling_factor_num_resize,
+                           lc->scaling_factor_den_resize, &width, &height);
+      vp9_set_size_literal(cpi, width, height);
+    }
+  } else {
+    cpi->resize_pending = 0;
+  }
 }
 
 void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
@@ -2625,7 +2654,7 @@
   int avg_qp_thr1 = 70;
   int avg_qp_thr2 = 50;
   int min_width = 180;
-  int min_height = 180;
+  int min_height = 90;
   int down_size_on = 1;
   cpi->resize_scale_num = 1;
   cpi->resize_scale_den = 1;
@@ -2635,6 +2664,7 @@
     cpi->resize_count = 0;
     return 0;
   }
+
   // Check current frame reslution to avoid generating frames smaller than
   // the minimum resolution.
   if (ONEHALFONLY_RESIZE) {
@@ -2645,8 +2675,7 @@
         (cm->width * 3 / 4 < min_width || cm->height * 3 / 4 < min_height))
       return 0;
     else if (cpi->resize_state == THREE_QUARTER &&
-             ((cpi->oxcf.width >> 1) < min_width ||
-              (cpi->oxcf.height >> 1) < min_height))
+             (cm->width * 3 / 4 < min_width || cm->height * 3 / 4 < min_height))
       down_size_on = 0;
   }
 
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -55,6 +55,7 @@
   svc->use_set_ref_frame_config = 0;
   svc->num_encoded_top_layer = 0;
   svc->simulcast_mode = 0;
+  svc->single_layer_svc = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) {
     svc->fb_idx_spatial_layer_id[i] = 0xff;
@@ -193,6 +194,7 @@
   const RATE_CONTROL *const rc = &cpi->rc;
   int sl, tl, layer = 0, spatial_layer_target;
   float bitrate_alloc = 1.0;
+  int num_spatial_layers_nonzero_rate = 0;
 
   cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
 
@@ -273,6 +275,17 @@
       lrc->best_quality = rc->best_quality;
     }
   }
+  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+    // Check bitrate of spatial layer.
+    layer = LAYER_IDS_TO_IDX(sl, oxcf->ts_number_layers - 1,
+                             oxcf->ts_number_layers);
+    if (oxcf->layer_target_bitrate[layer] > 0)
+      num_spatial_layers_nonzero_rate += 1;
+  }
+  if (num_spatial_layers_nonzero_rate == 1)
+    svc->single_layer_svc = 1;
+  else
+    svc->single_layer_svc = 0;
 }
 
 static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) {
@@ -751,6 +764,8 @@
   int width = 0, height = 0;
   SVC *const svc = &cpi->svc;
   LAYER_CONTEXT *lc = NULL;
+  int scaling_factor_num = 1;
+  int scaling_factor_den = 1;
   svc->skip_enhancement_layer = 0;
 
   if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF &&
@@ -888,18 +903,25 @@
     lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
   }
 
-  get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
-                       lc->scaling_factor_num, lc->scaling_factor_den, &width,
-                       &height);
+  if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && svc->single_layer_svc == 1 &&
+      svc->spatial_layer_id == svc->first_spatial_layer_to_encode &&
+      cpi->resize_state != ORIG) {
+    scaling_factor_num = lc->scaling_factor_num_resize;
+    scaling_factor_den = lc->scaling_factor_den_resize;
+  } else {
+    scaling_factor_num = lc->scaling_factor_num;
+    scaling_factor_den = lc->scaling_factor_den;
+  }
 
+  get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, scaling_factor_num,
+                       scaling_factor_den, &width, &height);
+
   // Use Eightap_smooth for low resolutions.
   if (width * height <= 320 * 240)
     svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH;
   // For scale factors > 0.75, set the phase to 0 (aligns decimated pixel
   // to source pixel).
-  lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
-                           svc->temporal_layer_id];
-  if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2)
+  if (scaling_factor_num > (3 * scaling_factor_den) >> 2)
     svc->downsample_filter_phase[svc->spatial_layer_id] = 0;
 
   // The usage of use_base_mv or partition_reuse assumes down-scale of 2x2.
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -47,6 +47,9 @@
   int min_q;
   int scaling_factor_num;
   int scaling_factor_den;
+  // Scaling factors used for internal resize scaling for single layer SVC.
+  int scaling_factor_num_resize;
+  int scaling_factor_den_resize;
   TWO_PASS twopass;
   vpx_fixed_buf_t rc_twopass_stats_in;
   unsigned int current_video_frame_in_layer;
@@ -192,6 +195,9 @@
 
   // Every spatial layer on a superframe whose base is key is key too.
   int simulcast_mode;
+
+  // Flag to indicate SVC is dynamically switched to a single layer.
+  int single_layer_svc;
 } SVC;
 
 struct VP9_COMP;