shithub: libvpx

Download patch

ref: a2d5a4a9566874aea4f32303f74cad7ca408933a
parent: e5b858740aed35a6f2eb7bf84adb993ccd1962f2
author: Marco Paniconi <marpan@google.com>
date: Fri May 18 10:50:46 EDT 2018

vp9-svc: Fix issue with reseting lst_fb_idx.

When encoding a given spatial layer and the same spatial layer
on previous superframe was dropped (or disabled due to 0 bitrate),
the lst_fb_idx for current layer is set to the buffer index that
was last updated on TL0 frame (for the same spatial layer).

This condition was to maintain proper temporal prediction pattern
under frame drops, and it should only apply to INTER frames.

But the condition was causing an assert to be triggered on spatial
layers whose base are key frames. Fix is to condition this reset of
lst_fb_idx on the "is_key_frame" flag. Also initialize the
fb_idx_upd_tl0 to -1 and only use it for a given spatial layer
if its been set.

These issues can happen when superframe drop happens just before
a key frame, or when stream starts with lower layers and dynamically
enabled higher spatial layers.

Added datarate unittest the inserts key frame after superframe drop,
and verified that this fix is needed for test to pass.
Also modified the existing DisableEnable spatial layer test to trigger
the issue of using fb_idx_upd_tl0 when it hasn't been set for a
spatial layer.

Change-Id: I059d1135736aca17e1326b9b4a2b16371eb4634e

--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -115,6 +115,8 @@
     key_frame_spacing_ = 9999;
     num_nonref_frames_ = 0;
     layer_framedrop_ = 0;
+    force_key_ = 0;
+    force_key_test_ = 0;
   }
   virtual void BeginPassHook(unsigned int /*pass*/) {}
 
@@ -268,7 +270,7 @@
     }
 
     if (dynamic_drop_layer_) {
-      if (video->frame() == 50) {
+      if (video->frame() == 0) {
         // Change layer bitrates to set top layers to 0. This will trigger skip
         // encoding/dropping of top two spatial layers.
         cfg_.rc_target_bitrate -=
@@ -278,7 +280,25 @@
         cfg_.layer_target_bitrate[1] = 0;
         cfg_.layer_target_bitrate[2] = 0;
         encoder->Config(&cfg_);
+      } else if (video->frame() == 50) {
+        // Change layer bitrates to non-zero on two top spatial layers.
+        // This will trigger skip encoding of top two spatial layers.
+        cfg_.layer_target_bitrate[1] = middle_bitrate_;
+        cfg_.layer_target_bitrate[2] = top_bitrate_;
+        cfg_.rc_target_bitrate +=
+            cfg_.layer_target_bitrate[2] + cfg_.layer_target_bitrate[1];
+        encoder->Config(&cfg_);
       } else if (video->frame() == 100) {
+        // Change layer bitrates to set top layers to 0. This will trigger skip
+        // encoding/dropping of top two spatial layers.
+        cfg_.rc_target_bitrate -=
+            (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]);
+        middle_bitrate_ = cfg_.layer_target_bitrate[1];
+        top_bitrate_ = cfg_.layer_target_bitrate[2];
+        cfg_.layer_target_bitrate[1] = 0;
+        cfg_.layer_target_bitrate[2] = 0;
+        encoder->Config(&cfg_);
+      } else if (video->frame() == 150) {
         // Change layer bitrate on second layer to non-zero to start
         // encoding it again.
         cfg_.layer_target_bitrate[1] = middle_bitrate_;
@@ -292,6 +312,12 @@
         encoder->Config(&cfg_);
       }
     }
+
+    if (force_key_test_ && force_key_)
+      frame_flags_ = VPX_EFLAG_FORCE_KF;
+    else
+      frame_flags_ = 0;
+
     const vpx_rational_t tb = video->timebase();
     timebase_ = static_cast<double>(tb.num) / tb.den;
     duration_ = 0;
@@ -369,13 +395,19 @@
     // In the constrained frame drop mode, if a given spatial is dropped all
     // upper layers must be dropped too.
     if (!layer_framedrop_) {
+      int num_layers_dropped = 0;
       for (int sl = 0; sl < number_spatial_layers_; ++sl) {
         if (!pkt->data.frame.spatial_layer_encoded[sl]) {
           // Check that all upper layers are dropped.
+          num_layers_dropped++;
           for (int sl2 = sl + 1; sl2 < number_spatial_layers_; ++sl2)
             ASSERT_EQ(pkt->data.frame.spatial_layer_encoded[sl2], 0);
         }
       }
+      if (num_layers_dropped == number_spatial_layers_ - 1)
+        force_key_ = 1;
+      else
+        force_key_ = 0;
     }
     // Keep track of number of non-reference frames, needed for mismatch check.
     // Non-reference frames are top spatial and temporal layer frames,
@@ -464,6 +496,8 @@
   int key_frame_spacing_;
   unsigned int num_nonref_frames_;
   int layer_framedrop_;
+  int force_key_;
+  int force_key_test_;
 };
 
 // Params: speed setting.
@@ -531,6 +565,53 @@
 }
 
 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers, with force key frame after frame drop
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLForceKey) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 3;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 72;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 144;
+  svc_params_.scaling_factor_den[1] = 288;
+  svc_params_.scaling_factor_num[2] = 288;
+  svc_params_.scaling_factor_den[2] = 288;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 100;
+  ResetModel();
+  AssignLayerBitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+                      cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+                      layer_target_avg_bandwidth_, bits_in_buffer_model_);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
+                          number_temporal_layers_, file_datarate_, 0.78, 1.25);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
 // 3 temporal layers. Run CIF clip with 1 thread.
 TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TL) {
   cfg_.rc_buf_initial_sz = 500;
@@ -714,9 +795,9 @@
                       cfg_.ts_number_layers, cfg_.temporal_layering_mode,
                       layer_target_avg_bandwidth_, bits_in_buffer_model_);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  // Don't check rate targeting on top spatial layer since it will be skipped
-  // for part of the sequence.
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
+  // Don't check rate targeting on two top spatial layer since they will be
+  // skipped for part of the sequence.
+  CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 2,
                           number_temporal_layers_, file_datarate_, 0.78, 1.15);
 #if CONFIG_VP9_DECODER
   // The non-reference frames are expected to be mismatched frames as the
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -55,6 +55,7 @@
     svc->downsample_filter_type[sl] = BILINEAR;
     svc->downsample_filter_phase[sl] = 8;  // Set to 8 for averaging filter.
     svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
+    svc->fb_idx_upd_tl0[sl] = -1;
   }
 
   if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -790,7 +791,9 @@
   if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0;
 
   if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
-      cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id]) {
+      cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] &&
+      cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] != -1 &&
+      !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
     // For fixed/non-flexible mode, if the previous frame (same spatial layer
     // from previous superframe) was dropped, make sure the lst_fb_idx
     // for this frame corresponds to the buffer index updated on (last) encoded
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -140,7 +140,7 @@
 
   // Keep track of the frame buffer index updated/refreshed on the base
   // temporal superframe.
-  uint8_t fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+  int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
 
   // Keep track of the spatial and temporal layer id of the frame that last
   // updated the frame buffer index.