shithub: libvpx

Download patch

ref: 2640f25072884abb363a011268e411e46e652da5
parent: d07a5bfbf868eec93f01dfc7cb106e44fe512db9
author: Marco Paniconi <marpan@google.com>
date: Wed Mar 14 06:03:31 EDT 2018

vp9-svc: Frame dropper for SVC.

If a given spatial layer decides to drop, due to the
buffer/overshoot conditions for that layer, then drop
that current spatial layer and all spatial layers above.

In the current implementation the svc frame counter
(and hence the pattern for the non-flexible SVC case)
are updated on frame drops.

Also add last spatial layer encoded to the pkt.
This is useful for RTC applications that enable
frame dropping for SVC.

Update to the SVC datarate tests:
enabled frame dropper on all SVC datarate tests, and
made a fix to properly set the temporal_layer_id, which
works now even on frame drops.

Change-Id: If828c193f3cb6b1839803fd52fe9fbbda5b5a039

--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -1366,6 +1366,8 @@
     last_pts_ref_ = 0;
     middle_bitrate_ = 0;
     top_bitrate_ = 0;
+    superframe_count_ = -1;
+    key_frame_spacing_ = 9999;
   }
   virtual void BeginPassHook(unsigned int /*pass*/) {}
 
@@ -1450,6 +1452,17 @@
       encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
     }
 
+    superframe_count_++;
+    temporal_layer_id_ = 0;
+    if (number_temporal_layers_ == 2)
+      temporal_layer_id_ = (superframe_count_ % 2 != 0);
+    else if (number_temporal_layers_ == 3) {
+      if (superframe_count_ % 2 != 0) temporal_layer_id_ = 2;
+      if (superframe_count_ > 1) {
+        if ((superframe_count_ - 2) % 4 == 0) temporal_layer_id_ = 1;
+      }
+    }
+
     if (update_pattern_ && video->frame() >= 100) {
       vpx_svc_layer_id_t layer_id;
       if (video->frame() == 100) {
@@ -1459,6 +1472,7 @@
       // Set layer id since the pattern changed.
       layer_id.spatial_layer_id = 0;
       layer_id.temporal_layer_id = (video->frame() % 2 != 0);
+      temporal_layer_id_ = layer_id.temporal_layer_id;
       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
       set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
                                   number_spatial_layers_, 0, &ref_frame_config);
@@ -1528,14 +1542,7 @@
     duration_ = 0;
   }
 
-  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
-    vpx_svc_layer_id_t layer_id;
-    encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
-    spatial_layer_id_ = layer_id.spatial_layer_id;
-    temporal_layer_id_ = layer_id.temporal_layer_id;
-    // Update buffer with per-layer target frame bandwidth, this is done
-    // for every frame passed to the encoder (encoded or dropped).
-    // For temporal layers, update the cumulative buffer level.
+  virtual void PostEncodeFrameHook() {
     for (int sl = 0; sl < number_spatial_layers_; ++sl) {
       for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
         const int layer = sl * number_temporal_layers_ + tl;
@@ -1585,9 +1592,14 @@
     last_pts_ = pkt->data.frame.pts;
     const bool key_frame =
         (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+    if (key_frame) {
+      temporal_layer_id_ = 0;
+      superframe_count_ = 0;
+    }
     parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
                            pkt->data.frame.sz, sizes, &count);
-    if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_);
+    // Count may be less than number of spatial layers because of frame drops.
+    ASSERT_LE(count, number_spatial_layers_);
     for (int sl = 0; sl < number_spatial_layers_; ++sl) {
       sizes[sl] = sizes[sl] << 3;
       // Update the total encoded bits per layer.
@@ -1599,7 +1611,8 @@
         bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
         // There should be no buffer underrun, except on the base
         // temporal layer, since there may be key frames there.
-        if (!key_frame && tl > 0) {
+        // Fo short key frame spacing, buffer can underrun on individual frames.
+        if (!key_frame && tl > 0 && key_frame_spacing_ < 100) {
           ASSERT_GE(bits_in_buffer_model_[layer], 0)
               << "Buffer Underrun at frame " << pkt->data.frame.pts;
         }
@@ -1663,6 +1676,8 @@
   vpx_codec_pts_t last_pts_ref_;
   int middle_bitrate_;
   int top_bitrate_;
+  int superframe_count_;
+  int key_frame_spacing_;
 };
 
 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@@ -1728,7 +1743,7 @@
   svc_params_.scaling_factor_den[0] = 288;
   svc_params_.scaling_factor_num[1] = 288;
   svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -1780,7 +1795,7 @@
   svc_params_.scaling_factor_den[0] = 288;
   svc_params_.scaling_factor_num[1] = 288;
   svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -1850,6 +1865,7 @@
   // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
   for (int j = 64; j <= 67; j++) {
     cfg_.kf_max_dist = j;
+    key_frame_spacing_ = j;
     ResetModel();
     assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
                           cfg_.ts_number_layers, cfg_.temporal_layering_mode,
@@ -1883,7 +1899,7 @@
   svc_params_.scaling_factor_den[0] = 288;
   svc_params_.scaling_factor_num[1] = 288;
   svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -1931,7 +1947,7 @@
   svc_params_.scaling_factor_den[1] = 288;
   svc_params_.scaling_factor_num[2] = 288;
   svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -1980,7 +1996,7 @@
   svc_params_.scaling_factor_den[1] = 288;
   svc_params_.scaling_factor_num[2] = 288;
   svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -2031,7 +2047,7 @@
   svc_params_.scaling_factor_den[1] = 288;
   svc_params_.scaling_factor_num[2] = 288;
   svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -2062,7 +2078,7 @@
 // the fly switching to 1 and then 2 and back to 3 spatial layers. This switch
 // is done by setting spatial layer bitrates to 0, and then back to non-zero,
 // during the sequence.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_dynamic) {
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_DisableEnableLayers) {
   cfg_.rc_buf_initial_sz = 500;
   cfg_.rc_buf_optimal_sz = 500;
   cfg_.rc_buf_sz = 1000;
@@ -2082,7 +2098,7 @@
   svc_params_.scaling_factor_den[1] = 288;
   svc_params_.scaling_factor_num[2] = 288;
   svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
@@ -2139,6 +2155,7 @@
   // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
   for (int j = 32; j <= 35; j++) {
     cfg_.kf_max_dist = j;
+    key_frame_spacing_ = j;
     ResetModel();
     assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
                           cfg_.ts_number_layers, cfg_.temporal_layering_mode,
@@ -2174,7 +2191,7 @@
   svc_params_.scaling_factor_den[1] = 288;
   svc_params_.scaling_factor_num[2] = 288;
   svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
   cfg_.kf_max_dist = 9999;
   number_spatial_layers_ = cfg_.ss_number_layers;
   number_temporal_layers_ = cfg_.ts_number_layers;
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -201,7 +201,7 @@
       PreEncodeFrameHook(video, encoder.get());
       encoder->EncodeFrame(video, frame_flags_);
 
-      PostEncodeFrameHook(encoder.get());
+      PostEncodeFrameHook();
 
       CxDataIterator iter = encoder->GetCxData();
 
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -221,7 +221,7 @@
   virtual void PreEncodeFrameHook(VideoSource * /*video*/,
                                   Encoder * /*encoder*/) {}
 
-  virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
+  virtual void PostEncodeFrameHook() {}
 
   // Hook to be called on every compressed data packet.
   virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -917,6 +917,7 @@
         pkt.data.frame.flags = lib_flags << 16;
         pkt.data.frame.width[0] = cpi->common.Width;
         pkt.data.frame.height[0] = cpi->common.Height;
+        pkt.data.frame.last_spatial_layer_encoded = 0;
 
         if (lib_flags & FRAMEFLAGS_KEY) {
           pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4499,7 +4499,7 @@
 
   // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
   if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
-      !cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
+      cpi->oxcf.target_bandwidth == 0) {
     cpi->svc.skip_enhancement_layer = 1;
     vp9_rc_postencode_update_drop_frame(cpi);
     vp9_inc_frame_in_layer(cpi);
@@ -4591,23 +4591,29 @@
   }
 
   // For 1 pass CBR, check if we are dropping this frame.
-  // For spatial layers, for now only check for frame-dropping on first spatial
-  // layer, and if decision is to drop, we drop whole super-frame.
+  // For spatial layers, for now if we decide to drop current spatial
+  // layer then we will also drop all upper spatial layers.
+  // TODO(marpan): Allow for the case of dropping single layer only without
+  // dropping all upper layers.
   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi) ||
-        (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
+        (is_one_pass_cbr_svc(cpi) &&
+         cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
       vp9_rc_postencode_update_drop_frame(cpi);
       cpi->ext_refresh_frame_flags_pending = 0;
-      cpi->svc.rc_drop_superframe = 1;
       cpi->last_frame_dropped = 1;
-      // TODO(marpan): Advancing the svc counters on dropped frames can break
-      // the referencing scheme for the fixed svc patterns defined in
-      // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but
-      // for now, don't advance the svc frame counters on dropped frame.
-      // if (cpi->use_svc)
-      //   vp9_inc_frame_in_layer(cpi);
-
+      if (cpi->use_svc) {
+        int i;
+        // If we are dropping this spatial layer, then we will drop all
+        // upper spatial layers.
+        for (i = cpi->svc.spatial_layer_id; i < cpi->svc.number_spatial_layers;
+             i++)
+          cpi->svc.rc_drop_spatial_layer[i] = 1;
+        vp9_inc_frame_in_layer(cpi);
+        if (cpi->svc.rc_drop_spatial_layer[0] == 0)
+          cpi->svc.skip_enhancement_layer = 1;
+      }
       return;
     }
   }
@@ -4626,6 +4632,7 @@
   }
 
   cpi->last_frame_dropped = 0;
+  cpi->svc.last_layer_encoded = cpi->svc.spatial_layer_id;
 
   // Disable segmentation if it decrease rate/distortion ratio
   if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -398,7 +398,7 @@
   RATE_CONTROL *const rc = &cpi->rc;
   if (!oxcf->drop_frames_water_mark ||
       (is_one_pass_cbr_svc(cpi) &&
-       cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) {
+       cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
     return 0;
   } else {
     if (rc->buffer_level < 0) {
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -30,7 +30,6 @@
   svc->spatial_layer_id = 0;
   svc->temporal_layer_id = 0;
   svc->first_spatial_layer_to_encode = 0;
-  svc->rc_drop_superframe = 0;
   svc->force_zero_mode_spatial_ref = 0;
   svc->use_base_mv = 0;
   svc->use_partition_reuse = 0;
@@ -39,9 +38,11 @@
   svc->current_superframe = 0;
   svc->non_reference_frame = 0;
   svc->skip_enhancement_layer = 0;
+  svc->last_layer_encoded = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+    svc->rc_drop_spatial_layer[sl] = 0;
     svc->ext_frame_flags[sl] = 0;
     svc->ext_lst_fb_idx[sl] = 0;
     svc->ext_gld_fb_idx[sl] = 1;
@@ -648,8 +649,12 @@
     }
   }
 
-  if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode)
-    cpi->svc.rc_drop_superframe = 0;
+  // Reset the drop flags for all spatial lauyers, on the base layer.
+  if (cpi->svc.spatial_layer_id == 0) {
+    int i;
+    for (i = 0; i < cpi->svc.number_spatial_layers; i++)
+      cpi->svc.rc_drop_spatial_layer[i] = 0;
+  }
 
   lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
                                    cpi->svc.number_temporal_layers +
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -57,7 +57,7 @@
 
   int spatial_layer_to_encode;
   int first_spatial_layer_to_encode;
-  int rc_drop_superframe;
+  int rc_drop_spatial_layer[VPX_MAX_LAYERS];
 
   // Workaround for multiple frame contexts
   enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@@ -106,6 +106,8 @@
   int skip_enhancement_layer;
 
   int lower_layer_qindex;
+
+  int last_layer_encoded;
 } SVC;
 
 struct VP9_COMP;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1205,6 +1205,8 @@
           cx_data_sz -= size;
           pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
           pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+          pkt.data.frame.last_spatial_layer_encoded =
+              cpi->svc.last_layer_encoded;
 
           if (ctx->output_cx_pkt_cb.output_cx_pkt) {
             pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@@ -1233,6 +1235,7 @@
         pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
         pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
         pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+        pkt.data.frame.last_spatial_layer_encoded = cpi->svc.last_layer_encoded;
 
         if (ctx->pending_cx_data) {
           if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -181,6 +181,9 @@
        * first one.*/
       unsigned int width[VPX_SS_MAX_LAYERS];  /**< frame width */
       unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */
+      /*!\brief Last spatial layer frame in this packet. VP8 will always be set
+       * to 0.*/
+      unsigned int last_spatial_layer_encoded;
     } frame;                            /**< data for compressed frame packet */
     vpx_fixed_buf_t twopass_stats;      /**< data for two-pass packet */
     vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */