shithub: libvpx

Download patch

ref: 69a6506a8ff81dc3d222df21d19812e3125779dd
parent: 729d7d6a2ff0f9a9d2f5534767ddb7edd6f598a7
author: Marco Paniconi <marpan@google.com>
date: Tue Jun 12 07:50:29 EDT 2018

vp9-svc: Add support for spatial layer sync frames.

Add encoder control to allow application to insert
spatial layer sync frame. The sync frame disables
temporal prediction for that spatial layer.

This is useful for RTC application to have receiver
start decoding a higher spatial layer, without inserting
a key frame on base spatial layer.

If the layer sync is requested on the base spatial layer
this then force a key frame, otherwise it only disables
the temporal reference for that spatial layer, allowing
temporal prediction to continue for the other layers.

Although the temporal prediction is disabled and reset
on a layer sync frame, the inter-layer prediction for the
sync frame is enabled on INTER frames. So the meaning of
INTER_LAYER_PRED_OFF_NONKEY is modified to mean disable
inter-layer prediction on non-key and non-sync frames.

Added unittest for inserting layer sync frames.

Bump up ABI version.
Change-Id: Id458acc400a77c853551f125c4e7b6d001991f03

--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -142,6 +142,12 @@
     const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
     ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
   }
+
+  void Control(int ctrl_id, struct vpx_svc_spatial_layer_sync *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
   void Control(int ctrl_id, vpx_active_map_t *arg) {
     const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -119,6 +119,8 @@
     layer_framedrop_ = 0;
     force_key_ = 0;
     force_key_test_ = 0;
+    insert_layer_sync_ = 0;
+    layer_sync_on_base_ = 0;
   }
   virtual void BeginPassHook(unsigned int /*pass*/) {}
 
@@ -322,6 +324,25 @@
     else
       frame_flags_ = 0;
 
+    if (insert_layer_sync_) {
+      vpx_svc_spatial_layer_sync_t svc_layer_sync;
+      svc_layer_sync.base_layer_intra_only = 0;
+      layer_sync_on_base_ = 0;
+      for (int i = 0; i < number_spatial_layers_; i++)
+        svc_layer_sync.spatial_layer_sync[i] = 0;
+      if (video->frame() == 150) {
+        svc_layer_sync.spatial_layer_sync[1] = 1;
+        encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+      } else if (video->frame() == 240) {
+        svc_layer_sync.spatial_layer_sync[2] = 1;
+        encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+      } else if (video->frame() == 320) {
+        svc_layer_sync.spatial_layer_sync[0] = 1;
+        layer_sync_on_base_ = 1;
+        encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+      }
+    }
+
     const vpx_rational_t tb = video->timebase();
     timebase_ = static_cast<double>(tb.num) / tb.den;
     duration_ = 0;
@@ -383,6 +404,12 @@
     const bool key_frame =
         (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
     if (key_frame) {
+      // For test that inserts layer sync frames: requesting a layer_sync on
+      // the base layer must force key frame. So if any key frame occurs after
+      // first superframe it must due to layer sync on base spatial layer.
+      if (superframe_count_ > 0 && insert_layer_sync_) {
+        ASSERT_EQ(layer_sync_on_base_, 1);
+      }
       temporal_layer_id_ = 0;
       superframe_count_ = 0;
     }
@@ -503,6 +530,8 @@
   int force_key_;
   int force_key_test_;
   int inter_layer_pred_mode_;
+  int insert_layer_sync_;
+  int layer_sync_on_base_;
 };
 
 // Params: speed setting.
@@ -1245,6 +1274,56 @@
   cfg_.kf_max_dist = kf_dist;
   key_frame_spacing_ = kf_dist;
   ResetModel();
+  AssignLayerBitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+                      cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+                      layer_target_avg_bandwidth_, bits_in_buffer_model_);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
+                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
+// temporal layers. Run VGA clip with 1 thread, and place layer sync frames:
+// one at middle layer first, then another one for top layer, and another
+// insert for base spatial layer (which forces key frame).
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLSyncFrames) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 3;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 72;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 144;
+  svc_params_.scaling_factor_den[1] = 288;
+  svc_params_.scaling_factor_num[2] = 288;
+  svc_params_.scaling_factor_den[2] = 288;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.rc_target_bitrate = 400;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  ResetModel();
+  insert_layer_sync_ = 1;
   AssignLayerBitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
                       cfg_.ts_number_layers, cfg_.temporal_layering_mode,
                       layer_target_avg_bandwidth_, bits_in_buffer_model_);
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3650,7 +3650,7 @@
 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
                                       uint8_t *dest) {
   VP9_COMMON *const cm = &cpi->common;
-  int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.
+  int q = 0, bottom_index = 0, top_index = 0;
   const INTERP_FILTER filter_scaler =
       (is_one_pass_cbr_svc(cpi))
           ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id]
@@ -3772,11 +3772,12 @@
     cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
 
   // For 1 pass CBR, check if we are dropping this frame.
-  // Never drop on key frame, or if base layer is key for svc.
-  // Don't drop on scene change.
+  // Never drop on key frame, if base layer is key for svc,
+  // on scene change, or if superframe has layer sync.
   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME && !cpi->rc.high_source_sad &&
       !cpi->svc.high_source_sad_superframe &&
+      !cpi->svc.superframe_has_layer_sync &&
       (!cpi->use_svc ||
        !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
     if (vp9_rc_drop_frame(cpi)) return 0;
@@ -4793,12 +4794,15 @@
   }
   cm->prev_frame = cm->cur_frame;
 
-  if (cpi->use_svc)
+  if (cpi->use_svc) {
     cpi->svc
         .layer_context[cpi->svc.spatial_layer_id *
                            cpi->svc.number_temporal_layers +
                        cpi->svc.temporal_layer_id]
         .last_frame_type = cm->frame_type;
+    // Reset layer_sync back to 0 for next frame.
+    cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
+  }
 
   cpi->force_update_segmentation = 0;
 
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1853,10 +1853,14 @@
                                svc->number_temporal_layers);
   // Periodic key frames is based on the super-frame counter
   // (svc.current_superframe), also only base spatial layer is key frame.
+  // Key frame is set for any of the following: very first frame, frame flags
+  // indicates key, superframe counter hits key frequencey, or sync flag is
+  // set for spatial layer 0.
   if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key &&
        (svc->current_superframe % cpi->oxcf.key_freq == 0) &&
-       svc->spatial_layer_id == 0)) {
+       svc->spatial_layer_id == 0) ||
+      (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) {
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
     if (is_one_pass_cbr_svc(cpi)) {
@@ -1880,6 +1884,10 @@
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }
+
+  // Check if superframe contains a sync layer request.
+  vp9_svc_check_spatial_layer_sync(cpi);
+
   // If long term termporal feature is enabled, set the period of the update.
   // The update/refresh of this reference frame is always on base temporal
   // layer frame.
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -41,6 +41,8 @@
   svc->skip_enhancement_layer = 0;
   svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON;
   svc->framedrop_mode = CONSTRAINED_LAYER_DROP;
+  svc->base_layer_intra_only = 0;
+  svc->superframe_has_layer_sync = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) {
     svc->fb_idx_spatial_layer_id[i] = -1;
@@ -58,6 +60,7 @@
     svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
     svc->fb_idx_upd_tl0[sl] = -1;
     svc->drop_count[sl] = 0;
+    svc->spatial_layer_sync[sl] = 0;
   }
   svc->max_consec_drop = INT_MAX;
 
@@ -745,6 +748,17 @@
     svc->use_gf_temporal_ref_current_layer = 1;
   }
 
+  // Check if current superframe has any layer sync, only check once on
+  // base layer.
+  if (svc->spatial_layer_id == 0) {
+    int sl = 0;
+    // Default is no sync.
+    svc->superframe_has_layer_sync = 0;
+    for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+      if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1;
+    }
+  }
+
   // Reset the drop flags for all spatial layers, on the base layer.
   if (svc->spatial_layer_id == 0) {
     vp9_zero(svc->drop_spatial_layer);
@@ -927,8 +941,11 @@
   // Check for disabling inter-layer (spatial) prediction, if
   // svc.disable_inter_layer_pred is set. If the previous spatial layer was
   // dropped then disable the prediction from this (scaled) reference.
+  // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled
+  // on key frames or if any spatial layer is a sync layer.
   if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY &&
-       !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
+       !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+       !cpi->svc.superframe_has_layer_sync) ||
       cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF ||
       cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
     MV_REFERENCE_FRAME ref_frame;
@@ -1030,5 +1047,39 @@
     assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
            svc->spatial_layer_id);
     assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0);
+  }
+}
+
+void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) {
+  SVC *const svc = &cpi->svc;
+  // Only for superframes whose base is not key, as those are
+  // already sync frames.
+  if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+    if (svc->spatial_layer_id == 0) {
+      // On base spatial layer: if the current superframe has a layer sync then
+      // reset the pattern counters and reset to base temporal layer.
+      if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi);
+    }
+    // If the layer sync is set for this current spatial layer then
+    // disable the temporal reference.
+    if (svc->spatial_layer_id > 0 &&
+        svc->spatial_layer_sync[svc->spatial_layer_id]) {
+      cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
+      if (svc->use_gf_temporal_ref_current_layer) {
+        int index = svc->spatial_layer_id;
+        // If golden is used as second reference: need to remove it from
+        // prediction, reset refresh period to 0, and update the reference.
+        svc->use_gf_temporal_ref_current_layer = 0;
+        cpi->rc.baseline_gf_interval = 0;
+        cpi->rc.frames_till_gf_update_due = 0;
+        // On layer sync frame we must update the buffer index used for long
+        // term reference. Use the alt_ref since it is not used or updated on
+        // sync frames.
+        if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+        assert(index >= 0);
+        cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+        cpi->ext_refresh_alt_ref_frame = 1;
+      }
+    }
   }
 }
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -24,7 +24,7 @@
   INTER_LAYER_PRED_ON,
   // Inter-layer prediction is off on all frames.
   INTER_LAYER_PRED_OFF,
-  // Inter-layer prediction is off on non-key frames.
+  // Inter-layer prediction is off on non-key frames and non-sync frames.
   INTER_LAYER_PRED_OFF_NONKEY,
   // Inter-layer prediction is on on all frames, but constrained such
   // that any layer S (> 0) can only predict from previous spatial
@@ -158,6 +158,10 @@
   // updated the frame buffer index.
   uint8_t fb_idx_spatial_layer_id[REF_FRAMES];
   uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
+
+  int spatial_layer_sync[VPX_SS_MAX_LAYERS];
+  int base_layer_intra_only;
+  uint8_t superframe_has_layer_sync;
 } SVC;
 
 struct VP9_COMP;
@@ -218,6 +222,8 @@
 void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi);
 
 void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi);
+
+void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi);
 
 #ifdef __cplusplus
 }  // extern "C"
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1545,6 +1545,20 @@
   return VPX_CODEC_OK;
 }
 
+static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync(
+    vpx_codec_alg_priv_t *ctx, va_list args) {
+  VP9_COMP *const cpi = ctx->cpi;
+  vpx_svc_spatial_layer_sync_t *data =
+      va_arg(args, vpx_svc_spatial_layer_sync_t *);
+  int sl;
+  for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
+    cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl];
+  // TODO(marpan): Implement intra_only feature. Currently this setting
+  // does nothing.
+  cpi->svc.base_layer_intra_only = data->base_layer_intra_only;
+  return VPX_CODEC_OK;
+}
+
 static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx,
                                                  va_list args) {
   vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp =
@@ -1631,6 +1645,7 @@
   { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred },
   { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer },
   { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref },
+  { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },
 
   // Getters
   { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -637,6 +637,21 @@
    * Supported in codecs: VP9
    */
   VP9E_SET_SVC_GF_TEMPORAL_REF,
+
+  /*!\brief Codec control function to enable spatial layer sync frame, for any
+   * spatial layer. Enabling it for layer k means spatial layer k will disable
+   * all temporal prediction, but keep the inter-layer prediction. It will
+   * refresh any temporal reference buffer for that layer, and reset the
+   * temporal layer for the superframe to 0. Setting the layer sync for base
+   * spatial layer forces a key frame. Default is off (0) for all spatial
+   * layers. Spatial layer sync flag is reset to 0 after each encoded layer,
+   * so when control is invoked it is only used for the current superframe.
+   *
+   * 0: Off (default), 1: Enabled
+   *
+   * Supported in codecs: VP9
+   */
+  VP9E_SET_SVC_SPATIAL_LAYER_SYNC,
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -814,6 +829,16 @@
   int max_consec_drop; /**< Maximum consecutive drops, for any layer. */
 } vpx_svc_frame_drop_t;
 
+/*!\brief vp9 svc spatial layer sync parameters.
+ *
+ * This defines the spatial layer sync flag, defined per spatial layer.
+ *
+ */
+typedef struct vpx_svc_spatial_layer_sync {
+  int spatial_layer_sync[VPX_SS_MAX_LAYERS]; /**< Sync layer flags */
+  int base_layer_intra_only; /**< Flag for setting Intra-only frame on base */
+} vpx_svc_spatial_layer_sync_t;
+
 /*!\cond */
 /*!\brief VP8 encoder control function parameter type
  *
@@ -959,6 +984,10 @@
 
 VPX_CTRL_USE_TYPE(VP9E_SET_SVC_GF_TEMPORAL_REF, unsigned int)
 #define VPX_CTRL_VP9E_SET_SVC_GF_TEMPORAL_REF
+
+VPX_CTRL_USE_TYPE(VP9E_SET_SVC_SPATIAL_LAYER_SYNC,
+                  vpx_svc_spatial_layer_sync_t *)
+#define VPX_CTRL_VP9E_SET_SVC_SPATIAL_LAYER_SYNC
 
 /*!\endcond */
 /*! @} - end defgroup vp8_encoder */
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -63,7 +63,7 @@
  * fields to structures
  */
 #define VPX_ENCODER_ABI_VERSION \
-  (13 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+  (14 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
 
 /*! \brief Encoder capabilities bitfield
  *