shithub: libvpx

Download patch

ref: f058688eaa8b9fb2ff1f4d92eb02cf888fd28ec0
parent: 3f7e6cc020446ee29439f1cd7d3d5c39adaf64c0
author: Jerome Jiang <jianj@google.com>
date: Fri Jun 1 10:27:34 EDT 2018

vp9-svc: Allow usage of second (long term) temporal reference.

Allow for second temporal reference for top spatial layer in SVC,
when inter-layer prediction is disabled on INTER frames.
The second temporal reference is labelled as the golden reference
and the update/refresh of this reference buffer is only on base
temporal layer superframes. For now the period of refresh is
fixed at every 20 TL0 superframes.

Average gain is ~4% on the RTC set, with several clips up
by ~8-12%. Speed loss is ~2% on Mac.

Feature is disabled by default for now.

Change-Id: I2e5db5052c62dbe958a3b14be97d043823b7a529

--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1497,6 +1497,7 @@
   int skip_ref_find_pred[4] = { 0 };
   unsigned int sse_zeromv_normalized = UINT_MAX;
   unsigned int best_sse_sofar = UINT_MAX;
+  int gf_is_longterm_ref = 0;
 #if CONFIG_VP9_TEMPORAL_DENOISING
   VP9_PICKMODE_CTX_DEN ctx_den;
   int64_t zero_last_cost_orig = INT64_MAX;
@@ -1538,6 +1539,11 @@
            cm->base_qindex < svc->lower_layer_qindex - 20)
     thresh_svc_skip_golden = 1000;
 
+  if (!cpi->use_svc ||
+      (svc->use_longterm_ref_current_layer &&
+       !svc->layer_context[svc->temporal_layer_id].is_key_frame))
+    gf_is_longterm_ref = 1;
+
   init_ref_frame_cost(cm, xd, ref_frame_cost);
   memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
 
@@ -1610,7 +1616,7 @@
   }
 #endif
 
-  if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc &&
+  if (cpi->rc.frames_since_golden == 0 && gf_is_longterm_ref &&
       !cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) {
     usable_ref_frame = LAST_FRAME;
   } else {
@@ -1637,7 +1643,7 @@
   // For svc mode, on spatial_layer_id > 0: if the reference has different scale
   // constrain the inter mode to only test zero motion.
   if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
-      svc->spatial_layer_id > 0) {
+      svc->spatial_layer_id > 0 && !gf_is_longterm_ref) {
     if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
       struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
       if (vp9_is_scaled(sf)) {
@@ -1716,7 +1722,8 @@
   // The nonzero motion is half pixel shifted to left and top (-4, -4).
   if (cpi->use_svc && svc->spatial_layer_id > 0 &&
       svc_force_zero_mode[inter_layer_ref - 1] &&
-      svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) {
+      svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
+      !gf_is_longterm_ref) {
     svc_mv_col = -4;
     svc_mv_row = -4;
     flag_svc_subpel = 1;
@@ -1789,7 +1796,7 @@
 
     // For SVC, skip the golden (spatial) reference search if sse of zeromv_last
     // is below threshold.
-    if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
+    if (cpi->use_svc && ref_frame == GOLDEN_FRAME && !gf_is_longterm_ref &&
         sse_zeromv_normalized < thresh_svc_skip_golden)
       continue;
 
@@ -1909,7 +1916,7 @@
       if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
 
     if (this_mode == NEWMV && !force_mv_inter_layer) {
-      if (ref_frame > LAST_FRAME && !cpi->use_svc &&
+      if (ref_frame > LAST_FRAME && gf_is_longterm_ref &&
           cpi->oxcf.rc_mode == VPX_CBR) {
         int tmp_sad;
         uint32_t dis;
@@ -2277,7 +2284,7 @@
   // layer is chosen as the reference. Always perform intra prediction if
   // LAST is the only reference, or is_key_frame is set, or on base
   // temporal layer.
-  if (svc->spatial_layer_id) {
+  if (svc->spatial_layer_id && !gf_is_longterm_ref) {
     perform_intra_pred =
         svc->temporal_layer_id == 0 ||
         svc->layer_context[svc->temporal_layer_id].is_key_frame ||
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1597,6 +1597,18 @@
       update_golden_frame_stats(cpi);
   }
 
+  // If second (long term) temporal reference is used for SVC,
+  // update the golden frame counter, only for base temporal layer.
+  if (cpi->use_svc && cpi->svc.use_longterm_ref_current_layer &&
+      cpi->svc.temporal_layer_id == 0) {
+    if (cpi->refresh_golden_frame)
+      rc->frames_since_golden = 0;
+    else
+      rc->frames_since_golden++;
+    // Decrement count down till next gf
+    if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+  }
+
   if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
   if (cm->show_frame) {
     rc->frames_since_key++;
@@ -1861,7 +1873,31 @@
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }
-
+  // If long term temporal feature is enabled, set the period of the update.
+  // The update/refresh of this reference frame is always on base temporal
+  // layer frame.
+  if (cpi->svc.use_longterm_ref_current_layer &&
+      cpi->svc.temporal_layer_id == 0) {
+    if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+      // On key frame we update the buffer index used for long term reference.
+      // Use the alt_ref since it is not used or updated on key frames.
+      cpi->ext_refresh_alt_ref_frame = 1;
+      cpi->alt_fb_idx = cpi->svc.buffer_idx_longterm_ref;
+    } else if (rc->frames_till_gf_update_due == 0) {
+      // Set period of next update. Make it a multiple of 10, as the cyclic
+      // refresh is typically ~10%, and we'd like the update to happen after
+      // a few cycles of the refresh (so it is a better quality frame). Note
+      // the cyclic refresh for SVC only operates on base temporal layer
+      // frames. Choose 20 as period for now (2 cycles).
+      rc->baseline_gf_interval = 20;
+      rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+      cpi->ext_refresh_golden_frame = 1;
+      rc->gfu_boost = DEFAULT_GF_BOOST;
+    }
+  } else if (!cpi->svc.use_longterm_ref) {
+    rc->frames_till_gf_update_due = INT_MAX;
+    rc->baseline_gf_interval = INT_MAX;
+  }
   // Any update/change of global cyclic refresh parameters (amount/delta-qp)
   // should be done here, before the frame qp is selected.
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
@@ -1868,8 +1904,6 @@
     vp9_cyclic_refresh_update_parameters(cpi);
 
   vp9_rc_set_frame_target(cpi, target);
-  rc->frames_till_gf_update_due = INT_MAX;
-  rc->baseline_gf_interval = INT_MAX;
 }
 
 void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -33,6 +33,8 @@
   svc->force_zero_mode_spatial_ref = 0;
   svc->use_base_mv = 0;
   svc->use_partition_reuse = 0;
+  svc->use_longterm_ref = 0;
+  svc->use_longterm_ref_current_layer = 0;
   svc->scaled_temp_is_alloc = 0;
   svc->scaled_one_half = 0;
   svc->current_superframe = 0;
@@ -709,6 +711,34 @@
     }
   }
 
+  // For the fixed (non-flexible/bypass) SVC mode:
+  // If long term temporal reference is enabled at the sequence level
+  // (use_longterm_ref == 1), and inter_layer is disabled (on inter-frames),
+  // we can use golden as a second temporal reference
+  // (since the spatial/inter-layer reference is disabled).
+  // To be safe we use fb_index 7 for this, since for 3-3 layer system slot 7
+  // should be free/un-used. For now this second temporal reference is only
+  // used for the highest spatial layer.
+  cpi->svc.use_longterm_ref_current_layer = 0;
+  cpi->svc.buffer_idx_longterm_ref = 7;
+  if (cpi->svc.use_longterm_ref &&
+      cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+      cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_ON &&
+      cpi->svc.number_spatial_layers <= 3 &&
+      cpi->svc.number_temporal_layers <= 3 &&
+      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
+    // Enable the second (long-term) temporal reference at the frame-level.
+    cpi->svc.use_longterm_ref_current_layer = 1;
+    // Only used for prediction on non-key superframes.
+    if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+      // Use golden for this reference which will be used for prediction.
+      cpi->gld_fb_idx = cpi->svc.buffer_idx_longterm_ref;
+      // Enable prediction off LAST (last reference) and golden (which will
+      // generally be further behind/long-term reference).
+      cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+    }
+  }
+
   // Reset the drop flags for all spatial layers, on the base layer.
   if (cpi->svc.spatial_layer_id == 0) {
     vp9_zero(cpi->svc.drop_spatial_layer);
@@ -955,7 +985,7 @@
   if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
       svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
       svc->framedrop_mode != LAYER_DROP) {
-    if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+    if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
       // On non-key frames: LAST is always temporal reference, GOLDEN is
       // spatial reference.
       if (svc->temporal_layer_id == 0)
@@ -988,5 +1018,14 @@
                svc->temporal_layer_id);
       }
     }
+  } else if (svc->use_longterm_ref_current_layer &&
+             !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+    // If the usage of golden as second long term reference is enabled for this
+    // layer, then temporal_layer_id of that reference must be base temporal
+    // layer 0, and spatial_layer_id of that reference must be same as current
+    // spatial_layer_id.
+    assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+           svc->spatial_layer_id);
+    assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0);
   }
 }
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -97,6 +97,11 @@
   int gld_fb_idx[VPX_MAX_LAYERS];
   int alt_fb_idx[VPX_MAX_LAYERS];
   int force_zero_mode_spatial_ref;
+  // Sequence level flag to enable second (long term) temporal reference.
+  int use_longterm_ref;
+  // Frame level flag to enable second (long term) temporal reference.
+  int use_longterm_ref_current_layer;
+  int buffer_idx_longterm_ref;
   int current_superframe;
   int non_reference_frame;
   int use_base_mv;