ref: f058688eaa8b9fb2ff1f4d92eb02cf888fd28ec0
parent: 3f7e6cc020446ee29439f1cd7d3d5c39adaf64c0
author: Jerome Jiang <jianj@google.com>
date: Fri Jun 1 10:27:34 EDT 2018
vp9-svc: Allow usage of second (long term) temporal reference. Allow for second temporal reference for top spatial layer in SVC, when inter-layer prediction is disabled on INTER frames. The second temporal reference is labelled as the golden reference and the update/refresh of this reference buffer is only on base temporal layer superframes. For now the period of refresh is fixed at every 20 TL0 superframes. Average gain is ~4% on RTC set, several clips up by ~8-12%. Speed loss is about ~2% on mac. Feature is disabled as default for now. Change-Id: I2e5db5052c62dbe958a3b14be97d043823b7a529
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1497,6 +1497,7 @@
int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
+ int gf_is_longterm_ref = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1538,6 +1539,11 @@
cm->base_qindex < svc->lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
+ if (!cpi->use_svc ||
+ (svc->use_longterm_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame))
+ gf_is_longterm_ref = 1;
+
init_ref_frame_cost(cm, xd, ref_frame_cost);
memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
@@ -1610,7 +1616,7 @@
}
#endif
- if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc &&
+ if (cpi->rc.frames_since_golden == 0 && gf_is_longterm_ref &&
!cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) {
usable_ref_frame = LAST_FRAME;
} else {
@@ -1637,7 +1643,7 @@
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
- svc->spatial_layer_id > 0) {
+ svc->spatial_layer_id > 0 && !gf_is_longterm_ref) {
if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
@@ -1716,7 +1722,8 @@
// The nonzero motion is half pixel shifted to left and top (-4, -4).
if (cpi->use_svc && svc->spatial_layer_id > 0 &&
svc_force_zero_mode[inter_layer_ref - 1] &&
- svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) {
+ svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
+ !gf_is_longterm_ref) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
@@ -1789,7 +1796,7 @@
// For SVC, skip the golden (spatial) reference search if sse of zeromv_last
// is below threshold.
- if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
+ if (cpi->use_svc && ref_frame == GOLDEN_FRAME && !gf_is_longterm_ref &&
sse_zeromv_normalized < thresh_svc_skip_golden)
continue;
@@ -1909,7 +1916,7 @@
if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
if (this_mode == NEWMV && !force_mv_inter_layer) {
- if (ref_frame > LAST_FRAME && !cpi->use_svc &&
+ if (ref_frame > LAST_FRAME && gf_is_longterm_ref &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
uint32_t dis;
@@ -2277,7 +2284,7 @@
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base
// temporal layer.
- if (svc->spatial_layer_id) {
+ if (svc->spatial_layer_id && !gf_is_longterm_ref) {
perform_intra_pred =
svc->temporal_layer_id == 0 ||
svc->layer_context[svc->temporal_layer_id].is_key_frame ||
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1597,6 +1597,18 @@
update_golden_frame_stats(cpi);
}
+ // If second (long term) temporal reference is used for SVC,
+ // update the golden frame counter, only for base temporal layer.
+ if (cpi->use_svc && cpi->svc.use_longterm_ref_current_layer &&
+ cpi->svc.temporal_layer_id == 0) {
+ if (cpi->refresh_golden_frame)
+ rc->frames_since_golden = 0;
+ else
+ rc->frames_since_golden++;
+ // Decrement count down till next gf
+ if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+ }
+
if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
if (cm->show_frame) {
rc->frames_since_key++;
@@ -1861,7 +1873,31 @@
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
-
+ // If long term temporal feature is enabled, set the period of the update.
+ // The update/refresh of this reference frame is always on base temporal
+ // layer frame.
+ if (cpi->svc.use_longterm_ref_current_layer &&
+ cpi->svc.temporal_layer_id == 0) {
+ if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ // On key frame we update the buffer index used for long term reference.
+ // Use the alt_ref since it is not used or updated on key frames.
+ cpi->ext_refresh_alt_ref_frame = 1;
+ cpi->alt_fb_idx = cpi->svc.buffer_idx_longterm_ref;
+ } else if (rc->frames_till_gf_update_due == 0) {
+ // Set period of next update. Make it a multiple of 10, as the cyclic
+ // refresh is typically ~10%, and we'd like the update to happen after
+ // a few cycles of the refresh (so it is a better quality frame). Note the
+ // cyclic refresh for SVC only operates on base temporal layer frames.
+ // Choose 20 as period for now (2 cycles).
+ rc->baseline_gf_interval = 20;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ cpi->ext_refresh_golden_frame = 1;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ }
+ } else if (!cpi->svc.use_longterm_ref) {
+ rc->frames_till_gf_update_due = INT_MAX;
+ rc->baseline_gf_interval = INT_MAX;
+ }
// Any update/change of global cyclic refresh parameters (amount/delta-qp)
// should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
@@ -1868,8 +1904,6 @@
vp9_cyclic_refresh_update_parameters(cpi);
vp9_rc_set_frame_target(cpi, target);
- rc->frames_till_gf_update_due = INT_MAX;
- rc->baseline_gf_interval = INT_MAX;
}
void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -33,6 +33,8 @@
svc->force_zero_mode_spatial_ref = 0;
svc->use_base_mv = 0;
svc->use_partition_reuse = 0;
+ svc->use_longterm_ref = 0;
+ svc->use_longterm_ref_current_layer = 0;
svc->scaled_temp_is_alloc = 0;
svc->scaled_one_half = 0;
svc->current_superframe = 0;
@@ -709,6 +711,34 @@
}
}
+ // For the fixed (non-flexible/bypass) SVC mode:
+ // If long term temporal reference is enabled at the sequence level
+ // (use_longterm_ref == 1), and inter_layer is disabled (on inter-frames),
+ // we can use golden as a second temporal reference
+ // (since the spatial/inter-layer reference is disabled).
+ // To be safe we use fb_index 7 for this, since for 3-3 layer system slot 7
+ // should be free/un-used. For now usage of this second temporal reference
+ // will only be used for highest spatial layer.
+ cpi->svc.use_longterm_ref_current_layer = 0;
+ cpi->svc.buffer_idx_longterm_ref = 7;
+ if (cpi->svc.use_longterm_ref &&
+ cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_ON &&
+ cpi->svc.number_spatial_layers <= 3 &&
+ cpi->svc.number_temporal_layers <= 3 &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
+ // Enable the second (long-term) temporal reference at the frame-level.
+ cpi->svc.use_longterm_ref_current_layer = 1;
+ // Only used for prediction on non-key superframes.
+ if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ // Use golden for this reference which will be used for prediction.
+ cpi->gld_fb_idx = cpi->svc.buffer_idx_longterm_ref;
+ // Enable prediction off LAST (last reference) and golden (which will
+ // generally be further behind/long-term reference).
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ }
+
// Reset the drop flags for all spatial layers, on the base layer.
if (cpi->svc.spatial_layer_id == 0) {
vp9_zero(cpi->svc.drop_spatial_layer);
@@ -955,7 +985,7 @@
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
svc->framedrop_mode != LAYER_DROP) {
- if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
// On non-key frames: LAST is always temporal reference, GOLDEN is
// spatial reference.
if (svc->temporal_layer_id == 0)
@@ -988,5 +1018,14 @@
svc->temporal_layer_id);
}
}
+ } else if (svc->use_longterm_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // If the usage of golden as second long term reference is enabled for this
+ // layer, then temporal_layer_id of that reference must be base temporal
+ // layer 0, and spatial_layer_id of that reference must be same as current
+ // spatial_layer_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0);
}
}
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -97,6 +97,11 @@
int gld_fb_idx[VPX_MAX_LAYERS];
int alt_fb_idx[VPX_MAX_LAYERS];
int force_zero_mode_spatial_ref;
+ // Sequence level flag to enable second (long term) temporal reference.
+ int use_longterm_ref;
+ // Frame level flag to enable second (long term) temporal reference.
+ int use_longterm_ref_current_layer;
+ int buffer_idx_longterm_ref;
int current_superframe;
int non_reference_frame;
int use_base_mv;