ref: 541ab3db721cd593bb840319741c5135a88219a8
parent: 6b191c16c08de2431083410e33db72b97d65e184
parent: 7195ded2c5a8be2c343ce0372d6bc4d2c8389962
author: Marco Paniconi <marpan@google.com>
date: Tue Nov 20 17:02:59 EST 2018
Merge "vp9-svc: Reset temporal layers on scene change"
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3700,15 +3700,16 @@
static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
int q = 0, bottom_index = 0, top_index = 0;
int no_drop_scene_change = 0;
const INTERP_FILTER filter_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_type[svc->spatial_layer_id]
: EIGHTTAP;
const int phase_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_phase[svc->spatial_layer_id]
: 0;
if (cm->show_existing_frame) {
@@ -3716,8 +3717,7 @@
return 1;
}
- cpi->svc.time_stamp_prev[cpi->svc.spatial_layer_id] =
- cpi->svc.time_stamp_superframe;
+ svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
// Flag to check if its valid to compute the source sad (used for
// scene detection and for superblock content state in CBR mode).
@@ -3731,25 +3731,25 @@
if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 2 &&
cpi->un_scaled_source->y_height == cm->height << 2 &&
- cpi->svc.scaled_temp.y_width == cm->width << 1 &&
- cpi->svc.scaled_temp.y_height == cm->height << 1) {
+ svc->scaled_temp.y_width == cm->width << 1 &&
+ svc->scaled_temp.y_height == cm->height << 1) {
// For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
// advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
// result will be saved in scaled_temp and might be used later.
- const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1];
- const int phase_scaler2 = cpi->svc.downsample_filter_phase[1];
+ const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
+ const int phase_scaler2 = svc->downsample_filter_phase[1];
cpi->Source = vp9_svc_twostage_scale(
- cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp,
+ cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
- cpi->svc.scaled_one_half = 1;
+ svc->scaled_one_half = 1;
} else if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 1 &&
cpi->un_scaled_source->y_height == cm->height << 1 &&
- cpi->svc.scaled_one_half) {
+ svc->scaled_one_half) {
// If the spatial layer is 1/2x1/2 and the scaling is already done in the
// two-stage scaling, use the result directly.
- cpi->Source = &cpi->svc.scaled_temp;
- cpi->svc.scaled_one_half = 0;
+ cpi->Source = &svc->scaled_temp;
+ svc->scaled_one_half = 0;
} else {
cpi->Source = vp9_scale_if_required(
cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
@@ -3757,8 +3757,8 @@
}
#ifdef OUTPUT_YUV_SVC_SRC
// Write out at most 3 spatial layers.
- if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) {
- vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source);
+ if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
+ vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
}
#endif
// Unfiltered raw source used in metrics calculation if the source
@@ -3778,9 +3778,9 @@
}
if ((cpi->use_svc &&
- (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 ||
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 ||
- cpi->svc.current_superframe < 1)) ||
+ (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
+ svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
+ svc->current_superframe < 1)) ||
cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
cpi->resize_state != ORIG) {
cpi->compute_source_sad_onepass = 0;
@@ -3829,20 +3829,31 @@
(cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
- if (cpi->svc.spatial_layer_id == 0)
- cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
+ if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
+ svc->high_source_sad_superframe = cpi->rc.high_source_sad;
+ // On scene change reset temporal layer pattern to TL0.
+ // TODO(marpan/jianj): Fix this to handle case where base
+ // spatial layers are skipped, in which case we should insert
+ // and reset to spatial layer 0 on scene change.
+ if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0) {
+ // rc->high_source_sad gets reset by the call below; save and restore it.
+ int tmp_high_source_sad = cpi->rc.high_source_sad;
+ vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
+ cpi->rc.high_source_sad = tmp_high_source_sad;
+ }
+ }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame, if base layer is key for svc,
// on scene change, or if superframe has layer sync.
- if ((cpi->rc.high_source_sad || cpi->svc.high_source_sad_superframe) &&
- !(cpi->rc.use_post_encode_drop && cpi->svc.last_layer_dropped[0]))
+ if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
+ !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
no_drop_scene_change = 1;
if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
!frame_is_intra_only(cm) && !no_drop_scene_change &&
- !cpi->svc.superframe_has_layer_sync &&
+ !svc->superframe_has_layer_sync &&
(!cpi->use_svc ||
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
if (vp9_rc_drop_frame(cpi)) return 0;
}
@@ -3850,7 +3861,7 @@
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
if (frame_is_intra_only(cm) == 0 &&
- !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) {
+ !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
vp9_scale_references(cpi);
}
@@ -3860,12 +3871,12 @@
if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
if (cpi->sf.svc_use_lowres_part &&
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) {
- if (cpi->svc.prev_partition_svc == NULL) {
+ svc->spatial_layer_id == svc->number_spatial_layers - 2) {
+ if (svc->prev_partition_svc == NULL) {
CHECK_MEM_ERROR(
- cm, cpi->svc.prev_partition_svc,
+ cm, svc->prev_partition_svc,
(BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
- sizeof(*cpi->svc.prev_partition_svc)));
+ sizeof(*svc->prev_partition_svc)));
}
}
@@ -3893,13 +3904,13 @@
if (cpi->use_svc) {
// On non-zero spatial layer, check for disabling inter-layer
// prediction.
- if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
vp9_svc_assert_constraints_pattern(cpi);
}
if (cpi->rc.last_post_encode_dropped_scene_change) {
cpi->rc.high_source_sad = 1;
- cpi->svc.high_source_sad_superframe = 1;
+ svc->high_source_sad_superframe = 1;
// For now disable use_source_sad since Last_Source will not be the previous
// encoded but the dropped one.
cpi->sf.use_source_sad = 0;
@@ -3910,7 +3921,7 @@
// control parameters.
if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
(cpi->rc.high_source_sad ||
- (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -3945,7 +3956,7 @@
// For SVC: all spatial layers are checked for re-encoding.
if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
(cpi->rc.high_source_sad ||
- (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
int frame_size = 0;
// Get an estimate of the encoded frame size.
save_coding_context(cpi);
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2122,7 +2122,7 @@
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
if (is_one_pass_cbr_svc(cpi)) {
- if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
+ if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1);
layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
svc->number_temporal_layers);
svc->layer_context[layer].is_key_frame = 1;
@@ -2750,8 +2750,10 @@
#endif
rc->high_source_sad = 0;
rc->high_num_blocks_with_motion = 0;
- if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width &&
- src_height == last_src_height) {
+ // For SVC: scene detection is only checked on the first spatial layer of
+ // the superframe, using the original/unscaled resolutions.
+ if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode &&
+ src_width == last_src_width && src_height == last_src_height) {
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
int num_mi_cols = cm->mi_cols;
int num_mi_rows = cm->mi_rows;
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -932,7 +932,7 @@
}
// Reset on key frame: reset counters, references and buffer updates.
-void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
+void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) {
int sl, tl;
SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
@@ -940,7 +940,7 @@
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl];
lc->current_video_frame_in_layer = 0;
- lc->frames_from_key_frame = 0;
+ if (is_key) lc->frames_from_key_frame = 0;
}
}
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
@@ -1108,7 +1108,8 @@
if (svc->spatial_layer_id == 0) {
// On base spatial layer: if the current superframe has a layer sync then
// reset the pattern counters and reset to base temporal layer.
- if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi);
+ if (svc->superframe_has_layer_sync)
+ vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME);
}
// If the layer sync is set for this current spatial layer then
// disable the temporal reference.
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -237,7 +237,7 @@
void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
-void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
+void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);