ref: 97848890a99af6604df13ec38c094a01aea40f0e
parent: 69e9a3949870cdc3e18b93a4451867c18a3cf8fc
author: Marco Paniconi <marpan@google.com>
date: Wed Aug 8 10:01:26 EDT 2018
vp9: Allow for overshoot detection for non-screen CBR mode. For CBR real-time mode: refactor usage of the speed feature to handle overshoot on slide/scene change. Add 2 modes to indicate how a slide/scene change is processed for re-setting Q/rate control. Keep the speed feature set to 1 for speed >= 5, otherwise set it to 0. Video content and screen content are now handled in a similar way, though with different thresholds. Some fixes to thresholds and reset: correct the reset of the buffer level to the optimal level for each temporal layer, if the scene change frame will be encoded at max_q. Also increase the min_thresh for video mode (non-screen content): this is to avoid scene change detection in cases like large lighting changes or camera focus changes. The increase in min_thresh also makes it more robust to a sudden increase in noise level. Change-Id: I256d350da6e92d2ddc09f100fc06ac147cbc1e49
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4052,13 +4052,12 @@
vp9_svc_assert_constraints_pattern(cpi);
}
- if (!cpi->sf.re_encode_overshoot_rt &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+ // Check if this high_source_sad (scene/slide change) frame should be
+ // encoded at high/max QP, and if so, set the q and adjust some rate
+ // control parameters.
+ if (cpi->sf.overshoot_detection_rt == 1 &&
(cpi->rc.high_source_sad ||
(cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
- // Check if this high_source_sad (scene/slide change) frame should be
- // encoded at high/max QP, and if so, set the q and adjust some rate
- // control parameters.
if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -4087,10 +4086,11 @@
vp9_encode_frame(cpi);
- // Check if we should drop this frame because of high overshoot.
- // Only for frames where high temporal-source SAD is detected.
+ // Check if we should re-encode this frame at high Q because of high
+ // overshoot based on the encoded frame size. Only for frames where
+ // high temporal-source SAD is detected.
// For SVC: all spatial layers are checked for re-encoding.
- if (cpi->sf.re_encode_overshoot_rt &&
+ if (cpi->sf.overshoot_detection_rt == 2 &&
(cpi->rc.high_source_sad ||
(cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
int frame_size = 0;
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2648,10 +2648,8 @@
float thresh = 8.0f;
uint32_t thresh_key = 140000;
if (cpi->oxcf.speed <= 5) thresh_key = 240000;
- if (cpi->oxcf.rc_mode == VPX_VBR) {
- min_thresh = 65000;
- thresh = 2.1f;
- }
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000;
+ if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f;
if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) {
const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2);
@@ -2822,14 +2820,16 @@
SPEED_FEATURES *const sf = &cpi->sf;
int thresh_qp = 7 * (rc->worst_quality >> 3);
int thresh_rate = rc->avg_frame_bandwidth << 3;
- // Lower rate threshold for video.
+ // Lower thresh_qp for video (more overshoot at lower Q) to be
+ // more conservative for video.
if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
- thresh_rate = rc->avg_frame_bandwidth << 2;
+ thresh_qp = rc->worst_quality >> 1;
// If this decision is not based on an encoded frame size but just on
- // scene/slide change detection (i.e., re_encode_overshoot_rt = 0), adjust the
- // qp_thresh and skip the (frame_size > thresh_rate) condition in this case.
- if (!sf->re_encode_overshoot_rt) thresh_qp = 3 * (rc->worst_quality >> 2);
- if ((!sf->re_encode_overshoot_rt || frame_size > thresh_rate) &&
+ // scene/slide change detection (i.e., overshoot_detection_rt = 1),
+ // for now skip the (frame_size > thresh_rate) condition in this case.
+ // TODO(marpan): Use a better size/rate condition for this case and
+ // adjust thresholds.
+ if ((sf->overshoot_detection_rt == 1 || frame_size > thresh_rate) &&
cm->base_qindex < thresh_qp) {
double rate_correction_factor =
cpi->rc.rate_correction_factors[INTER_NORMAL];
@@ -2846,7 +2846,7 @@
// and the encoded frame used alot of Intra modes, then force hybrid_intra
// encoding for the re-encode on this scene change. hybrid_intra will
// use rd-based intra mode selection for small blocks.
- if (sf->re_encode_overshoot_rt && frame_size > (thresh_rate << 1) &&
+ if (sf->overshoot_detection_rt == 2 && frame_size > (thresh_rate << 1) &&
cpi->svc.spatial_layer_id == 0) {
MODE_INFO **mi = cm->mi_grid_visible;
int sum_intra_usage = 0;
@@ -2900,8 +2900,8 @@
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
lrc->avg_frame_qindex[INTER_FRAME] = *q;
- lrc->buffer_level = rc->optimal_buffer_level;
- lrc->bits_off_target = rc->optimal_buffer_level;
+ lrc->buffer_level = lrc->optimal_buffer_level;
+ lrc->bits_off_target = lrc->optimal_buffer_level;
lrc->rc_1_frame = 0;
lrc->rc_2_frame = 0;
lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -405,7 +405,7 @@
sf->use_compound_nonrd_pickmode = 0;
sf->nonrd_keyframe = 0;
sf->svc_use_lowres_part = 0;
- sf->re_encode_overshoot_rt = 0;
+ sf->overshoot_detection_rt = 0;
sf->disable_16x16part_nonkey = 0;
sf->disable_golden_ref = 0;
sf->enable_tpl_model = 0;
@@ -570,11 +570,9 @@
// Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
// increase in encoding time.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
- cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
- (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
- sf->re_encode_overshoot_rt = 1;
- }
+ if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+ cpi->oxcf.rc_mode == VPX_CBR)
+ sf->overshoot_detection_rt = 1;
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
cm->width <= 1280 && cm->height <= 720) {
sf->use_altref_onepass = 1;
@@ -583,7 +581,6 @@
}
if (speed >= 6) {
- sf->re_encode_overshoot_rt = 0;
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) {
sf->use_altref_onepass = 1;
sf->use_compound_nonrd_pickmode = 1;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -542,9 +542,17 @@
// For SVC: enables use of partition from lower spatial resolution.
int svc_use_lowres_part;
- // Enable re-encoding on scene change with potential high overshoot,
- // for real-time encoding flow.
- int re_encode_overshoot_rt;
+ // Flag to indicate process for handling overshoot on slide/scene change,
+ // for real-time CBR mode.
+ // 0: no reaction to rate control on a detected slide/scene change
+ // (prior to encoding the frame).
+ // 1: set to larger Q based only on the detected slide/scene change
+ // and current/past Q. No second pass encoding, so faster than option 2.
+ // 2: based on (first pass) encoded frame, if large frame size is detected
+ // then set to higher Q for second encode. This involves 2 pass encoding
+ // on slide change, so slower than 1, but more accurate for detecting
+ // overshoot.
+ int overshoot_detection_rt;
// Disable partitioning of 16x16 blocks.
int disable_16x16part_nonkey;