shithub: libvpx

--- a/examples/vp9_spatial_svc_encoder.c

+++ b/examples/vp9_spatial_svc_encoder.c

@@ -730,6 +730,8 @@

   vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);

+  vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);

   // Encode frames

   while (!end_of_stream) {

     vpx_codec_iter_t iter = NULL;

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -3683,12 +3683,15 @@

   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now

   // (need to check encoding time cost for doing this for speed 8).

   cpi->rc.high_source_sad = 0;

-  if (cpi->compute_source_sad_onepass && cm->show_frame &&

+  if (cm->show_frame &&

       (cpi->oxcf.rc_mode == VPX_VBR ||

        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||

        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc)))

     vp9_scene_detection_onepass(cpi);

+  if (cpi->svc.spatial_layer_id == 0)

+    cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;

   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame

   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can

   // avoid this frame-level upsampling (for non intra_only frames).

@@ -3774,10 +3777,10 @@

   // Check if we should drop this frame because of high overshoot.

   // Only for frames where high temporal-source SAD is detected.

-  if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&

-      cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME &&

-      cpi->oxcf.content == VP9E_CONTENT_SCREEN &&

-      cpi->rc.high_source_sad == 1) {

+  // For SVC: all spatial layers are checked for re-encoding.

+  if (cpi->sf.re_encode_overshoot_rt &&

+      (cpi->rc.high_source_sad ||

+       (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {

     int frame_size = 0;

     // Get an estimate of the encoded frame size.

     save_coding_context(cpi);

--- a/vp9/encoder/vp9_pickmode.c

+++ b/vp9/encoder/vp9_pickmode.c

@@ -1503,10 +1503,12 @@

   int svc_mv_col = 0;

   int svc_mv_row = 0;

   unsigned int thresh_svc_skip_golden = 500;

+  if (cpi->svc.spatial_layer_id > 0 && cpi->svc.high_source_sad_superframe)

+    thresh_svc_skip_golden = 0;

   // Lower the skip threshold if lower spatial layer is better quality relative

   // to current layer.

-  if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&

-      cm->base_qindex > cpi->svc.lower_layer_qindex + 15)

+  else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&

+           cm->base_qindex > cpi->svc.lower_layer_qindex + 15)

     thresh_svc_skip_golden = 100;

   // Increase skip threshold if lower spatial layer is lower quality relative

   // to current layer.

--- a/vp9/encoder/vp9_ratectrl.c

+++ b/vp9/encoder/vp9_ratectrl.c

@@ -2284,18 +2284,34 @@

 void vp9_scene_detection_onepass(VP9_COMP *cpi) {

   VP9_COMMON *const cm = &cpi->common;

   RATE_CONTROL *const rc = &cpi->rc;

+  YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source;

+  YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source;

+  uint8_t *src_y;

+  int src_ystride;

+  int src_width;

+  int src_height;

+  uint8_t *last_src_y;

+  int last_src_ystride;

+  int last_src_width;

+  int last_src_height;

+  if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL ||

+      (cpi->use_svc && cpi->svc.current_superframe == 0))

+    return;

+  src_y = unscaled_src->y_buffer;

+  src_ystride = unscaled_src->y_stride;

+  src_width = unscaled_src->y_width;

+  src_height = unscaled_src->y_height;

+  last_src_y = unscaled_last_src->y_buffer;

+  last_src_ystride = unscaled_last_src->y_stride;

+  last_src_width = unscaled_last_src->y_width;

+  last_src_height = unscaled_last_src->y_height;

 #if CONFIG_VP9_HIGHBITDEPTH

   if (cm->use_highbitdepth) return;

 #endif

   rc->high_source_sad = 0;

-  if (cpi->Last_Source != NULL &&

-      cpi->Last_Source->y_width == cpi->Source->y_width &&

-      cpi->Last_Source->y_height == cpi->Source->y_height) {

+  if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width &&

+      src_height == last_src_height) {

     YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };

-    uint8_t *src_y = cpi->Source->y_buffer;

-    int src_ystride = cpi->Source->y_stride;

-    uint8_t *last_src_y = cpi->Last_Source->y_buffer;

-    int last_src_ystride = cpi->Last_Source->y_stride;

     int start_frame = 0;

     int frames_to_buffer = 1;

     int frame = 0;

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -374,6 +374,7 @@

   sf->use_compound_nonrd_pickmode = 0;

   sf->nonrd_keyframe = 0;

   sf->svc_use_lowres_part = 0;

+  sf->re_encode_overshoot_rt = 0;

   if (speed >= 1) {

     sf->allow_txfm_domain_distortion = 1;

@@ -534,6 +535,10 @@

     // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent

     // increase in encoding time.

     if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;

+    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&

+        cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&

+        cpi->oxcf.content == VP9E_CONTENT_SCREEN)

+      sf->re_encode_overshoot_rt = 1;

   if (speed >= 6) {

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -508,6 +508,10 @@

   // For SVC: enables use of partition from lower spatial resolution.

   int svc_use_lowres_part;

+  // Enable re-encoding on scene change with potential high overshoot,

+  // for real-time encoding flow.

+  int re_encode_overshoot_rt;

 } SPEED_FEATURES;

 struct VP9_COMP;

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -712,6 +712,8 @@

     cpi->svc.non_reference_frame = 1;

+  if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0;

   if (vp9_set_size_literal(cpi, width, height) != 0)

     return VPX_CODEC_INVALID_PARAM;

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -118,6 +118,11 @@

   SVC_LAYER_DROP_MODE framedrop_mode;

   INTER_LAYER_PRED disable_inter_layer_pred;

+  // Flag to indicate scene change at current superframe. Scene detection is

+  // currently checked for each superframe prior to encoding, on the full

+  // resolution source.

+  int high_source_sad_superframe;

 } SVC;

 struct VP9_COMP;

--

⑨