shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -175,6 +175,10 @@

   uint8_t last_sb_high_content;

+  // Content value (e.g., low/high sad/sumdiff) of the current superblock,

+  // based on source sad; computed by avg_source_sad() before partitioning.

+  uint8_t content_state_sb;

   // Used to save the status of whether a block has a low variance in

   // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for

   // 32x32, 9~24 for 16x16.

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -963,6 +963,46 @@

+static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,

+                           int sb_offset) {

+  unsigned int tmp_sse;

+  uint64_t tmp_sad;

+  unsigned int tmp_variance;

+  const BLOCK_SIZE bsize = BLOCK_64X64;

+  uint8_t *src_y = cpi->Source->y_buffer;

+  int src_ystride = cpi->Source->y_stride;

+  uint8_t *last_src_y = cpi->Last_Source->y_buffer;

+  int last_src_ystride = cpi->Last_Source->y_stride;

+  uint64_t avg_source_sad_threshold = 10000;

+  uint64_t avg_source_sad_threshold2 = 12000;

+#if CONFIG_VP9_HIGHBITDEPTH

+  if (cpi->common.use_highbitdepth) return;

+#endif

+  src_y += shift;

+  last_src_y += shift;

+  tmp_sad =

+      cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);

+  tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,

+                                   last_src_ystride, &tmp_sse);

+  // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)

+  if (tmp_sad < avg_source_sad_threshold)

+    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff

+                                                          : kLowSadHighSumdiff;

+  else

+    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff

+                                                          : kHighSadHighSumdiff;

+  if (cpi->content_state_sb_fd != NULL) {

+    if (tmp_sad < avg_source_sad_threshold2) {

+      // Cap the increment to 255.

+      if (cpi->content_state_sb_fd[sb_offset] < 255)

+        cpi->content_state_sb_fd[sb_offset]++;

+    } else {

+      cpi->content_state_sb_fd[sb_offset] = 0;

+    }

+  }

+  return;

+}

 // This function chooses partitioning based on the variance between source and

 // reconstructed last, where variance is computed for down-sampled inputs.

 static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,

@@ -1011,17 +1051,15 @@

   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

   segment_id = xd->mi[0]->segment_id;

-  if (cpi->sf.use_source_sad && cpi->content_state_sb != NULL &&

-      !is_key_frame) {

-    // The sb_offset2 is to make it consistent with the index in the function

-    // vp9_avg_source_sad() in vp9_ratectrl.c.

+  if (cpi->sf.use_source_sad && !is_key_frame) {

     int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);

-    content_state = cpi->content_state_sb[sb_offset2];

+    content_state = x->content_state_sb;

     x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||

                               content_state == kLowSadHighSumdiff)

? 1

                                  : 0;

-    x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];

+    if (cpi->content_state_sb_fd != NULL)

+      x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];

     // If source_sad is low copy the partition without computing the y_sad.

     if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&

         copy_partitioning(cpi, x, mi_row, mi_col, segment_id, sb_offset)) {

@@ -4063,6 +4101,7 @@

     x->color_sensitivity[1] = 0;

     x->sb_is_skin = 0;

     x->skip_low_source_sad = 0;

+    x->content_state_sb = 0;

     if (seg->enabled) {

       const uint8_t *const map =

@@ -4072,6 +4111,12 @@

       if (seg_skip) {

         partition_search_type = FIXED_PARTITION;

+    }

+    if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {

+      int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);

+      int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);

+      avg_source_sad(cpi, x, shift, sb_offset2);

     // Set the partition type of the 64X64 block

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -463,9 +463,6 @@

   vpx_free(cpi->copied_frame_cnt);

   cpi->copied_frame_cnt = NULL;

-  vpx_free(cpi->content_state_sb);

-  cpi->content_state_sb = NULL;

   vpx_free(cpi->content_state_sb_fd);

   cpi->content_state_sb_fd = NULL;

@@ -3094,9 +3091,11 @@

                                        uint8_t *dest) {

   VP9_COMMON *const cm = &cpi->common;

   int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.

-  int compute_source_sad = cpi->sf.use_source_sad ||

-                           cpi->oxcf.content == VP9E_CONTENT_SCREEN ||

-                           cpi->oxcf.rc_mode == VPX_VBR;

+  // Flag to check if it's valid to compute the source sad (used for

+  // scene detection and for superblock content state in CBR mode).

+  // The flag may get reset below based on SVC or resizing state.

+  cpi->compute_source_sad_onepass =

+      cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cm->show_frame;

   vpx_clear_system_state();

@@ -3144,16 +3143,13 @@

   if ((cpi->use_svc &&

        (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 ||

         cpi->svc.current_superframe < 1)) ||

-      cpi->resize_pending || cpi->resize_state || cpi->external_resize) {

-    compute_source_sad = 0;

-    if (cpi->content_state_sb != NULL) {

-      memset(cpi->content_state_sb, 0, (cm->mi_stride >> 3) *

-                                           ((cm->mi_rows >> 3) + 1) *

-                                           sizeof(*cpi->content_state_sb));

+      cpi->resize_pending || cpi->resize_state || cpi->external_resize ||

+      cpi->resize_state != ORIG) {

+    cpi->compute_source_sad_onepass = 0;

+    if (cpi->content_state_sb_fd != NULL)

       memset(cpi->content_state_sb_fd, 0,

              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *

                  sizeof(*cpi->content_state_sb_fd));

-    }

   // Avoid scaling last_source unless its needed.

@@ -3166,11 +3162,16 @@

         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||

        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||

        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||

-       compute_source_sad))

+       cpi->compute_source_sad_onepass))

     cpi->Last_Source =

         vp9_scale_if_required(cm, cpi->unscaled_last_source,

                               &cpi->scaled_last_source, (cpi->oxcf.pass == 0));

+  if (cpi->Last_Source == NULL ||

+      cpi->Last_Source->y_width != cpi->Source->y_width ||

+      cpi->Last_Source->y_height != cpi->Source->y_height)

+    cpi->compute_source_sad_onepass = 0;

   if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {

     memset(cpi->consec_zero_mv, 0,

            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));

@@ -3178,15 +3179,13 @@

   vp9_update_noise_estimate(cpi);

-  // Compute source_sad if the flag compute_source_sad is set, and

-  // only for 1 pass realtime speed >= 5 with show_frame = 1.

-  // TODO(jianj): Look into removing the condition on resize_state,

-  // and improving these conditions (i.e., better handle SVC case and combine

-  // them with condition above in compute_source_sad).

-  if (cpi->oxcf.pass == 0 && cpi->oxcf.mode == REALTIME &&

-      cpi->oxcf.speed >= 5 && cpi->resize_state == ORIG && compute_source_sad &&

-      cm->show_frame)

-    vp9_avg_source_sad(cpi);

+  // Scene detection is used for VBR mode or screen-content case.

+  // Make sure compute_source_sad_onepass is set (which handles SVC case

+  // and dynamic resize).

+  if (cpi->compute_source_sad_onepass &&

+      (cpi->oxcf.rc_mode == VPX_VBR ||

+       cpi->oxcf.content == VP9E_CONTENT_SCREEN))

+    vp9_scene_detection_onepass(cpi);

   // For 1 pass SVC, since only ZEROMV is allowed for upsampled reference

   // frame (i.e, svc->force_zero_mode_spatial_ref = 0), we can avoid this

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -708,12 +708,11 @@

   uint8_t *copied_frame_cnt;

   uint8_t max_copied_frame;

-  // For each superblock: saves the content value (e.g., low/high sad/sumdiff)

-  // based on source sad, prior to encoding the frame.

-  uint8_t *content_state_sb;

   // For each superblock: keeps track of the last time (in frame distance) the

   // the superblock did not have low source sad.

   uint8_t *content_state_sb_fd;

+  int compute_source_sad_onepass;

   LevelConstraint level_constraint;

 } VP9_COMP;

--- a/vp9/encoder/vp9_ratectrl.c

+++ b/vp9/encoder/vp9_ratectrl.c

@@ -2213,7 +2213,7 @@

 // in content and allow rate control to react.

 // This function also handles special case of lag_in_frames, to measure content

 // level in #future frames set by the lag_in_frames.

-void vp9_avg_source_sad(VP9_COMP *cpi) {

+void vp9_scene_detection_onepass(VP9_COMP *cpi) {

   VP9_COMMON *const cm = &cpi->common;

   RATE_CONTROL *const rc = &cpi->rc;

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -2284,8 +2284,6 @@

         int num_samples = 0;

         int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;

         int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;

-        uint64_t avg_source_sad_threshold = 10000;

-        uint64_t avg_source_sad_threshold2 = 12000;

         if (cpi->oxcf.lag_in_frames > 0) {

           src_y = frames[frame]->y_buffer;

           src_ystride = frames[frame]->y_stride;

@@ -2296,34 +2294,12 @@

           for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) {

             // Checker-board pattern, ignore boundary.

             // If the use_source_sad is on, compute for every superblock.

-            if (cpi->sf.use_source_sad ||

-                ((sbi_row > 0 && sbi_col > 0) &&

+            if (((sbi_row > 0 && sbi_col > 0) &&

                  (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&

                  ((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||

                   (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) {

               tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,

                                                last_src_ystride);

-              if (cpi->sf.use_source_sad && cpi->content_state_sb != NULL) {

-                unsigned int tmp_sse;

-                unsigned int tmp_variance = vpx_variance64x64(

-                    src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse);

-                // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)

-                if (tmp_sad < avg_source_sad_threshold)

-                  cpi->content_state_sb[num_samples] =

-                      ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff

-                                                      : kLowSadHighSumdiff;

-                else

-                  cpi->content_state_sb[num_samples] =

-                      ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff

-                                                      : kHighSadHighSumdiff;

-                if (tmp_sad < avg_source_sad_threshold2) {

-                  // Cap the increment to 255.

-                  if (cpi->content_state_sb_fd[num_samples] < 255)

-                    cpi->content_state_sb_fd[num_samples]++;

-                } else {

-                  cpi->content_state_sb_fd[num_samples] = 0;

-                }

-              }

               avg_sad += tmp_sad;

               num_samples++;

--- a/vp9/encoder/vp9_ratectrl.h

+++ b/vp9/encoder/vp9_ratectrl.h

@@ -283,7 +283,7 @@

 int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);

-void vp9_avg_source_sad(struct VP9_COMP *cpi);

+void vp9_scene_detection_onepass(struct VP9_COMP *cpi);

 int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -512,12 +512,9 @@

     if (!cpi->external_resize) sf->use_source_sad = 1;

     if (sf->use_source_sad) {

-      // For SVC allocate for top layer.

-      if (cpi->content_state_sb == NULL &&

+      if (cpi->content_state_sb_fd == NULL &&

           (!cpi->use_svc ||

            cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {

-        cpi->content_state_sb = (uint8_t *)vpx_calloc(

-            (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));

         cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(

             (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));

--

⑨