shithub: libvpx

Download patch

ref: 7f2daa74a05beee77003ef4288eafb3e3db9a531
parent: 6036a0d24fa0946c5830f2a925bd427428c8afb1
author: Marco <marpan@google.com>
date: Mon Feb 13 05:16:42 EST 2017

vp9: Incorporate source sum_diff into non-rd partition thresholds.

Increase the variance partition thresholds for superblocks that
have low sum-diff (from source analysis prior to encoding frame).
Use it for now only for speed >= 7 or for denoising on.

Small change on metrics for the RTC set: less than ~0.1 avgPSNR decrease,
for both speed 7 and 8.

Change-Id: I38325046ebd5f371f51d6e91233d68ff73561af1

--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -565,12 +565,13 @@
 }
 
 // Scale/increase the partition threshold for denoiser speed-up.
-int64_t vp9_scale_part_thresh(int64_t threshold,
-                              VP9_DENOISER_LEVEL noise_level) {
-  if (noise_level >= kDenLow)
-    return ((5 * threshold) >> 2);
+int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level,
+                              int content_state) {  // content_state is compared against CONTENT_STATE_SB values.
+  if ((content_state == kLowSadLowSumdiff) ||
+      (content_state == kHighSadLowSumdiff) || noise_level == kDenHigh)  // Low sum-diff content, or highest noise level.
+    return (3 * threshold) >> 1;  // 3/2 of the input threshold.
   else
-    return threshold;
+    return (5 * threshold) >> 2;  // 5/4 of the input threshold; this version never returns it unscaled.
 }
 
 //  Scale/increase the ac skip threshold for denoiser speed-up.
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -97,8 +97,8 @@
 
 void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level);
 
-int64_t vp9_scale_part_thresh(int64_t threshold,
-                              VP9_DENOISER_LEVEL noise_level);
+int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level,
+                              int content_state);
 
 int64_t vp9_scale_acskip_thresh(int64_t threshold,
                                 VP9_DENOISER_LEVEL noise_level,
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -461,16 +461,35 @@
   return 0;
 }
 
+int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, int width,
+                                  int height, int content_state) {  // Returns threshold_base or 5/4 of it.
+  if (speed >= 8) {
+    if (width <= 640 && height <= 480)  // Up to 640x480: scale regardless of content_state.
+      return (5 * threshold_base) >> 2;
+    else if ((content_state == kLowSadLowSumdiff) ||
+             (content_state == kHighSadLowSumdiff))  // Larger frames: scale only for low sum-diff states.
+      return (5 * threshold_base) >> 2;
+  } else if (speed == 7) {  // Speed 7: frame size is not considered.
+    if ((content_state == kLowSadLowSumdiff) ||
+        (content_state == kHighSadLowSumdiff)) {  // Scale only for low sum-diff states.
+      return (5 * threshold_base) >> 2;
+    }
+  }
+  return threshold_base;  // All other cases, including speed < 7: unchanged.
+}
+
 // Set the variance split thresholds for following the block sizes:
 // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
 // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
 // currently only used on key frame.
-static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
+                               int content_state) {
   VP9_COMMON *const cm = &cpi->common;
   const int is_key_frame = (cm->frame_type == KEY_FRAME);
   const int threshold_multiplier = is_key_frame ? 20 : 1;
   int64_t threshold_base =
       (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
+
   if (is_key_frame) {
     thresholds[0] = threshold_base;
     thresholds[1] = threshold_base >> 2;
@@ -489,14 +508,18 @@
         threshold_base = (7 * threshold_base) >> 3;
     }
 #if CONFIG_VP9_TEMPORAL_DENOISING
-    if (cpi->oxcf.noise_sensitivity > 0)
+    if (cpi->oxcf.noise_sensitivity > 0 &&
+        cpi->denoiser.denoising_level >= kDenLow)
+      threshold_base = vp9_scale_part_thresh(
+          threshold_base, cpi->denoiser.denoising_level, content_state);
+    else
       threshold_base =
-          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level);
-    else if (cpi->oxcf.speed >= 8 && cm->width <= 640 && cm->height <= 480)
-      threshold_base = (5 * threshold_base) >> 2;
+          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
+                                    cm->height, content_state);
 #else
-    if (cpi->oxcf.speed >= 8 && cm->width <= 640 && cm->height <= 480)
-      threshold_base = (5 * threshold_base) >> 2;
+    // Increase base variance threshold based on content_state/sum_diff level.
+    threshold_base = scale_part_thresh_sumdiff(
+        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
 #endif
     thresholds[0] = threshold_base;
     thresholds[2] = threshold_base << cpi->oxcf.speed;
@@ -514,7 +537,8 @@
   }
 }
 
-void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
+void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
+                                           int content_state) {
   VP9_COMMON *const cm = &cpi->common;
   SPEED_FEATURES *const sf = &cpi->sf;
   const int is_key_frame = (cm->frame_type == KEY_FRAME);
@@ -522,7 +546,7 @@
       sf->partition_search_type != REFERENCE_PARTITION) {
     return;
   } else {
-    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
     // The thresholds below are not changed locally.
     if (is_key_frame) {
       cpi->vbp_threshold_sad = 0;
@@ -929,6 +953,7 @@
   int avg_16x16[4];
   int64_t threshold_4x4avg;
   NOISE_LEVEL noise_level = kLow;
+  int content_state = 0;
   uint8_t *s;
   const uint8_t *d;
   int sp;
@@ -956,25 +981,31 @@
 
   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
   segment_id = xd->mi[0]->segment_id;
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
-    if (cyclic_refresh_segment_id_boosted(segment_id)) {
-      int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-      set_vbp_thresholds(cpi, thresholds, q);
-    }
-  }
 
   if (cpi->sf.use_source_sad && !is_key_frame) {
     // The sb_offset2 is to make it consistent with the index in the function
     // vp9_avg_source_sad() in vp9_ratectrl.c.
     int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
-    x->skip_low_source_sad = cpi->avg_source_sad_sb[sb_offset2] == 1 ? 1 : 0;
-    // If avg_source_sad is lower than the threshold, copy the partition without
-    // computing the y_sad.
-    if (cpi->avg_source_sad_sb[sb_offset2] && cpi->sf.copy_partition_flag &&
+    content_state = cpi->content_state_sb[sb_offset2];
+    x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
+                              content_state == kLowSadHighSumdiff)
+                                 ? 1
+                                 : 0;
+    // If source_sad is low copy the partition without computing the y_sad.
+    if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
         copy_partitioning(cpi, x, mi_row, mi_col, segment_id, sb_offset)) {
       chroma_check(cpi, x, bsize, y_sad, is_key_frame);
       return 0;
     }
+  }
+
+  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+    if (cyclic_refresh_segment_id_boosted(segment_id)) {
+      int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+      set_vbp_thresholds(cpi, thresholds, q, content_state);
+    }
+  } else {
+    set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
   }
 
   // For non keyframes, disable 4x4 average for low resolution when speed = 8
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -42,7 +42,8 @@
 void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td,
                        int tile_row, int tile_col, int mi_row);
 
-void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);
+void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q,
+                                           int content_state);
 
 #ifdef __cplusplus
 }  // extern "C"
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -463,8 +463,8 @@
   vpx_free(cpi->copied_frame_cnt);
   cpi->copied_frame_cnt = NULL;
 
-  vpx_free(cpi->avg_source_sad_sb);
-  cpi->avg_source_sad_sb = NULL;
+  vpx_free(cpi->content_state_sb);
+  cpi->content_state_sb = NULL;
 
   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
   cpi->cyclic_refresh = NULL;
@@ -3221,7 +3221,7 @@
   }
 
   vp9_set_quantizer(cm, q);
-  vp9_set_variance_partition_thresholds(cpi, q);
+  vp9_set_variance_partition_thresholds(cpi, q, 0);
 
   setup_frame(cpi);
 
@@ -3264,7 +3264,7 @@
     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
       vpx_clear_system_state();
       vp9_set_quantizer(cm, q);
-      vp9_set_variance_partition_thresholds(cpi, q);
+      vp9_set_variance_partition_thresholds(cpi, q, 0);
       suppress_active_map(cpi);
       // Turn-off cyclic refresh for re-encoded frame.
       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -131,6 +131,14 @@
   RESIZE_DYNAMIC = 2  // Coded size of each frame is determined by the codec.
 } RESIZE_TYPE;
 
+typedef enum {  // Per-superblock source content class, as stored in cpi->content_state_sb[].
+  kInvalid = 0,  // Not classified; matches the zero fill from vpx_calloc.
+  kLowSadLowSumdiff = 1,  // Source SAD below avg_source_sad_threshold, (sse - variance) < 25.
+  kLowSadHighSumdiff = 2,  // Source SAD below the threshold, (sse - variance) >= 25.
+  kHighSadLowSumdiff = 3,  // Source SAD at or above the threshold, (sse - variance) < 25.
+  kHighSadHighSumdiff = 4,  // Source SAD at or above the threshold, (sse - variance) >= 25.
+} CONTENT_STATE_SB;
+
 typedef struct VP9EncoderConfig {
   BITSTREAM_PROFILE profile;
   vpx_bit_depth_t bit_depth;     // Codec bit-depth.
@@ -697,7 +705,7 @@
   uint8_t *copied_frame_cnt;
   uint8_t max_copied_frame;
 
-  uint8_t *avg_source_sad_sb;
+  uint8_t *content_state_sb;
 
   LevelConstraint level_constraint;
 } VP9_COMP;
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -15,6 +15,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
@@ -2290,9 +2291,20 @@
                   (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) {
               tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
                                                last_src_ystride);
-              if (cpi->sf.use_source_sad)
-                cpi->avg_source_sad_sb[num_samples] =
-                    tmp_sad < avg_source_sad_threshold ? 1 : 0;
+              if (cpi->sf.use_source_sad) {
+                unsigned int tmp_sse;
+                unsigned int tmp_variance = vpx_variance64x64(
+                    src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse);
+                // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
+                if (tmp_sad < avg_source_sad_threshold)
+                  cpi->content_state_sb[num_samples] =
+                      ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
+                                                      : kLowSadHighSumdiff;
+                else
+                  cpi->content_state_sb[num_samples] =
+                      ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
+                                                      : kHighSadHighSumdiff;
+              }
               avg_sad += tmp_sad;
               num_samples++;
             }
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -498,8 +498,8 @@
         !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE)
       sf->use_source_sad = 1;
     if (sf->use_source_sad) {
-      if (cpi->avg_source_sad_sb == NULL) {
-        cpi->avg_source_sad_sb = (uint8_t *)vpx_calloc(
+      if (cpi->content_state_sb == NULL) {
+        cpi->content_state_sb = (uint8_t *)vpx_calloc(
             (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
       }
     }