shithub: libvpx

Download patch

ref: be013eb396603bd7091cc3bca3b07a28b6ecf027
parent: d004c640133b5def534b219cbf66fce9784581e6
author: paulwilkins <paulwilkins@google.com>
date: Wed Feb 3 10:37:32 EST 2016

Add experimental spatial de-noise filter on key frames.

For forced key frames in particular this helps to make them
blend better with the surrounding frames where noise tends
to be suppressed by a combination of quantization and alt
ref filtering.

Currently disabled by default under an IFDEF flag pending
wider testing.

Change-Id: I971b5cc2b2a4b9e1f11fe06c67ef073f01b25056

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -45,6 +45,7 @@
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_ethread.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mbgraph.h"
@@ -86,6 +87,25 @@
 FILE *keyfile;
 #endif
 
+#ifdef ENABLE_KF_DENOISE
+// Returns 1 when the source frame should be spatially denoised, else 0:
+// never when oxcf.pass is 1, for lossless requests or non intra-only frames.
+static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
+  // Cheapest test first; each helper is only called if still undecided.
+  if (cpi->oxcf.pass == 1) return 0;
+  if (is_lossless_requested(&cpi->oxcf)) return 0;
+  return frame_is_intra_only(&cpi->common) != 0;
+}
+#endif
+
+// Returns 1 when PSNR metrics should be computed for the current frame:
+// PSNR is switched on, oxcf.pass is not 1 and the frame is shown.
+static int is_psnr_calc_enabled(VP9_COMP *cpi) {
+  if (!cpi->b_calculate_psnr) return 0;
+  if (cpi->oxcf.pass == 1) return 0;
+  return cpi->common.show_frame != 0;
+}
+
 static const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
   {LEVEL_1,   829440,      36864,    200,    400,   2, 1,  4,  8},
   {LEVEL_1_1, 2764800,     73728,    800,    1000,  2, 1,  4,  8},
@@ -445,6 +465,11 @@
   vpx_free_frame_buffer(&cpi->scaled_source);
   vpx_free_frame_buffer(&cpi->scaled_last_source);
   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
+#ifdef ENABLE_KF_DENOISE
+  vpx_free_frame_buffer(&cpi->raw_unscaled_source);
+  vpx_free_frame_buffer(&cpi->raw_scaled_source);
+#endif
+
   vp9_lookahead_destroy(cpi->lookahead);
 
   vpx_free(cpi->tile_tok[0][0]);
@@ -738,6 +763,29 @@
                                NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate scaled last source buffer");
+#ifdef ENABLE_KF_DENOISE
+  if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source,
+                               cm->width, cm->height,
+                               cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                               cm->use_highbitdepth,
+#endif
+                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+                               NULL, NULL, NULL))
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate unscaled raw source frame buffer");
+
+  if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source,
+                               cm->width, cm->height,
+                               cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                               cm->use_highbitdepth,
+#endif
+                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+                               NULL, NULL, NULL))
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate scaled raw source frame buffer");
+#endif
 }
 
 
@@ -2225,10 +2273,10 @@
   int i;
   PSNR_STATS psnr;
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &psnr,
+  vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr,
                        cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
 #else
-  vpx_calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr);
+  vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr);
 #endif
 
   for (i = 0; i < 4; ++i) {
@@ -3224,6 +3272,23 @@
                                         &cpi->scaled_source,
                                         (cpi->oxcf.pass == 0));
   }
+  // Unfiltered raw source used in metrics calculation if the source
+  // has been filtered.
+  if (is_psnr_calc_enabled(cpi)) {
+#ifdef ENABLE_KF_DENOISE
+    if (is_spatial_denoise_enabled(cpi)) {
+      cpi->raw_source_frame =
+          vp9_scale_if_required(cm, &cpi->raw_unscaled_source,
+                                &cpi->raw_scaled_source,
+                                (cpi->oxcf.pass == 0));
+    } else {
+      cpi->raw_source_frame = cpi->Source;
+    }
+#else
+    cpi->raw_source_frame = cpi->Source;
+#endif
+  }
+
   // Avoid scaling last_source unless its needed.
   // Last source is needed if vp9_avg_source_sad() is used, or if
   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
@@ -3394,6 +3459,23 @@
                                       &cpi->scaled_source,
                                       (cpi->oxcf.pass == 0));
 
+    // Unfiltered raw source used in metrics calculation if the source
+    // has been filtered.
+    if (is_psnr_calc_enabled(cpi)) {
+#ifdef ENABLE_KF_DENOISE
+      if (is_spatial_denoise_enabled(cpi)) {
+        cpi->raw_source_frame =
+            vp9_scale_if_required(cm, &cpi->raw_unscaled_source,
+                                  &cpi->raw_scaled_source,
+                                  (cpi->oxcf.pass == 0));
+      } else {
+        cpi->raw_source_frame = cpi->Source;
+      }
+#else
+      cpi->raw_source_frame = cpi->Source;
+#endif
+    }
+
     if (cpi->unscaled_last_source != NULL)
       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
                                                &cpi->scaled_last_source,
@@ -3736,6 +3818,170 @@
   return mask;
 }
 
+#ifdef ENABLE_KF_DENOISE
+// Baseline kernel weights for denoise, stored row by row.
+static uint8_t dn_kernal_3[9] = {  // 3x3 kernel, weights sum to 16
+  1, 2, 1,
+  2, 4, 2,
+  1, 2, 1};
+static uint8_t dn_kernal_5[25] = {  // 5x5 kernel, weights sum to 32
+  1, 1, 1, 1, 1,
+  1, 1, 2, 1, 1,
+  1, 2, 4, 2, 1,
+  1, 1, 2, 1, 1,
+  1, 1, 1, 1, 1};
+
+// Accumulate the weighted sample only if it is within thresh of the centre.
+static INLINE void add_denoise_point(int centre_val, int data_val,
+                                     int thresh, uint8_t point_weight,
+                                     int *sum_val, int *sum_weight) {
+  if (abs(centre_val - data_val) > thresh) return;
+  *sum_weight += point_weight;
+  *sum_val += data_val * (int)point_weight;
+}
+
+// Thresholded, weighted smoothing of the single 8-bit sample at src_ptr,
+// written back in place. Reads samples up to 3 rows/columns away.
+static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
+                                  const int strength) {
+  const int centre = (int)*src_ptr;
+  const uint8_t *row;
+  const uint8_t *weights = dn_kernal_5;
+  int taps = 5;   // The kernel is taps x taps.
+  int reach = 2;  // Distance from the centre to the kernel edge.
+  int limit = strength;
+  int peak_dev = 0;
+  int total = 0;
+  int total_weight = 0;
+  int r, c;
+
+  // Largest absolute deviation from the centre over the surrounding 7x7 area.
+  row = src_ptr - 3 * stride - 3;
+  for (r = 0; r < 7; ++r, row += stride) {
+    for (c = 0; c < 7; ++c) {
+      peak_dev = VPXMAX(peak_dev, abs(centre - (int)row[c]));
+    }
+  }
+
+  // Large local deviation: use the 3x3 kernel and halve the threshold.
+  if (peak_dev > (strength + (strength >> 1))) {
+    weights = dn_kernal_3;
+    taps = 3;
+    reach = 1;
+    limit >>= 1;
+  }
+
+  // Accumulate every tap whose sample is within limit of the centre.
+  row = src_ptr - reach * stride - reach;
+  for (r = 0; r < taps; ++r, row += stride) {
+    for (c = 0; c < taps; ++c) {
+      add_denoise_point(centre, (int)row[c], limit, *weights++, &total,
+                        &total_weight);
+    }
+  }
+
+  // Rounded weighted mean; the centre tap always counts when limit >= 0.
+  *src_ptr = (uint8_t)((total + (total_weight >> 1)) / total_weight);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High bitdepth thresholded, weighted smoothing of the single 16-bit sample
+// at src_ptr, written back in place. stride is counted in uint16_t samples.
+static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
+                                         const int strength) {
+  const int centre_val = (int)*src_ptr;
+  int sum_weight = 0;
+  int sum_val = 0;
+  int thresh = strength;
+  int kernal_size = 5;
+  int half_k_size = 2;
+  int i, j;
+  int max_diff = 0;
+  uint16_t *tmp_ptr;
+  uint8_t *kernal_ptr = dn_kernal_5;
+
+  // Maximum deviation of the 7x7 locale from the centre sample VALUE.
+  tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
+  for (i = 0; i < kernal_size + 2; ++i, tmp_ptr += stride) {
+    for (j = 0; j < kernal_size + 2; ++j) {
+      max_diff = VPXMAX(max_diff, abs(centre_val - (int)tmp_ptr[j]));
+    }
+  }
+
+  // Large local deviation: use the 3x3 kernel and halve the threshold.
+  if (max_diff > (strength + (strength >> 1))) {
+    kernal_size = 3;
+    half_k_size = 1;
+    thresh = thresh >> 1;
+    kernal_ptr = dn_kernal_3;
+  }
+
+  // Apply the kernel to the samples within thresh of the centre value.
+  tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
+  for (i = 0; i < kernal_size; ++i, tmp_ptr += stride) {
+    for (j = 0; j < kernal_size; ++j) {
+      add_denoise_point(centre_val, (int)tmp_ptr[j], thresh,
+                        *kernal_ptr++, &sum_val, &sum_weight);
+    }
+  }
+
+  // Update the source value with the rounded weighted mean.
+  *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Apply thresholded spatial noise suppression, in place, to each sample of
+// the width x height plane at buffer (one point-filter call per sample).
+static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
+                                   const int stride, const int width,
+                                   const int height, const int strength) {
+  uint8_t *src_ptr = buffer;
+  int row, col;
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int use_highbd = cpi->common.use_highbitdepth != 0;
+#else
+  (void)cpi;  // Only needed to detect high bitdepth input.
+#endif
+  for (row = 0; row < height; ++row, src_ptr += stride) {
+    for (col = 0; col < width; ++col) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_highbd) {
+        highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]),
+                                     stride, strength);
+        continue;
+      }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      spatial_denoise_point(&src_ptr[col], stride, strength);
+    }
+  }
+}
+
+// Apply thresholded spatial noise suppression to the Y, U and V planes of
+// cpi->Source. The strength follows the active worst quality Q, bounded by
+// arnr_strength, and is roughly tripled for the two chroma planes.
+static void spatial_denoise_frame(VP9_COMP *cpi) {
+  YV12_BUFFER_CONFIG *const frame = cpi->Source;
+
+  // Base the filter strength on the current active max Q.
+  const int q = (int)vp9_convert_qindex_to_q(
+      cpi->twopass.active_worst_quality, cpi->common.bit_depth);
+  const int luma_strength = VPXMAX(cpi->oxcf.arnr_strength >> 2,
+                                   VPXMIN(cpi->oxcf.arnr_strength, (q >> 4)));
+  // Chroma uses 1.5x the luma strength (integer), doubled.
+  const int chroma_strength = (luma_strength + (luma_strength >> 1)) << 1;
+
+  // Luma plane.
+  spatial_denoise_buffer(cpi, frame->y_buffer, frame->y_stride,
+                         frame->y_width, frame->y_height, luma_strength);
+
+  // Both chroma planes share stride, dimensions and strength.
+  spatial_denoise_buffer(cpi, frame->u_buffer, frame->uv_stride,
+                         frame->uv_width, frame->uv_height, chroma_strength);
+  spatial_denoise_buffer(cpi, frame->v_buffer, frame->uv_stride,
+                         frame->uv_width, frame->uv_height, chroma_strength);
+}
+#endif  // ENABLE_KF_DENOISE
+
 static void encode_frame_to_data_rate(VP9_COMP *cpi,
                                       size_t *size,
                                       uint8_t *dest,
@@ -3748,6 +3994,12 @@
   set_ext_overrides(cpi);
   vpx_clear_system_state();
 
+#ifdef ENABLE_KF_DENOISE
+  // Spatial denoise of key frame.
+  if (is_spatial_denoise_enabled(cpi))
+    spatial_denoise_frame(cpi);
+#endif
+
   // Set the arf sign bias for this frame.
   set_arf_sign_bias(cpi);
 
@@ -4473,6 +4725,12 @@
     cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer
                                                            : &source->img;
 
+#ifdef ENABLE_KF_DENOISE
+    // Copy of raw source for metrics calculation.
+    if (is_psnr_calc_enabled(cpi))
+      vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
+#endif
+
     cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
 
     *time_stamp = source->ts_start;
@@ -4597,7 +4855,8 @@
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 
-  if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
+  // Should we calculate metrics for the frame.
+  if (is_psnr_calc_enabled(cpi))
     generate_psnr_packet(cpi);
 
   if (cpi->keep_level_stats && oxcf->pass != 1)
@@ -4621,7 +4880,7 @@
 #endif
 
       if (cpi->b_calculate_psnr) {
-        YV12_BUFFER_CONFIG *orig = cpi->Source;
+        YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
         YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
         PSNR_STATS psnr;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -301,6 +301,9 @@
   double worst;
 } ImageStat;
 
+// Kf noise filtering currently disabled by default in build.
+// #define ENABLE_KF_DENOISE 1
+
 #define CPB_WINDOW_SIZE 4
 #define FRAME_WINDOW_SIZE 128
 #define SAMPLE_RATE_GRACE_P 0.015
@@ -381,6 +384,11 @@
   YV12_BUFFER_CONFIG scaled_source;
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
+#ifdef ENABLE_KF_DENOISE
+  YV12_BUFFER_CONFIG raw_unscaled_source;
+  YV12_BUFFER_CONFIG raw_scaled_source;
+#endif
+  YV12_BUFFER_CONFIG *raw_source_frame;
 
   TileDataEnc *tile_data;
   int allocated_tiles;  // Keep track of memory allocated for tiles.