shithub: libvpx

Download patch

ref: dd6134b472f755112391bc6197d87c868a67f132
parent: fc2fc899ae774b724b0aa2e718a8a6aa039d2c04
author: Stefan Holmer <holmer@google.com>
date: Wed May 30 08:17:06 EDT 2012

Added another denoising threshold for finding DC shifts.

Compares the sum of differences between the input block and the averaged
block. If they differ too much the block will not be filtered. Negligible
perfomance hit.

Change-Id: Ib1c31a265efd4d100b3abc4a1ea6675038c8ddde

--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -505,7 +505,7 @@
 # Denoiser filter
 #
 if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
-    prototype void vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
+    prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
     specialize vp8_denoiser_filter sse2
 fi
 
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -15,14 +15,11 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_rtcd.h"
 
-
-static const unsigned int NOISE_MOTION_THRESHOLD = 20 * 20;
+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
 // SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
 static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
 static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
 
-
-
 // The filtering coefficients used for denoizing are adjusted for static
 // blocks, or blocks with very small motion vectors. This is done through
 // the motion magnitude parameter.
@@ -82,11 +79,15 @@
 
 
 
-void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
-                           YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal,
-                           unsigned int motion_magnitude, int y_offset,
-                           int uv_offset)
+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
+                          YV12_BUFFER_CONFIG *running_avg,
+                          MACROBLOCK *signal,
+                          unsigned int motion_magnitude,
+                          int y_offset,
+                          int uv_offset)
 {
+    unsigned char filtered_buf[16*16];
+    unsigned char *filtered = filtered_buf;
     unsigned char *sig = signal->thismb;
     int sig_stride = 16;
     unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@@ -95,6 +96,7 @@
     int avg_y_stride = running_avg->y_stride;
     const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
     int r, c;
+    int sum_diff = 0;
 
     for (r = 0; r < 16; ++r)
     {
@@ -133,13 +135,15 @@
         for (c = 0; c < 16; ++c)
         {
             const int diff = sig[c] - running_avg_y[c];
+            sum_diff += diff;
 
             if (diff * diff < NOISE_DIFF2_THRESHOLD)
             {
-                sig[c] = running_avg_y[c];
+                filtered[c] = running_avg_y[c];
             }
             else
             {
+                filtered[c] = sig[c];
                 running_avg_y[c] = sig[c];
             }
         }
@@ -146,9 +150,16 @@
 
         // Update pointers for next iteration.
         sig += sig_stride;
+        filtered += 16;
         mc_running_avg_y += mc_avg_y_stride;
         running_avg_y += avg_y_stride;
     }
+    if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+    {
+        return COPY_BLOCK;
+    }
+    vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+    return FILTER_BLOCK;
 }
 
 
@@ -215,8 +226,10 @@
     MV_REFERENCE_FRAME frame = x->best_reference_frame;
     MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
 
+    enum vp8_denoiser_decision decision = FILTER_BLOCK;
+
     // Motion compensate the running average.
-    if(zero_frame)
+    if (zero_frame)
     {
         YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
         YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
@@ -238,7 +251,7 @@
         mv_col = x->best_sse_mv.as_mv.col;
         mv_row = x->best_sse_mv.as_mv.row;
 
-        if(frame == INTRA_FRAME ||
+        if (frame == INTRA_FRAME ||
             (mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
              sse_diff < SSE_DIFF_THRESHOLD))
         {
@@ -296,6 +309,20 @@
     if (best_sse > SSE_THRESHOLD || motion_magnitude2
            > 8 * NOISE_MOTION_THRESHOLD)
     {
+        decision = COPY_BLOCK;
+    }
+
+    if (decision == FILTER_BLOCK)
+    {
+        // Filter.
+        decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
+                                       &denoiser->yv12_running_avg[LAST_FRAME],
+                                       x,
+                                       motion_magnitude2,
+                                       recon_yoffset, recon_uvoffset);
+    }
+    if (decision == COPY_BLOCK)
+    {
         // No filtering of this block; it differs too much from the predictor,
         // or the motion vector magnitude is considered too big.
         vp8_copy_mem16x16(
@@ -302,12 +329,5 @@
                 x->thismb, 16,
                 denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,
                 denoiser->yv12_running_avg[LAST_FRAME].y_stride);
-        return;
     }
-
-    // Filter.
-    vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
-                        &denoiser->yv12_running_avg[LAST_FRAME], x,
-                        motion_magnitude2,
-                        recon_yoffset, recon_uvoffset);
 }
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -14,6 +14,13 @@
 #include "block.h"
 
 #define NOISE_DIFF2_THRESHOLD (75)
+#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+
+enum vp8_denoiser_decision
+{
+  COPY_BLOCK,
+  FILTER_BLOCK,
+};
 
 typedef struct vp8_denoiser
 {
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -17,11 +17,25 @@
 
 #include <emmintrin.h>
 
-void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
-                              YV12_BUFFER_CONFIG *running_avg,
-                              MACROBLOCK *signal, unsigned int motion_magnitude,
-                              int y_offset, int uv_offset)
+union sum_union {
+    __m128i v;
+    short e[8];
+};
+
+inline int sum_vec_128i(__m128i vec)
 {
+    union sum_union s = { .v = vec };
+    return s.e[0] + s.e[1] + s.e[2] + s.e[3] +
+        s.e[4] + s.e[5] + s.e[6] + s.e[7];
+}
+
+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
+                             YV12_BUFFER_CONFIG *running_avg,
+                             MACROBLOCK *signal, unsigned int motion_magnitude,
+                             int y_offset, int uv_offset)
+{
+    unsigned char filtered_buf[16*16];
+    unsigned char *filtered = filtered_buf;
     unsigned char *sig = signal->thismb;
     int sig_stride = 16;
     unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@@ -30,6 +44,7 @@
     int avg_y_stride = running_avg->y_stride;
     const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
     int r, c;
+    __m128i sum_diff = { 0 };
 
     for (r = 0; r < 16; ++r)
     {
@@ -110,6 +125,8 @@
         // isn't classified as noise.
         diff0 = _mm_sub_epi16(v_sig0, res0);
         diff1 = _mm_sub_epi16(v_sig1, res2);
+        sum_diff = _mm_add_epi16(sum_diff, _mm_add_epi16(diff0, diff1));
+
         diff0sq = _mm_mullo_epi16(diff0, diff0);
         diff1sq = _mm_mullo_epi16(diff1, diff1);
         diff_sq = _mm_packus_epi16(diff0sq, diff1sq);
@@ -118,11 +135,18 @@
         p1 = _mm_andnot_si128(take_running, v_sig);
         p2 = _mm_or_si128(p0, p1);
         _mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2);
-        _mm_storeu_si128((__m128i *)(&sig[0]), p2);
+        _mm_storeu_si128((__m128i *)(&filtered[0]), p2);
 
         // Update pointers for next iteration.
         sig += sig_stride;
+        filtered += 16;
         mc_running_avg_y += mc_avg_y_stride;
         running_avg_y += avg_y_stride;
     }
+    if (abs(sum_vec_128i(sum_diff)) > SUM_DIFF_THRESHOLD)
+    {
+        return COPY_BLOCK;
+    }
+    vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+    return FILTER_BLOCK;
 }