shithub: libvpx

Download patch

ref: 302e425453e35123cefab8200b976fc6dbf8fd39
parent: 60ada7edb4eb7527859006390b804034a80f1afc
parent: 0dc69c70f70bb320101064fa0dc0643e3d266f57
author: James Bankoski <jimbankoski@google.com>
date: Fri Jul 15 13:33:53 EDT 2016

Merge "postproc : fix function parameters for noise functions."

--- a/test/add_noise_test.cc
+++ b/test/add_noise_test.cc
@@ -18,11 +18,12 @@
 
 namespace {
 
+static const int kNoiseSize = 3072;
+
 // TODO(jimbankoski): make width and height integers not unsigned.
-typedef void (*AddNoiseFunc)(unsigned char *start, char *noise,
-                             char blackclamp[16], char whiteclamp[16],
-                             char bothclamp[16], unsigned int width,
-                             unsigned int height, int pitch);
+typedef void (*AddNoiseFunc)(uint8_t *start, const int8_t *noise,
+                             int blackclamp, int whiteclamp,
+                             int width, int height, int pitch);
 
 class AddNoiseTest
     : public ::testing::TestWithParam<AddNoiseFunc> {
@@ -42,27 +43,19 @@
 }
 
 TEST_P(AddNoiseTest, CheckNoiseAdded) {
-  DECLARE_ALIGNED(16, char, blackclamp[16]);
-  DECLARE_ALIGNED(16, char, whiteclamp[16]);
-  DECLARE_ALIGNED(16, char, bothclamp[16]);
   const int width  = 64;
   const int height = 64;
   const int image_size = width * height;
-  char noise[3072];
-  const int clamp = vpx_setup_noise(4.4, sizeof(noise), noise);
+  int8_t noise[kNoiseSize];
+  const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize);
+  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size,
+                                                            sizeof(*s)));
+  ASSERT_TRUE(s != NULL);
+  memset(s, 99, image_size * sizeof(*s));
 
-  for (int i = 0; i < 16; i++) {
-    blackclamp[i] = clamp;
-    whiteclamp[i] = clamp;
-    bothclamp[i] = 2 * clamp;
-  }
+  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, clamp, clamp,
+                                      width, height, width));
 
-  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
-  memset(s, 99, image_size);
-
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
-
   // Check to make sure we don't end up having either the same or no added
   // noise either vertically or horizontally.
   for (int i = 0; i < image_size - 6 * width - 6; ++i) {
@@ -79,8 +72,8 @@
   // Initialize pixels in the image to 255 and check for roll over.
   memset(s, 255, image_size);
 
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
+  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, clamp, clamp,
+                           width, height, width));
 
   // Check to make sure don't roll over.
   for (int i = 0; i < image_size; ++i) {
@@ -90,8 +83,8 @@
   // Initialize pixels in the image to 0 and check for roll under.
   memset(s, 0, image_size);
 
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
+  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, clamp, clamp,
+                           width, height, width));
 
   // Check to make sure don't roll under.
   for (int i = 0; i < image_size; ++i) {
@@ -102,35 +95,26 @@
 }
 
 TEST_P(AddNoiseTest, CheckCvsAssembly) {
-  DECLARE_ALIGNED(16, char, blackclamp[16]);
-  DECLARE_ALIGNED(16, char, whiteclamp[16]);
-  DECLARE_ALIGNED(16, char, bothclamp[16]);
   const int width  = 64;
   const int height = 64;
   const int image_size = width * height;
-  char noise[3072];
+  int8_t noise[kNoiseSize];
+  const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize);
 
-  const int clamp = vpx_setup_noise(4.4, sizeof(noise), noise);
-
-  for (int i = 0; i < 16; i++) {
-    blackclamp[i] = clamp;
-    whiteclamp[i] = clamp;
-    bothclamp[i] = 2 * clamp;
-  }
-
   uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
   uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+  ASSERT_TRUE(s != NULL);
+  ASSERT_TRUE(d != NULL);
 
   memset(s, 99, image_size);
   memset(d, 99, image_size);
 
   srand(0);
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(s, noise, clamp, clamp, width, height, width));
   srand(0);
-  ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp,
-                                                 whiteclamp, bothclamp,
-                                                 width, height, width));
+  ASM_REGISTER_STATE_CHECK(
+      vpx_plane_add_noise_c(d, noise, clamp, clamp, width, height, width));
 
   for (int i = 0; i < image_size; ++i) {
     EXPECT_EQ(static_cast<int>(s[i]), static_cast<int>(d[i])) << "i = " << i;
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -491,19 +491,12 @@
             || oci->postproc_state.last_noise != noise_level)
         {
             double sigma;
-            int clamp, i;
             struct postproc_state *ppstate = &oci->postproc_state;
             vp8_clear_system_state();
             sigma = noise_level + .5 + .6 * q / 63.0;
-            clamp = vpx_setup_noise(sigma, sizeof(ppstate->noise),
-                                    ppstate->noise);
-            for (i = 0; i < 16; i++)
-            {
-                ppstate->blackclamp[i] = clamp;
-                ppstate->whiteclamp[i] = clamp;
-                ppstate->bothclamp[i] = 2 * clamp;
-            }
-
+            /* Regenerate the noise table; keep the clamp it returns. */
+            ppstate->clamp = vpx_setup_noise(sigma, ppstate->noise,
+                                             sizeof(ppstate->noise));
             ppstate->last_q = q;
             ppstate->last_noise = noise_level;
         }
@@ -511,9 +504,8 @@
         vpx_plane_add_noise
         (oci->post_proc_buffer.y_buffer,
          oci->postproc_state.noise,
-         oci->postproc_state.blackclamp,
-         oci->postproc_state.whiteclamp,
-         oci->postproc_state.bothclamp,
+         oci->postproc_state.clamp,
+         oci->postproc_state.clamp,
          oci->post_proc_buffer.y_width, oci->post_proc_buffer.y_height,
          oci->post_proc_buffer.y_stride);
     }
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -17,12 +17,10 @@
 {
     int           last_q;
     int           last_noise;
-    char          noise[3072];
+    int8_t        noise[3072];
     int           last_base_qindex;
     int           last_frame_valid;
-    DECLARE_ALIGNED(16, char, blackclamp[16]);
-    DECLARE_ALIGNED(16, char, whiteclamp[16]);
-    DECLARE_ALIGNED(16, char, bothclamp[16]);
+    int           clamp;
 };
 #include "onyxc_int.h"
 #include "ppflags.h"
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -412,29 +412,21 @@
 
   ppstate->last_base_qindex = cm->base_qindex;
   ppstate->last_frame_valid = 1;
-
   if (flags & VP9D_ADDNOISE) {
     const int noise_level = ppflags->noise_level;
     if (ppstate->last_q != q ||
         ppstate->last_noise != noise_level) {
       double sigma;
-      int clamp, i;
       vpx_clear_system_state();
       sigma = noise_level + .5 + .6 * q / 63.0;
-      clamp = vpx_setup_noise(sigma, sizeof(ppstate->noise),
-                              ppstate->noise);
-
-      for (i = 0; i < 16; i++) {
-        ppstate->blackclamp[i] = clamp;
-        ppstate->whiteclamp[i] = clamp;
-        ppstate->bothclamp[i] = 2 * clamp;
-      }
+      ppstate->clamp = vpx_setup_noise(sigma, ppstate->noise,
+                                       sizeof(ppstate->noise));
       ppstate->last_q = q;
       ppstate->last_noise = noise_level;
     }
-    vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
-                        ppstate->whiteclamp, ppstate->bothclamp,
-                        ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
+    vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->clamp,
+                        ppstate->clamp, ppbuf->y_width, ppbuf->y_height,
+                        ppbuf->y_stride);
   }
 
   *dest = *ppbuf;
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -25,14 +25,12 @@
 struct postproc_state {
   int last_q;
   int last_noise;
-  char noise[3072];
+  int8_t noise[3072];  // Noise table filled by vpx_setup_noise().
   int last_base_qindex;
   int last_frame_valid;
   MODE_INFO *prev_mip;
   MODE_INFO *prev_mi;
-  DECLARE_ALIGNED(16, char, blackclamp[16]);
-  DECLARE_ALIGNED(16, char, whiteclamp[16]);
-  DECLARE_ALIGNED(16, char, bothclamp[16]);
+  int clamp;  // vpx_setup_noise() result; used as both black and white clamp.
   uint8_t *limits;
 };
 
--- a/vpx_dsp/add_noise.c
+++ b/vpx_dsp/add_noise.c
@@ -17,23 +17,20 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
 
-void vpx_plane_add_noise_c(uint8_t *start, char *noise,
-                           char blackclamp[16],
-                           char whiteclamp[16],
-                           char bothclamp[16],
-                           unsigned int width, unsigned int height, int pitch) {
+void vpx_plane_add_noise_c(uint8_t *start, const int8_t *noise, int blackclamp,
+                           int whiteclamp, int width, int height, int pitch) {
-  unsigned int i, j;
-
+  int i, j;  // Signed, matching width/height/pitch.
+  int bothclamp = blackclamp + whiteclamp;
   for (i = 0; i < height; ++i) {
     uint8_t *pos = start + i * pitch;
-    char  *ref = (char *)(noise + (rand() & 0xff));  // NOLINT
+    const int8_t *ref = (const int8_t *)(noise + (rand() & 0xff));  // NOLINT
 
     for (j = 0; j < width; ++j) {
       int v = pos[j];
 
-      v = clamp(v - blackclamp[0], 0, 255);
-      v = clamp(v + bothclamp[0], 0, 255);
-      v = clamp(v - whiteclamp[0], 0, 255);
+      v = clamp(v - blackclamp, 0, 255);
+      v = clamp(v + bothclamp, 0, 255);
+      v = clamp(v - whiteclamp, 0, 255);
 
       pos[j] = v + ref[j];
     }
@@ -45,7 +42,7 @@
          (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
 }
 
-int vpx_setup_noise(double sigma, int size, char *noise) {
+int vpx_setup_noise(double sigma, int8_t *noise, int size) {
   char char_dist[256];
   int next = 0, i, j;
 
--- a/vpx_dsp/mips/add_noise_msa.c
+++ b/vpx_dsp/mips/add_noise_msa.c
@@ -11,17 +11,16 @@
 #include <stdlib.h>
 #include "./macros_msa.h"
 
-void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
-                             char blackclamp[16], char whiteclamp[16],
-                             char bothclamp[16], uint32_t width,
-                             uint32_t height, int32_t pitch) {
+void vpx_plane_add_noise_msa(uint8_t *start_ptr, const int8_t *noise,
+                             int blackclamp, int whiteclamp,
+                             int width, int height, int32_t pitch) {
   uint32_t i, j;
 
   for (i = 0; i < height / 2; ++i) {
     uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
-    int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
+    const int8_t *ref0_ptr = noise + (rand() & 0xff);
     uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
-    int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
+    const int8_t *ref1_ptr = noise + (rand() & 0xff);
     for (j = width / 16; j--;) {
       v16i8 temp00_s, temp01_s;
       v16u8 temp00, temp01, black_clamp, white_clamp;
@@ -32,8 +31,8 @@
       ref0 = LD_UB(ref0_ptr);
       pos1 = LD_UB(pos1_ptr);
       ref1 = LD_UB(ref1_ptr);
-      black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
-      white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
+      black_clamp = (v16u8)__msa_fill_b(blackclamp);
+      white_clamp = (v16u8)__msa_fill_b(whiteclamp);
       temp00 = (pos0 < black_clamp);
       pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
       temp01 = (pos1 < black_clamp);
--- a/vpx_dsp/postproc.h
+++ b/vpx_dsp/postproc.h
@@ -16,7 +16,7 @@
 #endif
 
 // Fills a noise buffer with gaussian noise strength determined by sigma.
-int vpx_setup_noise(double sigma, int size, char *noise);
+int vpx_setup_noise(double sigma, int8_t *noise, int size);
 
 #ifdef __cplusplus
 }
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1892,7 +1892,7 @@
 # Post Processing
 #
 if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
-    add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+    add_proto qw/void vpx_plane_add_noise/, "uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch";
     specialize qw/vpx_plane_add_noise sse2 msa/;
 
     add_proto qw/void vpx_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
--- a/vpx_dsp/x86/add_noise_sse2.asm
+++ b/vpx_dsp/x86/add_noise_sse2.asm
@@ -11,30 +11,33 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
-;                              unsigned char blackclamp[16],
-;                              unsigned char whiteclamp[16],
-;                              unsigned char bothclamp[16],
-;                              unsigned int width, unsigned int height,
-;                              int pitch)
+;void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise,
+;                              int blackclamp, int whiteclamp,
+;                              int width, int height, int pitch)
 global sym(vpx_plane_add_noise_sse2) PRIVATE
 sym(vpx_plane_add_noise_sse2):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 8
+    SHADOW_ARGS_TO_STACK 7
     GET_GOT     rbx
     push        rsi
     push        rdi
-    ; end prolog
 
-    ; get the clamps in registers
-    mov     rdx, arg(2) ; blackclamp
-    movdqu  xmm3, [rdx]
-    mov     rdx, arg(3) ; whiteclamp
-    movdqu  xmm4, [rdx]
-    mov     rdx, arg(4) ; bothclamp
-    movdqu  xmm5, [rdx]
+    mov         rdx, 0x01010101    ; multiplier that copies a byte into 4 bytes
+    mov         rax, arg(2)        ; blackclamp
+    mul         rdx                ; rax = blackclamp * 0x01010101 (overwrites rdx)
+    movd        xmm3, rax          ; product into the low end of xmm3
+    pshufd      xmm3, xmm3, 0  ; xmm3 is 16 copies of char in blackclamp
 
+    mov         rdx, 0x01010101    ; reload: the previous mul overwrote rdx
+    mov         rax, arg(3)        ; whiteclamp
+    mul         rdx                ; rax = whiteclamp * 0x01010101 (overwrites rdx)
+    movd        xmm4, rax          ; product into the low end of xmm4
+    pshufd      xmm4, xmm4, 0  ; xmm4 is 16 copies of char in whiteclamp
+
+    movdqu      xmm5, xmm3     ; both clamp = black clamp + white clamp
+    paddusb     xmm5, xmm4     ; per-byte unsigned add with saturation
+
 .addnoise_loop:
     call sym(LIBVPX_RAND) WRT_PLT
     mov     rcx, arg(1) ;noise
@@ -42,9 +45,9 @@
     add     rcx, rax
 
     mov     rdi, rcx
-    movsxd  rcx, dword arg(5) ;[Width]
+    movsxd  rcx, dword arg(4) ;[Width]
     mov     rsi, arg(0) ;Pos
-    xor         rax,rax
+    xor     rax, rax
 
 .addnoise_nextset:
       movdqu      xmm1,[rsi+rax]         ; get the source
@@ -62,9 +65,9 @@
       cmp         rax, rcx
       jl          .addnoise_nextset
 
-    movsxd  rax, dword arg(7) ; Pitch
+    movsxd  rax, dword arg(6) ; Pitch
     add     arg(0), rax ; Start += Pitch
-    sub     dword arg(6), 1   ; Height -= 1
+    sub     dword arg(5), 1   ; Height -= 1
     jg      .addnoise_loop
 
     ; begin epilog