shithub: libvpx

Download patch

ref: 9b63cb057a73fa1f053dfd665b23fb944a083443
parent: d1eca240fb04fae3039ac28510ba0992eb03248d
parent: 5993b808f03874570830b30f3c91501a04e0964c
author: Johann Koenig <johannkoenig@google.com>
date: Fri Dec 16 20:12:34 EST 2016

Merge "post proc test: add padding for sse2 tests"

--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -120,7 +120,7 @@
   vpx_free(flimits);
 };
 
-TEST_P(VpxPostProcDownAndAcrossMbRowTest, DISABLED_CheckCvsAssembly) {
+TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
   // Size of the underlying data block that will be filtered.
   // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
   // blocks are always a multiple of 8 wide and exactly 8 high.
@@ -128,13 +128,15 @@
   const int block_height = 16;
 
   // 5-tap filter needs 2 padding rows above and below the block in the input.
+  // SSE2 reads 16 at a time. Pad 8 extra rows for widths not a multiple of 16.
   const int input_width = block_width;
-  const int input_height = block_height + 4;
+  const int input_height = block_height + 4 + 8;
   const int input_stride = input_width;
   const int input_size = input_stride * input_height;
 
   // Filter extends output block by 8 samples at left and right edges.
-  const int output_width = block_width + 16;
+  // SSE2 reads 16 at a time. Pad 8 more for widths not a multiple of 16.
+  const int output_width = block_width + 24;
   const int output_height = block_height;
   const int output_stride = output_width;
   const int output_size = output_stride * output_height;
@@ -158,9 +160,11 @@
   uint8_t *const dst_image_ref_ptr = dst_image + 16;
 
   // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock
-  // can have a different filter.
+  // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
+  // it must be padded out.
+  const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
   uint8_t *const flimits =
-      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
+      reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
 
   ACMRandom rnd;
   rnd.Reset(ACMRandom::DeterministicSeed());
@@ -177,7 +181,7 @@
   }
 
   for (int blocks = 0; blocks < block_width; blocks += 8) {
-    (void)memset(flimits, 0, sizeof(*flimits) * block_width);
+    (void)memset(flimits, 0, sizeof(*flimits) * flimits_width);
 
     for (int f = 0; f < 255; f++) {
       (void)memset(flimits + blocks, f, sizeof(*flimits) * 8);