shithub: libvpx

--- a/test/acm_random.h

+++ b/test/acm_random.h

@@ -35,6 +35,13 @@

     return (rand() >> 8) & 0xff;

+  uint8_t Rand8Extremes(void) {

+    // Intended to return values near 0 or near 255 to exercise saturation.

+    // NOTE(review): as written yields 16 * (r & 15) for r < 128, else 8..15.

+    const uint8_t r = Rand8();

+    return r < 128 ? r << 4 : r >> 4;

+  }

   int PseudoUniform(int range) {

     return (rand() >> 8) % range;

--- a/test/convolve_test.cc

+++ b/test/convolve_test.cc

@@ -66,7 +66,7 @@

   // support.

   const int kInterp_Extend = 4;

   const unsigned int intermediate_height =

-    (kInterp_Extend - 1) +     output_height + kInterp_Extend;

+    (kInterp_Extend - 1) + output_height + kInterp_Extend;

   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,

    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height

@@ -75,7 +75,7 @@

    *                               = 23

    * and filter_max_width = 16

*/

-  uint8_t intermediate_buffer[23 * 16];

+  uint8_t intermediate_buffer[71 * 64];

   const int intermediate_next_stride = 1 - intermediate_height * output_width;

   // Horizontal pass (src -> transposed intermediate).

@@ -158,13 +158,13 @@

                                        unsigned int dst_stride,

                                        unsigned int output_width,

                                        unsigned int output_height) {

-  uint8_t tmp[16*16];

+  uint8_t tmp[64*64];

-  assert(output_width <= 16);

-  assert(output_height <= 16);

-  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,

+  assert(output_width <= 64);

+  assert(output_height <= 64);

+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,

                      output_width, output_height);

-  block2d_average_c(tmp, 16, dst_ptr, dst_stride,

+  block2d_average_c(tmp, 64, dst_ptr, dst_stride,

                     output_width, output_height);

@@ -188,10 +188,10 @@

   protected:

     static const int kDataAlignment = 16;

-    static const int kOuterBlockSize = 32;

+    static const int kOuterBlockSize = 128;

     static const int kInputStride = kOuterBlockSize;

     static const int kOutputStride = kOuterBlockSize;

-    static const int kMaxDimension = 16;

+    static const int kMaxDimension = 64;

     int Width() const { return GET_PARAM(0); }

     int Height() const { return GET_PARAM(1); }

@@ -221,7 +221,7 @@

       ::libvpx_test::ACMRandom prng;

       for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)

-        input_[i] = prng.Rand8();

+        input_[i] = prng.Rand8Extremes();

     void CheckGuardBlocks() {

@@ -308,7 +308,30 @@

   vp9_sub_pel_filters_8s,

   vp9_sub_pel_filters_8lp

};

+const int kNumFilterBanks = sizeof(kTestFilterList) /

+    sizeof(kTestFilterList[0]);  // element count of kTestFilterList

+const int kNumFilters = 16;  // filters examined per bank by the loops below

+TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {  // tap-sum bounds

+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {

+    const int16_t (*filters)[8] = kTestFilterList[filter_bank];  // 8 taps each

+    for (int i = 0; i < kNumFilters; i++) {

+      const int p0 = filters[i][0] + filters[i][1];  // taps 0 and 1

+      const int p1 = filters[i][2] + filters[i][3];  // taps 2 and 3

+      const int p2 = filters[i][4] + filters[i][5];  // taps 4 and 5

+      const int p3 = filters[i][6] + filters[i][7];  // taps 6 and 7

+      EXPECT_LE(p0, 128);  // each adjacent tap pair alone stays within 128

+      EXPECT_LE(p1, 128);

+      EXPECT_LE(p2, 128);

+      EXPECT_LE(p3, 128);

+      EXPECT_LE(p0 + p3, 128);  // running sums taken in order p0, p3, p1, p2;

+      EXPECT_LE(p0 + p3 + p1, 128);  // presumably the SSSE3 paddsw order,

+      EXPECT_LE(p0 + p3 + p1 + p2, 128);  // TODO confirm register/tap mapping

+      EXPECT_EQ(p0 + p1 + p2 + p3, 128);  // all eight taps sum to exactly 128

+    }

+  }

+}

 const int16_t kInvalidFilter[8] = { 0 };

 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {

@@ -316,12 +339,9 @@

   uint8_t* const out = output();

   uint8_t ref[kOutputStride * kMaxDimension];

-  const int kNumFilterBanks = sizeof(kTestFilterList) /

-      sizeof(kTestFilterList[0]);

   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {

     const int16_t (*filters)[8] = kTestFilterList[filter_bank];

-    const int kNumFilters = 16;

     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {

       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {

@@ -368,7 +388,7 @@

   ::libvpx_test::ACMRandom prng;

   for (int y = 0; y < Height(); ++y) {

     for (int x = 0; x < Width(); ++x) {

-      const uint8_t r = prng.Rand8();

+      const uint8_t r = prng.Rand8Extremes();

       out[y * kOutputStride + x] = r;

       ref[y * kOutputStride + x] = r;

@@ -440,6 +460,7 @@

 TEST_P(ConvolveTest, ChangeFilterWorks) {

   uint8_t* const in = input();

   uint8_t* const out = output();

+  const int kPixelSelected = 4;

   REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,

                                  kChangeFilters[8], 17, kChangeFilters[4], 16,

@@ -446,10 +467,10 @@

                                  Width(), Height()));

   for (int x = 0; x < Width(); ++x) {

-    if (x < 8)

-      ASSERT_EQ(in[4], out[x]) << "x == " << x;

-    else

-      ASSERT_EQ(in[12], out[x]) << "x == " << x;

+    const int kQ4StepAdjust = x >> 4;

+    const int kFilterPeriodAdjust = (x >> 3) << 3;

+    const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;

+    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;

   REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,

@@ -457,10 +478,10 @@

                                  Width(), Height()));

   for (int y = 0; y < Height(); ++y) {

-    if (y < 8)

-      ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;

-    else

-      ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;

+    const int kQ4StepAdjust = y >> 4;

+    const int kFilterPeriodAdjust = (y >> 3) << 3;

+    const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;

+    // out was written with kOutputStride, so index it with that stride.

+    ASSERT_EQ(in[ref_y * kInputStride], out[y * kOutputStride])

+        << "y == " << y;

   REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,

@@ -468,9 +489,13 @@

                                   Width(), Height()));

   for (int y = 0; y < Height(); ++y) {

+    const int kQ4StepAdjustY = y >> 4;

+    const int kFilterPeriodAdjustY = (y >> 3) << 3;

+    const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;

     for (int x = 0; x < Width(); ++x) {

-      const int ref_x = x < 8 ? 4 : 12;

-      const int ref_y = y < 8 ? 4 : 12;

+      const int kQ4StepAdjustX = x >> 4;

+      const int kFilterPeriodAdjustX = (x >> 3) << 3;

+      const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;

       ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])

           << "x == " << x << ", y == " << y;

@@ -489,9 +514,17 @@

 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(

     make_tuple(4, 4, &convolve8_c),

     make_tuple(8, 4, &convolve8_c),

+    make_tuple(4, 8, &convolve8_c),

     make_tuple(8, 8, &convolve8_c),

     make_tuple(16, 8, &convolve8_c),

-    make_tuple(16, 16, &convolve8_c)));

+    make_tuple(8, 16, &convolve8_c),

+    make_tuple(16, 16, &convolve8_c),

+    make_tuple(32, 16, &convolve8_c),

+    make_tuple(16, 32, &convolve8_c),

+    make_tuple(32, 32, &convolve8_c),

+    make_tuple(64, 32, &convolve8_c),

+    make_tuple(32, 64, &convolve8_c),

+    make_tuple(64, 64, &convolve8_c)));

 #if HAVE_SSSE3

@@ -503,7 +536,15 @@

 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(

     make_tuple(4, 4, &convolve8_ssse3),

     make_tuple(8, 4, &convolve8_ssse3),

+    make_tuple(4, 8, &convolve8_ssse3),

     make_tuple(8, 8, &convolve8_ssse3),

     make_tuple(16, 8, &convolve8_ssse3),

-    make_tuple(16, 16, &convolve8_ssse3)));

+    make_tuple(8, 16, &convolve8_ssse3),

+    make_tuple(16, 16, &convolve8_ssse3),

+    make_tuple(32, 16, &convolve8_ssse3),

+    make_tuple(16, 32, &convolve8_ssse3),

+    make_tuple(32, 32, &convolve8_ssse3),

+    make_tuple(64, 32, &convolve8_ssse3),

+    make_tuple(32, 64, &convolve8_ssse3),

+    make_tuple(64, 64, &convolve8_ssse3)));

 #endif

--- a/vp9/common/vp9_convolve.c

+++ b/vp9/common/vp9_convolve.c

@@ -331,14 +331,14 @@

                        const int16_t *filter_y, int y_step_q4,

                        int w, int h, int taps) {

   /* Fixed size intermediate buffer places limits on parameters.

-   * Maximum intermediate_height is 39, for y_step_q4 == 32,

-   * h == 16, taps == 8.

+   * Maximum intermediate_height is 135, for y_step_q4 == 32,

+   * h == 64, taps == 8.

*/

-  uint8_t temp[16 * 39];

+  uint8_t temp[64 * 135];

   int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;

-  assert(w <= 16);

-  assert(h <= 16);

+  assert(w <= 64);

+  assert(h <= 64);

   assert(taps <= 8);

   assert(y_step_q4 <= 32);

@@ -346,10 +346,10 @@

     intermediate_height = h;

   convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,

-                   temp, 16,

+                   temp, 64,

                    filter_x, x_step_q4, filter_y, y_step_q4,

                    w, intermediate_height, taps);

-  convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,

+  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,

                   filter_x, x_step_q4, filter_y, y_step_q4,

                   w, h, taps);

@@ -360,14 +360,14 @@

                            const int16_t *filter_y, int y_step_q4,

                            int w, int h, int taps) {

   /* Fixed size intermediate buffer places limits on parameters.

-   * Maximum intermediate_height is 39, for y_step_q4 == 32,

-   * h == 16, taps == 8.

+   * Maximum intermediate_height is 135, for y_step_q4 == 32,

+   * h == 64, taps == 8.

*/

-  uint8_t temp[16 * 39];

+  uint8_t temp[64 * 135];

   int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;

-  assert(w <= 16);

-  assert(h <= 16);

+  assert(w <= 64);

+  assert(h <= 64);

   assert(taps <= 8);

   assert(y_step_q4 <= 32);

@@ -375,10 +375,10 @@

     intermediate_height = h;

   convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,

-                   temp, 16,

+                   temp, 64,

                    filter_x, x_step_q4, filter_y, y_step_q4,

                    w, intermediate_height, taps);

-  convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,

+  convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,

                       filter_x, x_step_q4, filter_y, y_step_q4,

                       w, h, taps);

@@ -563,16 +563,16 @@

                          const int16_t *filter_y, int y_step_q4,

                          int w, int h) {

   /* Fixed size intermediate buffer places limits on parameters. */

-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);

-  assert(w <= 16);

-  assert(h <= 16);

+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);

+  assert(w <= 64);

+  assert(h <= 64);

   vp9_convolve8(src, src_stride,

-                temp, 16,

+                temp, 64,

                 filter_x, x_step_q4,

                 filter_y, y_step_q4,

                 w, h);

-  vp9_convolve_avg(temp, 16,

+  vp9_convolve_avg(temp, 64,

                    dst, dst_stride,

                    NULL, 0, /* These unused parameter should be removed! */

                    NULL, 0, /* These unused parameter should be removed! */

--- a/vp9/common/x86/vp9_asm_stubs.c

+++ b/vp9/common/x86/vp9_asm_stubs.c

@@ -278,11 +278,9 @@

                          const int16_t *filter_x, int x_step_q4,

                          const int16_t *filter_y, int y_step_q4,

                          int w, int h) {

-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);

+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);

-  // check w/h due to fixed size fdata2 array

-  assert(w <= 16);

-  assert(h <= 16);

+  assert(h <= 64);

   if (x_step_q4 == 16 && y_step_q4 == 16 &&

       filter_x[3] != 128 && filter_y[3] != 128) {

@@ -324,11 +322,9 @@

                          const int16_t *filter_x, int x_step_q4,

                          const int16_t *filter_y, int y_step_q4,

                          int w, int h) {

-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);

+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);

-  // check w/h due to fixed size fdata2 array

-  assert(w <= 16);

-  assert(h <= 16);

+  assert(h <= 64);

   if (x_step_q4 == 16 && y_step_q4 == 16 &&

       filter_x[3] != 128 && filter_y[3] != 128) {

--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

@@ -81,10 +81,10 @@

     pmaddubsw   xmm4, k4k5

     pmaddubsw   xmm6, k6k7

+    paddsw      xmm0, xmm6

     paddsw      xmm0, xmm2

-    paddsw      xmm0, krd

-    paddsw      xmm4, xmm6

     paddsw      xmm0, xmm4

+    paddsw      xmm0, krd

     psraw       xmm0, 7

     packuswb    xmm0, xmm0

@@ -165,10 +165,10 @@

     pmaddubsw   xmm4, k4k5

     pmaddubsw   xmm6, k6k7

+    paddsw      xmm0, xmm6

     paddsw      xmm0, xmm2

-    paddsw      xmm0, krd

-    paddsw      xmm4, xmm6

     paddsw      xmm0, xmm4

+    paddsw      xmm0, krd

     psraw       xmm0, 7

     packuswb    xmm0, xmm0

@@ -250,10 +250,10 @@

     pmaddubsw   xmm4, k4k5

     pmaddubsw   xmm6, k6k7

+    paddsw      xmm0, xmm6

     paddsw      xmm0, xmm2

-    paddsw      xmm0, krd

-    paddsw      xmm4, xmm6

     paddsw      xmm0, xmm4

+    paddsw      xmm0, krd

     psraw       xmm0, 7

     packuswb    xmm0, xmm0

@@ -285,10 +285,10 @@

     pmaddubsw   xmm4, k4k5

     pmaddubsw   xmm6, k6k7

+    paddsw      xmm0, xmm6

     paddsw      xmm0, xmm2

-    paddsw      xmm4, xmm6

-    paddsw      xmm0, krd

     paddsw      xmm0, xmm4

+    paddsw      xmm0, krd

     psraw       xmm0, 7

     packuswb    xmm0, xmm0

--

⑨