shithub: libvpx

--- a/test/convolve_test.cc

+++ b/test/convolve_test.cc

@@ -456,45 +456,86 @@

     { 128}

};

+/* This test exercises the horizontal and vertical filter functions. */

 TEST_P(ConvolveTest, ChangeFilterWorks) {

   uint8_t* const in = input();

   uint8_t* const out = output();

+  /* Assume that the first input sample is at the 8/16th position. */

+  const int kInitialSubPelOffset = 8;

+  /* Filters are 8-tap, so the first filter tap will be applied to the pixel

+   * at position -3 with respect to the current filtering position. Since

+   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,

+   * which is non-zero only in the last tap. So, applying the filter at the

+   * current input position will result in an output equal to the pixel at

+   * offset +4 (-3 + 7) with respect to the current filtering position.

+   */

   const int kPixelSelected = 4;

+  /* Assume that each output pixel requires us to step on by 17/16th pixels in

+   * the input.

+   */

+  const int kInputPixelStep = 17;

+  /* The filters are setup in such a way that the expected output produces

+   * sets of 8 identical output samples. As the filter position moves to the

+   * next 1/16th pixel position the only active (=128) filter tap moves one

+   * position to the left, resulting in the same input pixel being replicated

+   * in to the output for 8 consecutive samples. After each set of 8 positions

+   * the filters select a different input pixel. kFilterPeriodAdjust below

+   * computes which input pixel is written to the output for a specified

+   * x or y position.

+   */

+  /* Test the horizontal filter. */

   REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,

-                                 kChangeFilters[8], 17, kChangeFilters[4], 16,

-                                 Width(), Height()));

+                                 kChangeFilters[kInitialSubPelOffset],

+                                 kInputPixelStep, NULL, 0, Width(), Height()));

   for (int x = 0; x < Width(); ++x) {

-    const int kQ4StepAdjust = x >> 4;

     const int kFilterPeriodAdjust = (x >> 3) << 3;

-    const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;

-    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;

+    const int ref_x =

+        kPixelSelected + ((kInitialSubPelOffset

+            + kFilterPeriodAdjust * kInputPixelStep)

+                          >> SUBPEL_BITS);

+    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();

+  /* Test the vertical filter. */

   REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,

-                                 kChangeFilters[4], 16, kChangeFilters[8], 17,

-                                 Width(), Height()));

+                                 NULL, 0, kChangeFilters[kInitialSubPelOffset],

+                                 kInputPixelStep, Width(), Height()));

   for (int y = 0; y < Height(); ++y) {

-    const int kQ4StepAdjust = y >> 4;

     const int kFilterPeriodAdjust = (y >> 3) << 3;

-    const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;

+    const int ref_y =

+        kPixelSelected + ((kInitialSubPelOffset

+            + kFilterPeriodAdjust * kInputPixelStep)

+                          >> SUBPEL_BITS);

     ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;

+  /* Test the horizontal and vertical filters in combination. */

   REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,

-                                  kChangeFilters[8], 17, kChangeFilters[8], 17,

+                                  kChangeFilters[kInitialSubPelOffset],

+                                  kInputPixelStep,

+                                  kChangeFilters[kInitialSubPelOffset],

+                                  kInputPixelStep,

                                   Width(), Height()));

   for (int y = 0; y < Height(); ++y) {

-    const int kQ4StepAdjustY = y >> 4;

     const int kFilterPeriodAdjustY = (y >> 3) << 3;

-    const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;

+    const int ref_y =

+        kPixelSelected + ((kInitialSubPelOffset

+            + kFilterPeriodAdjustY * kInputPixelStep)

+                          >> SUBPEL_BITS);

     for (int x = 0; x < Width(); ++x) {

-      const int kQ4StepAdjustX = x >> 4;

       const int kFilterPeriodAdjustX = (x >> 3) << 3;

-      const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;

+      const int ref_x =

+          kPixelSelected + ((kInitialSubPelOffset

+              + kFilterPeriodAdjustX * kInputPixelStep)

+                            >> SUBPEL_BITS);

       ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])

           << "x == " << x << ", y == " << y;

@@ -501,7 +542,6 @@

 using std::tr1::make_tuple;

--- a/vp9/common/vp9_convolve.c

+++ b/vp9/common/vp9_convolve.c

@@ -14,27 +14,10 @@

 #include "./vpx_config.h"

 #include "./vp9_rtcd.h"

 #include "vp9/common/vp9_common.h"

+#include "vp9/common/vp9_filter.h"

 #include "vpx/vpx_integer.h"

 #include "vpx_ports/mem.h"

-/* Assume a bank of 16 filters to choose from. There are two implementations

- * for filter wrapping behavior, since we want to be able to pick which filter

- * to start with. We could either:

- *

- * 1) make filter_ a pointer to the base of the filter array, and then add an

- *    additional offset parameter, to choose the starting filter.

- * 2) use a pointer to 2 periods worth of filters, so that even if the original

- *    phase offset is at 15/16, we'll have valid data to read. The filter

- *    tables become [32][8], and the second half is duplicated.

- * 3) fix the alignment of the filter tables, so that we know the 0/16 is

- *    always 256 byte aligned.

- *

- * Implementations 2 and 3 are likely preferable, as they avoid an extra 2

- * parameters, and switching between them is trivial, with the

- * ALIGN_FILTERS_256 macro, below.

- */

- #define ALIGN_FILTERS_256 1

 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

                              uint8_t *dst, ptrdiff_t dst_stride,

                              const int16_t *filter_x0, int x_step_q4,

@@ -41,36 +24,35 @@

                              const int16_t *filter_y, int y_step_q4,

                              int w, int h, int taps) {

   int x, y, k;

-  const int16_t *filter_x_base = filter_x0;

-#if ALIGN_FILTERS_256

-  filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

-#endif

+  /* NOTE: This assumes that the filter table is 256-byte aligned. */

+  /* TODO(agrange) Modify to make independent of table alignment. */

+  const int16_t *const filter_x_base =

+      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

   /* Adjust base pointer address for this source line */

   src -= taps / 2 - 1;

   for (y = 0; y < h; ++y) {

-    /* Pointer to filter to use */

-    const int16_t *filter_x = filter_x0;

     /* Initial phase offset */

-    int x0_q4 = (filter_x - filter_x_base) / taps;

-    int x_q4 = x0_q4;

+    int x_q4 = (filter_x0 - filter_x_base) / taps;

     for (x = 0; x < w; ++x) {

       /* Per-pixel src offset */

-      int src_x = (x_q4 - x0_q4) >> 4;

+      const int src_x = x_q4 >> SUBPEL_BITS;

       int sum = 0;

+      /* Pointer to filter to use */

+      const int16_t *const filter_x = filter_x_base +

+          (x_q4 & SUBPEL_MASK) * taps;

       for (k = 0; k < taps; ++k)

         sum += src[src_x + k] * filter_x[k];

       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));

-      /* Adjust source and filter to use for the next pixel */

+      /* Move to the next source pixel */

       x_q4 += x_step_q4;

-      filter_x = filter_x_base + (x_q4 & 0xf) * taps;

     src += src_stride;

     dst += dst_stride;

@@ -83,28 +65,28 @@

                                  const int16_t *filter_y, int y_step_q4,

                                  int w, int h, int taps) {

   int x, y, k;

-  const int16_t *filter_x_base = filter_x0;

-#if ALIGN_FILTERS_256

-  filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

-#endif

+  /* NOTE: This assumes that the filter table is 256-byte aligned. */

+  /* TODO(agrange) Modify to make independent of table alignment. */

+  const int16_t *const filter_x_base =

+      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

   /* Adjust base pointer address for this source line */

   src -= taps / 2 - 1;

   for (y = 0; y < h; ++y) {

-    /* Pointer to filter to use */

-    const int16_t *filter_x = filter_x0;

     /* Initial phase offset */

-    int x0_q4 = (filter_x - filter_x_base) / taps;

-    int x_q4 = x0_q4;

+    int x_q4 = (filter_x0 - filter_x_base) / taps;

     for (x = 0; x < w; ++x) {

       /* Per-pixel src offset */

-      int src_x = (x_q4 - x0_q4) >> 4;

+      const int src_x = x_q4 >> SUBPEL_BITS;

       int sum = 0;

+      /* Pointer to filter to use */

+      const int16_t *const filter_x = filter_x_base +

+          (x_q4 & SUBPEL_MASK) * taps;

       for (k = 0; k < taps; ++k)

         sum += src[src_x + k] * filter_x[k];

@@ -111,9 +93,8 @@

       dst[x] = ROUND_POWER_OF_TWO(dst[x] +

                    clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);

-      /* Adjust source and filter to use for the next pixel */

+      /* Move to the next source pixel */

       x_q4 += x_step_q4;

-      filter_x = filter_x_base + (x_q4 & 0xf) * taps;

     src += src_stride;

     dst += dst_stride;

@@ -127,27 +108,27 @@

                             int w, int h, int taps) {

   int x, y, k;

-  const int16_t *filter_y_base = filter_y0;

+  /* NOTE: This assumes that the filter table is 256-byte aligned. */

+  /* TODO(agrange) Modify to make independent of table alignment. */

+  const int16_t *const filter_y_base =

+      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

-#if ALIGN_FILTERS_256

-  filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

-#endif

   /* Adjust base pointer address for this source column */

   src -= src_stride * (taps / 2 - 1);

-  for (x = 0; x < w; ++x) {

-    /* Pointer to filter to use */

-    const int16_t *filter_y = filter_y0;

+  for (x = 0; x < w; ++x) {

     /* Initial phase offset */

-    int y0_q4 = (filter_y - filter_y_base) / taps;

-    int y_q4 = y0_q4;

+    int y_q4 = (filter_y0 - filter_y_base) / taps;

     for (y = 0; y < h; ++y) {

       /* Per-pixel src offset */

-      int src_y = (y_q4 - y0_q4) >> 4;

+      const int src_y = y_q4 >> SUBPEL_BITS;

       int sum = 0;

+      /* Pointer to filter to use */

+      const int16_t *const filter_y = filter_y_base +

+          (y_q4 & SUBPEL_MASK) * taps;

       for (k = 0; k < taps; ++k)

         sum += src[(src_y + k) * src_stride] * filter_y[k];

@@ -154,9 +135,8 @@

       dst[y * dst_stride] =

           clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS));

-      /* Adjust source and filter to use for the next pixel */

+      /* Move to the next source pixel */

       y_q4 += y_step_q4;

-      filter_y = filter_y_base + (y_q4 & 0xf) * taps;

     ++src;

     ++dst;

@@ -170,27 +150,27 @@

                                 int w, int h, int taps) {

   int x, y, k;

-  const int16_t *filter_y_base = filter_y0;

+  /* NOTE: This assumes that the filter table is 256-byte aligned. */

+  /* TODO(agrange) Modify to make independent of table alignment. */

+  const int16_t *const filter_y_base =

+      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

-#if ALIGN_FILTERS_256

-  filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

-#endif

   /* Adjust base pointer address for this source column */

   src -= src_stride * (taps / 2 - 1);

-  for (x = 0; x < w; ++x) {

-    /* Pointer to filter to use */

-    const int16_t *filter_y = filter_y0;

+  for (x = 0; x < w; ++x) {

     /* Initial phase offset */

-    int y0_q4 = (filter_y - filter_y_base) / taps;

-    int y_q4 = y0_q4;

+    int y_q4 = (filter_y0 - filter_y_base) / taps;

     for (y = 0; y < h; ++y) {

       /* Per-pixel src offset */

-      int src_y = (y_q4 - y0_q4) >> 4;

+      const int src_y = y_q4 >> SUBPEL_BITS;

       int sum = 0;

+      /* Pointer to filter to use */

+      const int16_t *const filter_y = filter_y_base +

+          (y_q4 & SUBPEL_MASK) * taps;

       for (k = 0; k < taps; ++k)

         sum += src[(src_y + k) * src_stride] * filter_y[k];

@@ -197,9 +177,8 @@

       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +

            clip_pixel(ROUND_POWER_OF_TWO(sum, VP9_FILTER_BITS)), 1);

-      /* Adjust source and filter to use for the next pixel */

+      /* Move to the next source pixel */

       y_q4 += y_step_q4;

-      filter_y = filter_y_base + (y_q4 & 0xf) * taps;

     ++src;

     ++dst;

@@ -227,13 +206,11 @@

   if (intermediate_height < h)

     intermediate_height = h;

-  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,

-                   temp, 64,

-                   filter_x, x_step_q4, filter_y, y_step_q4,

-                   w, intermediate_height, taps);

-  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,

-                  filter_x, x_step_q4, filter_y, y_step_q4,

-                  w, h, taps);

+  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,

+                   filter_x, x_step_q4, filter_y, y_step_q4, w,

+                   intermediate_height, taps);

+  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,

+                  x_step_q4, filter_y, y_step_q4, w, h, taps);

 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

--

⑨