shithub: libvpx

Download patch

ref: bcd17e32c9498cb7a2375999226a60c500cc29da
parent: c875803def7344aa69334f4dada2a3812a10a964
author: Yunqing Wang <yunqingwang@google.com>
date: Tue Oct 16 05:24:18 EDT 2018

Fix the filter tap calculation in mips optimizations

The interpolation filter tap calculation could not distinguish 2-tap
filters from 4-tap filters. This patch fixes the bug and resolves the
Jenkins test failures in the MIPS sub-pel filter optimizations.

BUG=webm:1568

Change-Id: I51eb8adb7ed194ef2ea7dd4aa57aa9870ee38cfc

--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -789,13 +789,7 @@
   }
 }
 
-#if HAVE_MSA
-// TODO(any) MSA optimizations doesn't work with 4-tap interp filter. Need to be
-// fixed.
-const int kNumFilterBanks = 4;
-#else
 const int kNumFilterBanks = 5;
-#endif
 const int kNumFilters = 16;
 
 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
--- a/vpx_dsp/mips/convolve8_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_dspr2.c
@@ -15,6 +15,7 @@
 #include "vpx_dsp/mips/convolve_common_dspr2.h"
 #include "vpx_dsp/vpx_convolve.h"
 #include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 
 #if HAVE_DSPR2
@@ -341,7 +342,7 @@
   assert(y_step_q4 == 16);
   assert(((const int32_t *)filter_y)[1] != 0x800000);
 
-  if (((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_y) == 2) {
     vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter,
                                  x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
@@ -15,6 +15,7 @@
 #include "vpx_dsp/mips/convolve_common_dspr2.h"
 #include "vpx_dsp/vpx_convolve.h"
 #include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 
 #if HAVE_DSPR2
@@ -945,7 +946,7 @@
   assert(x_step_q4 == 16);
   assert(((const int32_t *)filter_x)[1] != 0x800000);
 
-  if (((const int32_t *)filter_x)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2) {
     vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter,
                                   x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/convolve8_dspr2.c
+++ b/vpx_dsp/mips/convolve8_dspr2.c
@@ -1322,7 +1322,7 @@
   if (filter_x[3] == 0x80) {
     copy_horiz_transposed(src - src_stride * 3, src_stride, temp,
                           intermediate_height, w, intermediate_height);
-  } else if (((const int32_t *)filter_x)[0] == 0) {
+  } else if (vpx_get_filter_taps(filter_x) == 2) {
     vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp,
                         intermediate_height, filter_x, w, intermediate_height);
   } else {
@@ -1365,7 +1365,7 @@
   /* copy the src to dst */
   if (filter_y[3] == 0x80) {
     copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h, w);
-  } else if (((const int32_t *)filter_y)[0] == 0) {
+  } else if (vpx_get_filter_taps(filter_y) == 2) {
     vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride,
                         filter_y, h, w);
   } else {
--- a/vpx_dsp/mips/convolve8_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c
@@ -825,7 +825,7 @@
   assert(x_step_q4 == 16);
   assert(((const int32_t *)filter_x)[1] != 0x800000);
 
-  if (((const int32_t *)filter_x)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2) {
     vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
                               x_step_q4, y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/convolve8_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve8_vert_dspr2.c
@@ -325,7 +325,7 @@
   assert(y_step_q4 == 16);
   assert(((const int32_t *)filter_y)[1] != 0x800000);
 
-  if (((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_y) == 2) {
     vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
                              x_step_q4, y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@@ -658,7 +658,7 @@
     filt_hor[cnt] = filter_x[cnt];
   }
 
-  if (((const int32_t *)filter_x)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2) {
     switch (w) {
       case 4:
         common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
--- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@@ -538,8 +538,8 @@
     filt_ver[cnt] = filter_y[cnt];
   }
 
-  if (((const int32_t *)filter_x)[0] == 0 &&
-      ((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2 &&
+      vpx_get_filter_taps(filter_y) == 2) {
     switch (w) {
       case 4:
         common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
@@ -571,8 +571,8 @@
                             x_step_q4, y0_q4, y_step_q4, w, h);
         break;
     }
-  } else if (((const int32_t *)filter_x)[0] == 0 ||
-             ((const int32_t *)filter_y)[0] == 0) {
+  } else if (vpx_get_filter_taps(filter_x) == 2 ||
+             vpx_get_filter_taps(filter_y) == 2) {
     vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                         x_step_q4, y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@@ -625,7 +625,7 @@
     filt_ver[cnt] = filter_y[cnt];
   }
 
-  if (((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_y) == 2) {
     switch (w) {
       case 4:
         common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst,
--- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@@ -634,7 +634,7 @@
     filt_hor[cnt] = filter_x[cnt];
   }
 
-  if (((const int32_t *)filter_x)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2) {
     switch (w) {
       case 4:
         common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
--- a/vpx_dsp/mips/vpx_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@@ -558,8 +558,8 @@
     filt_ver[cnt] = filter_y[cnt];
   }
 
-  if (((const int32_t *)filter_x)[0] == 0 &&
-      ((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_x) == 2 &&
+      vpx_get_filter_taps(filter_y) == 2) {
     switch (w) {
       case 4:
         common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst,
@@ -591,8 +591,8 @@
                         x_step_q4, y0_q4, y_step_q4, w, h);
         break;
     }
-  } else if (((const int32_t *)filter_x)[0] == 0 ||
-             ((const int32_t *)filter_y)[0] == 0) {
+  } else if (vpx_get_filter_taps(filter_x) == 2 ||
+             vpx_get_filter_taps(filter_y) == 2) {
     vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
                     y0_q4, y_step_q4, w, h);
   } else {
--- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@@ -641,7 +641,7 @@
     filt_ver[cnt] = filter_y[cnt];
   }
 
-  if (((const int32_t *)filter_y)[0] == 0) {
+  if (vpx_get_filter_taps(filter_y) == 2) {
     switch (w) {
       case 4:
         common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -11,6 +11,7 @@
 #ifndef VPX_VPX_DSP_VPX_FILTER_H_
 #define VPX_VPX_DSP_VPX_FILTER_H_
 
+#include <assert.h>
 #include "vpx/vpx_integer.h"
 
 #ifdef __cplusplus
@@ -25,6 +26,14 @@
 #define SUBPEL_TAPS 8
 
 typedef int16_t InterpKernel[SUBPEL_TAPS];
+
+// Tap count of |filter|: 2 when taps 0..2 are all zero, otherwise 8 (so a
+// 4-tap kernel reports 8). Asserts that filter[3] is not 128.
+static INLINE int vpx_get_filter_taps(const int16_t *const filter) {
+  assert(filter[3] != 128);
+  if (filter[0] || filter[1] || filter[2]) return 8;
+  return 2;
+}
 
 #ifdef __cplusplus
 }  // extern "C"