shithub: libvpx

ref: c005792951199bd40724a028120e80888ac38b57
parent: fd891a965572315f971effd908c4a499127cebcf
parent: eb88b172fe5e5fece6676b24a2b6b787e4901753
author: Johann Koenig <johann.koenig@gmail.com>
date: Thu Jun 4 02:16:12 EDT 2015

Merge "Make vp9 subpixel match vp8"
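
The change below halves the bilinear filter tables used by the vp9 sub-pixel variance code. vp9 motion vectors carry 1/8-pel fractional components, so only every other row of the old 16-entry, 1/16-pel table was ever reachable; dropping the odd rows makes vp9 index its kernels exactly the way vp8 does. A minimal sketch of the resulting table (it mirrors the bilinear_filters[8][2] array added in vp9/encoder/vp9_variance.c below; the trailing comment is illustrative, not part of the patch):

static const uint8_t bilinear_filters[8][2] = {
  { 128,   0 }, { 112,  16 }, {  96,  32 }, {  80,  48 },
  {  64,  64 }, {  48,  80 }, {  32,  96 }, {  16, 112 },
};
/* Row k holds { 128 - 16 * k, 16 * k }; the removed odd rows of the old
 * 16-entry table ({ 120, 8 }, { 104, 24 }, ...) sat between 1/8-pel
 * positions and were never selected. */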

--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -21,6 +21,9 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#if CONFIG_VP8_ENCODER
+# include "./vp8_rtcd.h"
+#endif  // CONFIG_VP8_ENCODER
 #if CONFIG_VP9_ENCODER
 # include "./vp9_rtcd.h"
 # include "vp9/encoder/vp9_variance.h"
@@ -32,10 +35,13 @@
 typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+                                         int xoffset, int yoffset,
+                                         const uint8_t *b, int b_stride,
+                                         unsigned int *sse);
 typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride);
 
-
 using ::std::tr1::get;
 using ::std::tr1::make_tuple;
 using ::std::tr1::tuple;
@@ -102,6 +108,12 @@
                                 (l2w + l2h)));
 }
 
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
                                     int l2w, int l2h, int xoff, int yoff,
                                     uint32_t *sse_ptr,
@@ -111,6 +123,10 @@
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // Bilinear interpolation at a 16th pel step.
@@ -480,6 +496,10 @@
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // bilinear interpolation at a 16th pel step
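
Both reference functions now double the 1/8-pel offsets and keep interpolating at a 1/16-pel step, which is numerically identical to the codec's 1/8-pel table lookup: for an offset k in [0, 8), the table row { 128 - 16k, 16k } and the doubled reference factors { 16 - 2k, 2k } both reduce to weights (8 - k)/8 and k/8. A small self-contained check of that identity (illustrative, not part of the test):

#include <assert.h>
int main(void) {
  int k;
  for (k = 0; k < 8; ++k) {
    const int ref_f1 = 2 * k;      /* second reference factor, out of 16 */
    const int tbl_f1 = 16 * k;     /* bilinear_filters[k][1], out of 128 */
    assert(ref_f1 * 8 == tbl_f1);  /* both equal the fraction k / 8 */
  }
  return 0;
}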
@@ -598,8 +618,8 @@
 
 template<typename SubpelVarianceFunctionType>
 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -621,8 +641,9 @@
       unsigned int var1;
       ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
                                                        src_, width_, &sse1));
-      const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
-                                                    log2height_, x, y, &sse2,
+      const unsigned int var2 = subpel_variance_ref(ref_, src_,
+                                                    log2width_, log2height_,
+                                                    x, y, &sse2,
                                                     use_high_bit_depth_,
                                                     bit_depth_);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -636,8 +657,8 @@
   // Compare against reference.
   // Src: Set the first half of values to 0, the second half to the maximum.
   // Ref: Set the first half of values to the maximum, the second half to 0.
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       const int half = block_size_ / 2;
       if (!use_high_bit_depth_) {
         memset(src_, 0, half);
@@ -658,10 +679,10 @@
       ASM_REGISTER_STATE_CHECK(
           var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
       const unsigned int var2 =
-          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
-                              use_high_bit_depth_, bit_depth_);
-      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+          subpel_variance_ref(ref_, src_, log2width_, log2height_,
+                              x, y, &sse2, use_high_bit_depth_, bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
     }
   }
 }
@@ -669,8 +690,8 @@
 #if CONFIG_VP9_ENCODER
 template<>
 void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -795,7 +816,6 @@
 const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
 const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
 const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
-
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
                                         make_tuple(4, 4, highbd_12_mse16x8_c),
@@ -811,7 +831,6 @@
                                         make_tuple(4, 4, highbd_8_mse8x8_c)));
 */
 
-
 const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
 const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
 const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
@@ -976,7 +995,6 @@
 const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
 const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
 const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
-
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
                                            make_tuple(4, 3, highbd_12_mse16x8_sse2),
@@ -1088,8 +1106,15 @@
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
+#if CONFIG_VP8
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
+
+TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
 
 TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
@@ -1097,7 +1122,7 @@
 TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
     VP9SubpelAvgVarianceHighTest;
 
@@ -1106,32 +1131,19 @@
 TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-const vp9_subpixvariance_fn_t subpel_variance4x4_c =
-    vp9_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t subpel_variance4x8_c =
-    vp9_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x4_c =
-    vp9_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t subpel_variance8x8_c =
-    vp9_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x16_c =
-    vp9_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x8_c =
-    vp9_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t subpel_variance16x16_c =
-    vp9_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x32_c =
-    vp9_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x16_c =
-    vp9_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t subpel_variance32x32_c =
-    vp9_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x64_c =
-    vp9_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t subpel_variance64x32_c =
-    vp9_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t subpel_variance64x64_c =
-    vp9_sub_pixel_variance64x64_c;
+const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
+const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
+const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
+const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
+const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
+const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
+const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
+const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
+const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
@@ -1147,6 +1159,23 @@
                       make_tuple(5, 6, subpel_variance32x64_c, 0),
                       make_tuple(6, 5, subpel_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_variance64x64_c, 0)));
+
+#if CONFIG_VP8
+const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
+    vp8_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
+#endif  // CONFIG_VP8
+
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
     vp9_sub_pixel_avg_variance4x4_c;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -1189,83 +1218,83 @@
                       make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
     vp9_highbd_10_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
     vp9_highbd_10_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
     vp9_highbd_10_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
     vp9_highbd_10_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
     vp9_highbd_10_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
     vp9_highbd_10_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
     vp9_highbd_10_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
     vp9_highbd_10_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
     vp9_highbd_10_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
     vp9_highbd_10_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
     vp9_highbd_10_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
     vp9_highbd_10_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
     vp9_highbd_10_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
     vp9_highbd_12_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
     vp9_highbd_12_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
     vp9_highbd_12_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
     vp9_highbd_12_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
     vp9_highbd_12_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
     vp9_highbd_12_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
     vp9_highbd_12_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
     vp9_highbd_12_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
     vp9_highbd_12_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
     vp9_highbd_12_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
     vp9_highbd_12_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
     vp9_highbd_12_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
     vp9_highbd_12_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
     vp9_highbd_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
     vp9_highbd_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
     vp9_highbd_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
     vp9_highbd_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
     vp9_highbd_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
     vp9_highbd_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
     vp9_highbd_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
     vp9_highbd_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
     vp9_highbd_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
     vp9_highbd_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
     vp9_highbd_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
     vp9_highbd_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
     vp9_highbd_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceHighTest,
@@ -1431,34 +1460,48 @@
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_MMX
+const SubpixVarMxNFunc subpel_variance16x16_mmx =
+    vp8_sub_pixel_variance16x16_mmx;
+const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
+const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
+const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
+const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+    MMX, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
+                      make_tuple(4, 3, subpel_variance16x8_mmx, 0),
+                      make_tuple(3, 4, subpel_variance8x16_mmx, 0),
+                      make_tuple(3, 3, subpel_variance8x8_mmx, 0),
+                      make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
+#endif  // HAVE_MMX
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
-const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
-    vp9_sub_pixel_variance4x4_sse;
-const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
-    vp9_sub_pixel_variance4x8_sse;
-const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
-    vp9_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
-    vp9_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
+const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
+const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
+const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
+const SubpixVarMxNFunc subpel_variance8x16_sse2 =
     vp9_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc subpel_variance16x8_sse2 =
     vp9_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc subpel_variance16x16_sse2 =
     vp9_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc subpel_variance16x32_sse2 =
     vp9_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc subpel_variance32x16_sse2 =
     vp9_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc subpel_variance32x32_sse2 =
     vp9_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc subpel_variance32x64_sse2 =
     vp9_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc subpel_variance64x32_sse2 =
     vp9_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc subpel_variance64x64_sse2 =
     vp9_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceTest,
@@ -1517,71 +1560,71 @@
                       make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
     vp9_highbd_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
     vp9_highbd_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
     vp9_highbd_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
     vp9_highbd_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
     vp9_highbd_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
     vp9_highbd_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
     vp9_highbd_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
     vp9_highbd_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
     vp9_highbd_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
     vp9_highbd_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
     vp9_highbd_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
     vp9_highbd_10_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
     vp9_highbd_10_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
     vp9_highbd_10_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
     vp9_highbd_10_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
     vp9_highbd_10_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
     vp9_highbd_10_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
     vp9_highbd_10_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
     vp9_highbd_10_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
     vp9_highbd_10_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
     vp9_highbd_10_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
     vp9_highbd_10_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
     vp9_highbd_12_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
     vp9_highbd_12_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
     vp9_highbd_12_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
     vp9_highbd_12_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
     vp9_highbd_12_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
     vp9_highbd_12_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
     vp9_highbd_12_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
     vp9_highbd_12_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
     vp9_highbd_12_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
     vp9_highbd_12_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
     vp9_highbd_12_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceHighTest,
@@ -1725,35 +1768,56 @@
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSE2
+const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
+    vp8_sub_pixel_variance16x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
+    vp8_sub_pixel_variance16x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
+    vp8_sub_pixel_variance8x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
+    vp8_sub_pixel_variance8x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
+    vp8_sub_pixel_variance4x4_wmt;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
-
-const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
     vp9_sub_pixel_variance4x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
     vp9_sub_pixel_variance4x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
     vp9_sub_pixel_variance8x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
     vp9_sub_pixel_variance8x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
     vp9_sub_pixel_variance8x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
     vp9_sub_pixel_variance16x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
     vp9_sub_pixel_variance16x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
     vp9_sub_pixel_variance16x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
     vp9_sub_pixel_variance32x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
     vp9_sub_pixel_variance32x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
     vp9_sub_pixel_variance32x64_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
     vp9_sub_pixel_variance64x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
     vp9_sub_pixel_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelVarianceTest,
@@ -1815,6 +1879,19 @@
 #endif  // HAVE_SSSE3
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSSE3
+const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
+    vp8_sub_pixel_variance16x16_ssse3;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
+    vp8_sub_pixel_variance16x8_ssse3;
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
+#endif  // HAVE_SSSE3
+#endif  // CONFIG_VP8
+
 #if HAVE_AVX2
 const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
@@ -1834,9 +1911,9 @@
                       make_tuple(4, 4, variance16x16_avx2, 0)));
 
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+const SubpixVarMxNFunc subpel_variance32x32_avx2 =
     vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+const SubpixVarMxNFunc subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9SubpelVarianceTest,
@@ -1854,6 +1931,19 @@
 #endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_AVX2
 
+#if CONFIG_VP8
+#if HAVE_MEDIA
+const SubpixVarMxNFunc subpel_variance16x16_media =
+    vp8_sub_pixel_variance16x16_armv6;
+const SubpixVarMxNFunc subpel_variance8x8_media =
+    vp8_sub_pixel_variance8x8_armv6;
+INSTANTIATE_TEST_CASE_P(
+    MEDIA, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
+                      make_tuple(4, 4, subpel_variance16x16_media, 0)));
+#endif  // HAVE_MEDIA
+#endif  // CONFIG_VP8
+
 #if HAVE_NEON
 const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
 INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
@@ -1882,14 +1972,26 @@
                       make_tuple(3, 4, variance8x16_neon, 0),
                       make_tuple(3, 3, variance8x8_neon, 0)));
 
+#if CONFIG_VP8
+#if HAVE_NEON_ASM
+const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
+    vp8_sub_pixel_variance16x16_neon;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
+    vp8_sub_pixel_variance8x8_neon;
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+#endif  // HAVE_NEON_ASM
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
-    vp9_sub_pixel_variance8x8_neon;
-const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
+const SubpixVarMxNFunc subpel_variance16x16_neon =
     vp9_sub_pixel_variance16x16_neon;
-const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
+const SubpixVarMxNFunc subpel_variance32x32_neon =
     vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+const SubpixVarMxNFunc subpel_variance64x64_neon =
     vp9_sub_pixel_variance64x64_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9SubpelVarianceTest,
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -43,14 +43,6 @@
 
 const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
 
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
-  (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -16,10 +16,18 @@
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"
 
-#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
 
-#include "vp9/encoder/vp9_variance.h"
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
 
 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                       uint8_t *output_ptr,
@@ -27,9 +35,9 @@
                                       int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
-                                      const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                      const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i;
   for (i = 0; i < output_height; ++i) {
     const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -50,9 +58,9 @@
                                        int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
-                                       const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                       const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i, j;
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; j += 16) {
@@ -84,9 +92,9 @@
 
   var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
                             9, 8,
-                            BILINEAR_FILTERS_2TAP(xoffset));
+                            bilinear_filters[xoffset]);
   var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
-                            8, BILINEAR_FILTERS_2TAP(yoffset));
+                            8, bilinear_filters[yoffset]);
   return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
 }
 
@@ -102,9 +110,9 @@
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              17, 16,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
-                             16, BILINEAR_FILTERS_2TAP(yoffset));
+                             16, bilinear_filters[yoffset]);
   return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
 }
 
@@ -120,9 +128,9 @@
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              33, 32,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
-                             32, BILINEAR_FILTERS_2TAP(yoffset));
+                             32, bilinear_filters[yoffset]);
   return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
 }
 
@@ -138,8 +146,8 @@
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              65, 64,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
-                             64, BILINEAR_FILTERS_2TAP(yoffset));
+                             64, bilinear_filters[yoffset]);
   return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
 }
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -162,9 +162,9 @@
       error_per_bit + 4096) >> 13 : 0)
 
 
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
 static INLINE int sp(int x) {
-  return (x & 7) << 1;
+  return x & 7;
 }
 
 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
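
With the table halved, sp() now maps a motion-vector component directly to a 1/8-pel table index instead of doubling it into the old 1/16-pel range. For example (a sketch; the values just trace the inline functions above):

/* tr = 13 is 1 full pel plus 5/8 pel: pre() steps tr >> 3 == 1 row into
 * the buffer, and sp(tr) == (13 & 7) == 5 selects bilinear_filters[5],
 * i.e. { 48, 80 }.  The old sp() returned (13 & 7) << 1 == 10. */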
@@ -679,16 +679,14 @@
       tc = bc + search_step[idx].col;
       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-        int row_offset = (tr & 0x07) << 1;
-        int col_offset = (tc & 0x07) << 1;
         MV this_mv;
         this_mv.row = tr;
         this_mv.col = tc;
         if (second_pred == NULL)
-          thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse);
         else
-          thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse, second_pred);
         cost_array[idx] = thismse +
             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -709,14 +707,12 @@
     tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
       const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-      int row_offset = (tr & 0x07) << 1;
-      int col_offset = (tc & 0x07) << 1;
       MV this_mv = {tr, tc};
       if (second_pred == NULL)
-        thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                            src_address, src_stride, &sse);
       else
-        thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                             src_address, src_stride, &sse, second_pred);
       cost_array[4] = thismse +
           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -19,6 +19,17 @@
 
 #include "vp9/encoder/vp9_variance.h"
 
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
+
 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
 // or vertical direction to produce the filtered output block. Used to implement
 // first-pass of 2-D separable filter.
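
Per pixel, each pass computes a two-tap weighted average of two neighboring samples and rounds back to the sample range; with the taps summing to 128, that is a shift by FILTER_BITS == 7. A minimal sketch of one tap pair (hypothetical helper, not in the patch):

static uint8_t bilinear_tap(uint8_t a, uint8_t b, const uint8_t *f) {
  /* f points at a bilinear_filters[] row, so f[0] + f[1] == 128. */
  return (uint8_t)((a * f[0] + b * f[1] + 64) >> 7);  /* round to nearest */
}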
@@ -33,7 +44,7 @@
                                               int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
-                                              const int16_t *vp9_filter) {
+                                              const uint8_t *vp9_filter) {
   unsigned int i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -65,7 +76,7 @@
                                                unsigned int pixel_step,
                                                unsigned int output_height,
                                                unsigned int output_width,
-                                               const int16_t *vp9_filter) {
+                                               const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -91,9 +102,9 @@
   uint8_t temp2[H * W]; \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
 }
@@ -110,9 +121,9 @@
   DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 \
@@ -166,7 +177,7 @@
     int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int i, j;
   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
   for (i = 0; i < output_height; i++) {
@@ -192,7 +203,7 @@
     unsigned int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -219,9 +230,9 @@
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
                                           dst_stride, sse); \
@@ -236,9 +247,9 @@
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -253,9 +264,9 @@
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -273,9 +284,9 @@
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -295,9 +306,9 @@
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -317,9 +328,9 @@
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
--- a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -14,35 +14,19 @@
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 SECTION .text
 
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -14,52 +14,28 @@
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 bilin_filter_m_ssse3: times  8 db 16,  0
-                      times  8 db 15,  1
                       times  8 db 14,  2
-                      times  8 db 13,  3
                       times  8 db 12,  4
-                      times  8 db 11,  5
                       times  8 db 10,  6
-                      times  8 db  9,  7
                       times 16 db  8
-                      times  8 db  7,  9
                       times  8 db  6, 10
-                      times  8 db  5, 11
                       times  8 db  4, 12
-                      times  8 db  3, 13
                       times  8 db  2, 14
-                      times  8 db  1, 15
 
 SECTION .text
 
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -17,36 +17,20 @@
 DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
 };
 
 #define FILTER_SRC(filter) \