shithub: libvpx

Download patch

ref: 50b91aff52579ef7f46b47aef0d657a731a2f84f
parent: afa4b9780ae0c10f22995ee96ba40b2cc15b29f9
author: Yunqing Wang <yunqingwang@google.com>
date: Mon Oct 8 12:21:54 EDT 2018

Use 4-tap interp filter in speed 1 sub-pel motion search

Added a 4-tap interpolation filter and used it for the speed 1 sub-pel motion
search. The speed 2 motion search still uses the bilinear filter as before.

Speed 1 borg test showed good bit savings.
        avg_psnr:  ovr_psnr:    ssim:
lowres:  -1.125    -1.179      -1.021
midres:  -0.717    -0.710      -0.543
hdres:   -0.357    -0.370      -0.342
A speed test at speed 1 showed a ~10% encoder time increase, which was
partly because there is no SIMD version of the 4-tap filter.

Change-Id: Ic9b48cdc6a964538c20144108526682d64348301

--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -789,7 +789,7 @@
   }
 }
 
-const int kNumFilterBanks = 4;
+const int kNumFilterBanks = 5;
 const int kNumFilters = 16;
 
 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -63,6 +63,20 @@
   { 0, -3, 2, 41, 63, 29, -2, -2 },   { 0, -3, 1, 38, 64, 32, -1, -3 }
 };
 
-const InterpKernel *vp9_filter_kernels[4] = {
-  sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters
+// 4-tap filter
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+  { 0, 0, 0, 128, 0, 0, 0, 0 },     { 0, 0, -3, 125, 8, -2, 0, 0 },
+  { 0, 0, -6, 120, 18, -4, 0, 0 },  { 0, 0, -8, 115, 27, -6, 0, 0 },
+  { 0, 0, -10, 108, 37, -7, 0, 0 }, { 0, 0, -11, 101, 47, -9, 0, 0 },
+  { 0, 0, -11, 93, 56, -10, 0, 0 }, { 0, 0, -12, 85, 66, -11, 0, 0 },
+  { 0, 0, -11, 75, 75, -11, 0, 0 }, { 0, 0, -11, 66, 85, -12, 0, 0 },
+  { 0, 0, -10, 56, 93, -11, 0, 0 }, { 0, 0, -9, 47, 101, -11, 0, 0 },
+  { 0, 0, -7, 37, 108, -10, 0, 0 }, { 0, 0, -6, 27, 115, -8, 0, 0 },
+  { 0, 0, -4, 18, 120, -6, 0, 0 },  { 0, 0, -2, 8, 125, -3, 0, 0 }
+};
+
+const InterpKernel *vp9_filter_kernels[5] = {
+  sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters,
+  sub_pel_filters_4
 };
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -25,6 +25,7 @@
 #define EIGHTTAP_SHARP 2
 #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
 #define BILINEAR 3
+#define FOURTAP 4
 // The codec can operate in four possible inter prediction filter mode:
 // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
 #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
@@ -32,7 +33,7 @@
 
 typedef uint8_t INTERP_FILTER;
 
-extern const InterpKernel *vp9_filter_kernels[4];
+extern const InterpKernel *vp9_filter_kernels[5];
 
 #ifdef __cplusplus
 }  // extern "C"
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -760,7 +760,14 @@
   unsigned int cost_array[5];
   int kr, kc;
   MvLimits subpel_mv_limits;
-  const InterpKernel *kernel = vp9_filter_kernels[EIGHTTAP];
+
+  // TODO(yunqing): need to add 4-tap filter optimization to speed up the
+  // encoder.
+  const InterpKernel *kernel = (use_accurate_subpel_search > 0)
+                                   ? ((use_accurate_subpel_search == USE_4_TAPS)
+                                          ? vp9_filter_kernels[FOURTAP]
+                                          : vp9_filter_kernels[EIGHTTAP])
+                                   : vp9_filter_kernels[BILINEAR];
 
   vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
   minc = subpel_mv_limits.col_min;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -288,7 +288,7 @@
     sf->exhaustive_searches_thresh =
         (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
                                                                 : INT_MAX;
-    sf->use_accurate_subpel_search = USE_2_TAPS;
+    sf->use_accurate_subpel_search = USE_4_TAPS;
   }
 
   if (speed >= 2) {
@@ -330,6 +330,8 @@
             good_quality_mesh_patterns[mesh_density_level][i].interval;
       }
     }
+
+    sf->use_accurate_subpel_search = USE_2_TAPS;
   }
 
   if (speed >= 3) {