ref: 50b91aff52579ef7f46b47aef0d657a731a2f84f
parent: afa4b9780ae0c10f22995ee96ba40b2cc15b29f9
author: Yunqing Wang <yunqingwang@google.com>
date: Mon Oct 8 12:21:54 EDT 2018
Use 4-tap interp filter in speed 1 sub-pel motion search

Added the 4-tap interp filter, and used it for speed 1 sub-pel
motion search. Speed 2 motion search still used bilinear filter
as before.

Speed 1 borg test showed good bit savings.
         avg_psnr:  ovr_psnr:  ssim:
lowres:   -1.125     -1.179    -1.021
midres:   -0.717     -0.710    -0.543
hdres:    -0.357     -0.370    -0.342

Speed test at speed 1 showed ~10% encoder time increase, which
was partly because there is no SIMD version of the 4-tap filter.

Change-Id: Ic9b48cdc6a964538c20144108526682d64348301
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -789,7 +789,7 @@
}
}
-const int kNumFilterBanks = 4;
+const int kNumFilterBanks = 5;
const int kNumFilters = 16;
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -63,6 +63,20 @@
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
};
-const InterpKernel *vp9_filter_kernels[4] = {
- sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters
+// 4-tap filter (taps 2..5 of the 8-tap layout; each kernel sums to 128)
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -3, 125, 8, -2, 0, 0 },
+ { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 115, 27, -6, 0, 0 },
+ { 0, 0, -10, 108, 37, -7, 0, 0 }, { 0, 0, -11, 101, 47, -9, 0, 0 },
+ { 0, 0, -11, 93, 56, -10, 0, 0 }, { 0, 0, -12, 85, 66, -11, 0, 0 },
+ { 0, 0, -11, 75, 75, -11, 0, 0 }, { 0, 0, -11, 66, 85, -12, 0, 0 },
+ { 0, 0, -10, 56, 93, -11, 0, 0 }, { 0, 0, -9, 47, 101, -11, 0, 0 },
+ { 0, 0, -7, 37, 108, -10, 0, 0 }, { 0, 0, -6, 27, 115, -8, 0, 0 },
+ { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 125, -3, 0, 0 }
+};
+
+const InterpKernel *vp9_filter_kernels[5] = {
+ sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters,
+ sub_pel_filters_4
};
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -25,6 +25,7 @@
#define EIGHTTAP_SHARP 2
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
#define BILINEAR 3
+#define FOURTAP 4
// The codec can operate in four possible inter prediction filter mode:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
@@ -32,7 +33,7 @@
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp9_filter_kernels[4];
+extern const InterpKernel *vp9_filter_kernels[5];
#ifdef __cplusplus
} // extern "C"
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -760,7 +760,14 @@
unsigned int cost_array[5];
int kr, kc;
MvLimits subpel_mv_limits;
- const InterpKernel *kernel = vp9_filter_kernels[EIGHTTAP];
+
+ // TODO(yunqing): need to add 4-tap filter optimization to speed up the
+ // encoder.
+ const InterpKernel *kernel = (use_accurate_subpel_search > 0)
+ ? ((use_accurate_subpel_search == USE_4_TAPS)
+ ? vp9_filter_kernels[FOURTAP]
+ : vp9_filter_kernels[EIGHTTAP])
+ : vp9_filter_kernels[BILINEAR];
vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
minc = subpel_mv_limits.col_min;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -288,7 +288,7 @@
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
- sf->use_accurate_subpel_search = USE_2_TAPS;
+ sf->use_accurate_subpel_search = USE_4_TAPS;
}
if (speed >= 2) {
@@ -330,6 +330,8 @@
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
+
+ sf->use_accurate_subpel_search = USE_2_TAPS;
}
if (speed >= 3) {