ref: 66aca163f5017fc50276f1c3025f2b2cc08aa010
parent: c85c5337bfe2f64503b8e8f4584db63d6cd64d61
author: Jerome Jiang <jianj@google.com>
date: Thu Mar 29 10:59:58 EDT 2018
VP9: Add speed 9 for subpel search. Set subpel search stop to 2 when motion vector is non zero. 10% speedup on 1 and 2 threads on Samsung Galaxy S8+. Change-Id: I7323bb913000229cf60a37495bf88bcc51d0ac96
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -152,5 +152,5 @@
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(0, 9));
+ ::testing::Range(0, 10));
} // namespace
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -48,7 +48,7 @@
EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
};
-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 };
const int kEncodePerfTestThreads[] = { 1, 2, 4 };
#define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -1150,20 +1150,21 @@
}
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR,
- ::testing::Range(5, 9));
+ ::testing::Range(5, 10));
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10),
::testing::Range(0, 3));
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR,
- ::testing::Range(5, 9), ::testing::Range(0, 2),
+ ::testing::Range(5, 10), ::testing::Range(0, 2),
::testing::Range(0, 3));
#if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, ::testing::Range(5, 9),
- ::testing::Range(1, 3), ::testing::Range(0, 3));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser,
+ ::testing::Range(5, 10), ::testing::Range(1, 3),
+ ::testing::Range(0, 3));
#endif
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10),
::testing::Range(32, 36));
} // namespace
--- a/test/vp9_datarate_test.cc
+++ b/test/vp9_datarate_test.cc
@@ -824,16 +824,17 @@
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(2, 9), ::testing::Range(0, 4));
+ ::testing::Range(2, 10), ::testing::Range(0, 4));
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeOneBR,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(2, 9));
+ ::testing::Range(2, 10));
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10));
#if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
+ ::testing::Range(5, 10));
#endif
} // namespace
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -409,7 +409,7 @@
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(3, 9), // cpu_used
+ ::testing::Range(3, 10), // cpu_used
::testing::Range(0, 3), // tile_columns
::testing::Range(2, 5))); // threads
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -224,6 +224,14 @@
if (rv && search_subpel) {
int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+ if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+ int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+ if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+ abs(tmp_mv->as_mv.col) >= mv_thresh)
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+ else
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+ }
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -661,6 +661,14 @@
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
+
+ if (speed >= 9) {
+ sf->mv.enable_adaptive_subpel_force_stop = 1;
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+ sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+ }
+
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -161,6 +161,17 @@
ONE_LOOP_REDUCED = 1
} FAST_COEFF_UPDATE;
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+ // Threshold for |row| and |col| of the full pixel motion vector.
+ int mv_thresh;
+
+ // subpel_force_stop if both MV components are below the threshold.
+ int force_stop_below;
+
+ // subpel_force_stop if either MV component is at or above the threshold.
+ int force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
typedef struct MV_SPEED_FEATURES {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
@@ -188,6 +199,11 @@
// 2: Stop at half pixel.
// 3: Stop at full pixel.
int subpel_force_stop;
+
+ // If set, subpel_force_stop is chosen adaptively from the full pixel MV size.
+ int enable_adaptive_subpel_force_stop;
+
+ ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -248,7 +248,7 @@
RANGE_CHECK(extra_cfg, row_mt, 0, 1);
RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
- RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);