ref: 0971d3204d26abe4d86fc873af44ab2286bc9c62
parent: 119dad38e64b66e602471239b864c08c6574087f
author: Angie Chiang <angiebird@google.com>
date: Mon Jul 22 11:16:08 EDT 2019
Reduce the number of exhaustive search calls. The encoding time difference between non_greedy_mv and baseline is reduced from 51% to 13%. However, there is also a performance impact. non_greedy_mv performance: Before this CL: lowres 0.395%, midres 0.716%, hdres 0.533%. After this CL: lowres 0.242%, midres 0.429%, hdres 0.305%. Change-Id: I047d6509df504b264981c0b903c0cc955f45b273
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5937,9 +5937,8 @@
(void)sadpb;
nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row,
mi_col, rf_idx, bsize, nb_full_mvs);
- vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
- &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
- mv);
+ vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
+ lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
#else
(void)frame_idx;
(void)mi_row;
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -2437,16 +2437,24 @@
return best_sad;
}
+static int get_exhaustive_threshold(int exhaustive_searches_thresh,  // base threshold; callers pass sf->exhaustive_searches_thresh
+ BLOCK_SIZE bsize) {  // block size used to index the two log2 lookup tables
+ return exhaustive_searches_thresh >>  // returns the base threshold scaled down by a block-size-dependent right shift
+ (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));  // shift is 8 minus the summed lookup entries: a smaller sum gives a larger shift and a lower threshold
+}  // callers run the exhaustive search only when their error measure exceeds this value
+
#if CONFIG_NON_GREEDY_MV
// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
-int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
- int step_param, int lambda, int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
+int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int lambda, int do_refine,
const int_mv *nb_full_mvs, int full_mv_num,
MV *best_mv) {
+ const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+ const SPEED_FEATURES *const sf = &cpi->sf;
int n, num00 = 0;
int thissme;
int bestsme;
@@ -2495,9 +2503,16 @@
}
}
- full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
- nb_full_mvs, full_mv_num);
- bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+ if (sf->exhaustive_searches_thresh < INT_MAX &&
+ !cpi->rc.is_src_frame_alt_ref) {
+ const int64_t exhaustive_thr =
+ get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+ if (bestsme > exhaustive_thr) {
+ full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+ nb_full_mvs, full_mv_num);
+ bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+ }
+ }
return bestsme;
}
#endif // CONFIG_NON_GREEDY_MV
@@ -2886,9 +2901,10 @@
if (sf->exhaustive_searches_thresh < INT_MAX &&
!cpi->rc.is_src_frame_alt_ref) {
const int64_t exhaustive_thr =
- sf->exhaustive_searches_thresh >>
- (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
- if (var > exhaustive_thr) run_exhaustive_search = 1;
+ get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+ if (var > exhaustive_thr) {
+ run_exhaustive_search = 1;
+ }
}
} else if (method == MESH) {
run_exhaustive_search = 1;
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -136,9 +136,8 @@
const int_mv *nb_full_mvs, int full_mv_num);
int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int lambda,
- int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int lambda, int do_refine,
const int_mv *nb_full_mvs, int full_mv_num,
MV *best_mv);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2579,9 +2579,9 @@
mvp_full.row >>= 3;
#if CONFIG_NON_GREEDY_MV
- bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1,
- &cpi->fn_ptr[bsize], nb_full_mvs,
- nb_full_mv_num, &tmp_mv->as_mv);
+ bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param,
+ lambda, 1, nb_full_mvs, nb_full_mv_num,
+ &tmp_mv->as_mv);
#else // CONFIG_NON_GREEDY_MV
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
@@ -2617,9 +2617,9 @@
mvp_full.row >>= 3;
#if CONFIG_NON_GREEDY_MV
this_me = vp9_full_pixel_diamond_new(
- cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
- lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
- &this_mv);
+ cpi, x, bsize, &mvp_full,
+ VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs,
+ nb_full_mv_num, &this_mv);
#else // CONFIG_NON_GREEDY_MV
this_me = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full,