ref: d69584261d90abe4430fc4ae70d95ad0b5219f5f
parent: 01cac9f374345c3bba337ef5e63fd2693473bca5
parent: 184071fe9f3c4db1b22d5bd69c9015b518c65e7a
author: Angie Chiang <angiebird@google.com>
date: Fri Jun 28 13:50:28 EDT 2019
Merge changes I833c82fb, I05a39165, Ie044bb01, I565f477f. * changes: Integerize vp9_full_pixel_diamond_new; Integerize vp9_refining_search_sad_new; Integerize diamond_search_sad_new(); Refactor vp9_full_pixel_diamond_new
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -2031,7 +2031,6 @@
int interval = sf->mesh_patterns[0].interval;
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
- const MV dummy_mv = { 0, 0 };
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
@@ -2067,19 +2066,18 @@
}
}
- bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0);
*dst_mv = temp_mv;
return bestsme;
}
-static double diamond_search_sad_new(const MACROBLOCK *x,
- const search_site_config *cfg,
- const MV *init_full_mv, MV *best_full_mv,
- int search_param, int lambda, int *num00,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs,
- int full_mv_num) {
+static int64_t diamond_search_sad_new(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const MV *init_full_mv, MV *best_full_mv,
+ int search_param, int lambda, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
int i, j, step;
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2089,7 +2087,7 @@
const int in_what_stride = xd->plane[0].pre[0].stride;
const uint8_t *best_address;
- double bestsad;
+ int64_t bestsad;
int best_site = -1;
int last_site = -1;
@@ -2116,11 +2114,11 @@
// Check the starting position
{
- const double mv_dist =
- fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
bestsad = mv_dist + lambda * mv_cost;
}
@@ -2151,14 +2149,13 @@
sad_array);
for (t = 0; t < 4; t++, i++) {
- if (sad_array[t] < bestsad) {
+ const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION;
+ if (mv_dist < bestsad) {
const MV this_mv = { best_full_mv->row + ss_mv[i].row,
best_full_mv->col + ss_mv[i].col };
- const double mv_dist = sad_array[t];
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
@@ -2174,13 +2171,14 @@
if (is_mv_in(&x->mv_limits, &this_mv)) {
const uint8_t *const check_here = ss_os[i] + best_address;
- const double mv_dist =
- fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what, what_stride, check_here,
+ in_what_stride)
+ << LOG2_PRECISION;
if (mv_dist < bestsad) {
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
@@ -2590,21 +2588,19 @@
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
-double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int lambda,
- int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num,
- MV *best_mv) {
+int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
+ int step_param, int lambda, int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv) {
int n, num00 = 0;
- double thissme;
- double bestsme;
+ int thissme;
+ int bestsme;
const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
const MV center_mv = { 0, 0 };
vpx_clear_system_state();
- bestsme =
- diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param,
- lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
+ diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
+ &n, fn_ptr, nb_full_mvs, full_mv_num);
bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
@@ -2618,9 +2614,9 @@
num00--;
} else {
MV temp_mv;
- thissme = diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
- step_param + n, lambda, &num00, fn_ptr,
- nb_full_mvs, full_mv_num);
+ diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
+ step_param + n, lambda, &num00, fn_ptr,
+ nb_full_mvs, full_mv_num);
thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
// check to see if refining search is needed.
if (num00 > further_steps - n) do_refine = 0;
@@ -2636,8 +2632,8 @@
if (do_refine) {
const int search_range = 8;
MV temp_mv = *best_mv;
- thissme = vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range,
- fn_ptr, nb_full_mvs, full_mv_num);
+ vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr,
+ nb_full_mvs, full_mv_num);
thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
if (thissme < bestsme) {
bestsme = thissme;
@@ -2645,8 +2641,9 @@
}
}
- bestsme = (double)full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv,
- lambda, nb_full_mvs, full_mv_num);
+ full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+ nb_full_mvs, full_mv_num);
+ bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
return bestsme;
}
#endif // CONFIG_NON_GREEDY_MV
@@ -2774,24 +2771,25 @@
}
#if CONFIG_NON_GREEDY_MV
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
- int lambda, int search_range,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num) {
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ int lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
- double best_sad;
+ int64_t best_sad;
int i, j;
vpx_clear_system_state();
{
- const double mv_dist =
- fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
+ const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride,
+ best_address, in_what->stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
best_sad = mv_dist + lambda * mv_cost;
}
@@ -2813,11 +2811,10 @@
for (j = 0; j < 4; ++j) {
const MV mv = { best_full_mv->row + neighbors[j].row,
best_full_mv->col + neighbors[j].col };
- const double mv_dist = sads[j];
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- const double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
best_site = j;
@@ -2829,13 +2826,14 @@
best_full_mv->col + neighbors[j].col };
if (is_mv_in(&x->mv_limits, &mv)) {
- const double mv_dist =
- fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride);
- const double mv_cost =
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
- (double)(1 << LOG2_PRECISION);
- const double thissad = mv_dist + lambda * mv_cost;
+ const int64_t mv_dist =
+ (int64_t)fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv),
+ in_what->stride)
+ << LOG2_PRECISION;
+ const int64_t mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const int64_t thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
best_site = j;
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -128,17 +128,17 @@
#if CONFIG_NON_GREEDY_MV
#define NB_MVS_NUM 4
struct TplDepStats;
-double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
- int lambda, int search_range,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num);
+int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ int lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num);
-double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int lambda,
- int do_refine,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const int_mv *nb_full_mvs, int full_mv_num,
- MV *best_mv);
+int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, int lambda,
+ int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv);
int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
int mv_num);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2494,8 +2494,8 @@
const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
MV pred_mv[3];
+ int bestsme = INT_MAX;
#if CONFIG_NON_GREEDY_MV
- double bestsme;
int_mv nb_full_mvs[NB_MVS_NUM];
const int nb_full_mv_num = NB_MVS_NUM;
int gf_group_idx = cpi->twopass.gf_group.index;
@@ -2506,7 +2506,6 @@
vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
gf_rf_idx, square_bsize, nb_full_mvs);
#else // CONFIG_NON_GREEDY_MV
- int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
#endif // CONFIG_NON_GREEDY_MV
@@ -2592,11 +2591,7 @@
if (cpi->sf.enhanced_full_pixel_motion_search) {
int i;
for (i = 0; i < 3; ++i) {
-#if CONFIG_NON_GREEDY_MV
- double this_me;
-#else // CONFIG_NON_GREEDY_MV
int this_me;
-#endif // CONFIG_NON_GREEDY_MV
MV this_mv;
int diff_row;
int diff_col;