ref: d749bc7b33230f128baec7ecfa19c0d3e1607956
parent: bb407a27b2e32f89f0e9eeee2bcd0aa9d5cfea3f
parent: 037d67f684683ffad22e38ab9a6381ccfedd813f
author: Angie Chiang <angiebird@google.com>
date: Mon Jul 15 14:40:10 EDT 2019
Merge changes I9288c88d,Ib1ac6f57,I02fac56a,Id6a8b117

* changes:
  Use sdx8f in exhaustive_mesh_search_single_step
  Sync the behavior of exhaustive_mesh_search
  Refactor exhaustive_mesh_search_new
  Simplify code in exhaustive_mesh_search_new
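The patch below adds an optional 8-wide SAD pointer (sdx8f) to vp9_variance_fn_ptr_t, wires it into the encoder's block function-pointer table (NULL where no 8-wide kernel is hooked up yet), and splits exhaustive_mesh_search_new into a multi-step variant and a single-step variant that batches SAD calls through sdx8f/sdx4df. As a reference for the sdx8f contract, here is a minimal plain-C model (a hypothetical sketch, not part of the change; the shipped kernels such as vpx_sad16x16x8 are per-size and usually SIMD). sad_array[i] holds the SAD between the source block and the reference block shifted i columns to the right:

    #include <stdint.h>
    #include <stdlib.h>

    // Illustrative 16x16 instance of the sdx8f contract: one reference
    // pointer, eight SADs at consecutive column offsets.
    static void sad16x16x8_model(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 unsigned int *sad_array) {
      int i, r, c;
      for (i = 0; i < 8; ++i) {
        unsigned int sad = 0;
        for (r = 0; r < 16; ++r)
          for (c = 0; c < 16; ++c)
            sad += abs(src[r * src_stride + c] - ref[r * ref_stride + c + i]);
        sad_array[i] = sad;
      }
    }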
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1533,6 +1533,7 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
+// TODO(angiebird): make sdx8f available for highbitdepth if needed
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
@@ -1539,7 +1540,8 @@
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF;
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].sdx8f = NULL;
#define MAKE_BFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
@@ -2438,62 +2440,67 @@
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
cpi->frames_till_next_var_check = 0;
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF;
-
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F;
+ // TODO(angiebird): make sdx8f available for every block size
BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
- vpx_sad32x16x4d)
+ vpx_sad32x16x4d, NULL)
BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
- vpx_sad16x32x4d)
+ vpx_sad16x32x4d, NULL)
BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
- vpx_sad64x32x4d)
+ vpx_sad64x32x4d, NULL)
BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
- vpx_sad32x64x4d)
+ vpx_sad32x64x4d, NULL)
BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
- vpx_sad32x32x4d)
+ vpx_sad32x32x4d, NULL)
BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
- vpx_sad64x64x4d)
+ vpx_sad64x64x4d, NULL)
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
- vpx_sad16x16x4d)
+ vpx_sad16x16x4d, vpx_sad16x16x8)
BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
- vpx_sad16x8x4d)
+ vpx_sad16x8x4d, vpx_sad16x8x8)
BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
- vpx_sad8x16x4d)
+ vpx_sad8x16x4d, vpx_sad8x16x8)
BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
- vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d)
+ vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
+ vpx_sad8x8x8)
BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
- vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d)
+ vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
+ NULL)
BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
- vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d)
+ vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
+ NULL)
BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
- vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d)
+ vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
+ vpx_sad4x4x8)
#if CONFIG_VP9_HIGHBITDEPTH
highbd_set_var_fns(cpi);
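Note that the table above deliberately passes NULL for SDX8F wherever no 8-wide SAD kernel is wired up, so callers must null-check fn_ptr->sdx8f before batching, as the mesh search below does. A minimal sketch of that guarded dispatch, assuming the vp9_variance_fn_ptr_t layout from the variance.h hunk at the end of this change (sad_row8 is a hypothetical helper, not code from the patch):

    #include "vpx_dsp/variance.h"

    // Score 8 consecutive column offsets of ref, falling back to single
    // sdf calls when the optional 8-wide kernel is absent for this size.
    static void sad_row8(const vp9_variance_fn_ptr_t *fn_ptr,
                         const uint8_t *src, int src_stride,
                         const uint8_t *ref, int ref_stride,
                         unsigned int sads[8]) {
      if (fn_ptr->sdx8f != NULL) {
        fn_ptr->sdx8f(src, src_stride, ref, ref_stride, sads);
      } else {
        int i;
        for (i = 0; i < 8; ++i)
          sads[i] = fn_ptr->sdf(src, src_stride, ref + i, ref_stride);
      }
    }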
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1923,47 +1923,99 @@
return best_cost;
}
-static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
- int range, int step,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int lambda,
- const int_mv *nb_full_mvs,
- int full_mv_num) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- MV fcenter_mv = { center_mv->row, center_mv->col };
+static int64_t exhaustive_mesh_search_multi_step(
+ MV *best_mv, const MV *center_mv, int range, int step,
+ const struct buf_2d *src, const struct buf_2d *pre, int lambda,
+ const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
+ const vp9_variance_fn_ptr_t *fn_ptr) {
int64_t best_sad;
+ int r, c;
+ int start_col, end_col, start_row, end_row;
+ *best_mv = *center_mv;
+ best_sad =
+ ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, center_mv), pre->stride)
+ << LOG2_PRECISION) +
+ lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+ start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+ start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+ end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+ end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+ for (r = start_row; r <= end_row; r += step) {
+ for (c = start_col; c <= end_col; c += step) {
+ const MV mv = { r, c };
+ int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, &mv), pre->stride)
+ << LOG2_PRECISION;
+ if (sad < best_sad) {
+ sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
+ }
+ }
+ }
+ return best_sad;
+}
+
+static int64_t exhaustive_mesh_search_single_step(
+ MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
+ const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
+ int full_mv_num, const MvLimits *mv_limits,
+ const vp9_variance_fn_ptr_t *fn_ptr) {
+ int64_t best_sad;
int r, c, i;
int start_col, end_col, start_row, end_row;
- int col_step = (step > 1) ? step : 4;
- assert(step >= 1);
-
- clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- *best_mv = fcenter_mv;
+ *best_mv = *center_mv;
best_sad =
- ((int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &fcenter_mv),
- in_what->stride)
+ ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, center_mv), pre->stride)
<< LOG2_PRECISION) +
- lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
- start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
- start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
- end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
- end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
+ lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+ start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+ start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+ end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+ end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
- for (r = start_row; r <= end_row; r += step) {
- for (c = start_col; c <= end_col; c += col_step) {
- // Step > 1 means we are not checking every location in this pass.
- if (step > 1) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
- int64_t sad =
- (int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride)
- << LOG2_PRECISION;
+ for (r = start_row; r <= end_row; r += 1) {
+ c = start_col;
+ // sdx8f may not be available for some block sizes
+ if (fn_ptr->sdx8f) {
+ while (c + 7 <= end_col) {
+ unsigned int sads[8];
+ const MV mv = { r, c };
+ const uint8_t *buf = get_buf_from_mv(pre, &mv);
+ fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads);
+ for (i = 0; i < 8; ++i) {
+ int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
+ if (sad < best_sad) {
+ const MV mv = { r, c + i };
+ sad += lambda *
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
+ }
+ }
+ c += 8;
+ }
+ }
+ while (c + 3 <= end_col) {
+ unsigned int sads[4];
+ const uint8_t *addrs[4];
+ for (i = 0; i < 4; ++i) {
+ const MV mv = { r, c + i };
+ addrs[i] = get_buf_from_mv(pre, &mv);
+ }
+ fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
+
+ for (i = 0; i < 4; ++i) {
+ int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
if (sad < best_sad) {
+ const MV mv = { r, c + i };
sad +=
lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
if (sad < best_sad) {
@@ -1971,51 +2023,46 @@
*best_mv = mv;
}
}
- } else {
- // 4 sads in a single call if we are checking every location
- if (c + 3 <= end_col) {
- unsigned int sads[4];
- const uint8_t *addrs[4];
- for (i = 0; i < 4; ++i) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- addrs[i] = get_buf_from_mv(in_what, &mv);
- }
- fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
- for (i = 0; i < 4; ++i) {
- int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
- if (sad < best_sad) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- sad += lambda *
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
- if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
- }
- }
- }
- } else {
- for (i = 0; i < end_col - c; ++i) {
- const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
- int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv),
- in_what->stride)
- << LOG2_PRECISION;
- if (sad < best_sad) {
- sad += lambda *
- vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
- if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
- }
- }
- }
+ }
+ c += 4;
+ }
+ while (c <= end_col) {
+ const MV mv = { r, c };
+ int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+ get_buf_from_mv(pre, &mv), pre->stride)
+ << LOG2_PRECISION;
+ if (sad < best_sad) {
+ sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
+ c += 1;
}
}
-
return best_sad;
+}
+
+static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
+ int range, int step,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, int lambda,
+ const int_mv *nb_full_mvs,
+ int full_mv_num) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *src = &x->plane[0].src;
+ const struct buf_2d *pre = &xd->plane[0].pre[0];
+ assert(step >= 1);
+ assert(is_mv_in(&x->mv_limits, center_mv));
+ if (step == 1) {
+ return exhaustive_mesh_search_single_step(
+ best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num,
+ &x->mv_limits, fn_ptr);
+ }
+ return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src,
+ pre, lambda, nb_full_mvs,
+ full_mv_num, &x->mv_limits, fn_ptr);
}
static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
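Both new search variants share the same two-stage candidate update: the raw SAD (shifted up by LOG2_PRECISION) is compared against the current best first, and the lambda-weighted neighbor-MV penalty is added only when that cheap test passes. Because vp9_nb_mvs_inconsistency() returns a non-negative penalty, a candidate rejected on raw SAD alone can never win, so the shortcut is lossless. A schematic of that update, using the diff's own names (a sketch, not the literal code):

    // Per-candidate update shared by both variants: defer the (relatively
    // expensive) neighbor-MV inconsistency term until the raw SAD alone
    // already beats the running best.
    static int64_t update_best(MV mv, unsigned int raw_sad, int lambda,
                               const int_mv *nb_full_mvs, int full_mv_num,
                               int64_t best_sad, MV *best_mv) {
      int64_t sad = (int64_t)raw_sad << LOG2_PRECISION;
      if (sad < best_sad) {
        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
        if (sad < best_sad) {
          best_sad = sad;
          *best_mv = mv;
        }
      }
      return best_sad;
    }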
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -76,6 +76,7 @@
vpx_subpixvariance_fn_t svf;
vpx_subp_avg_variance_fn_t svaf;
vpx_sad_multi_d_fn_t sdx4df;
+ vpx_sad_multi_fn_t sdx8f;
} vp9_variance_fn_ptr_t;
#endif // CONFIG_VP9
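For reference, the new field's type and its 4-wide sibling differ in how references are passed: sdx4df scores four independent reference pointers, while sdx8f takes a single reference pointer and scores eight consecutive column offsets from it, which matches the access pattern of the step == 1 mesh search above. The corresponding typedefs earlier in this header read approximately as follows (reproduced here as a sketch for orientation):

    typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride,
                                       const uint8_t *ref_ptr, int ref_stride,
                                       unsigned int *sad_array);
    typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr,
                                         int src_stride,
                                         const uint8_t *const ref_array[],
                                         int ref_stride,
                                         unsigned int *sad_array);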