ref: abfa03ab937b13f9cd632a96fa92ce7723be8e39
parent: 829d1b2098b54664e793723793ac21ddc905ab5c
parent: fdfec4c7be6a0cf61806a099ce7df4ec16dd1a01
author: Jingning Han <jingning@google.com>
date: Thu Jul 12 13:00:07 EDT 2018
Merge "Change the tpl model operating block size to 32x32"
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5603,7 +5603,7 @@
vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
- vp9_full_pixel_search(cpi, x, BLOCK_8X8, &best_ref_mv1_full, step_param,
+ vp9_full_pixel_search(cpi, x, BLOCK_32X32, &best_ref_mv1_full, step_param,
search_method, sadpb, cond_cost_list(cpi, cost_list),
&best_ref_mv1, mv, 0, 0);
@@ -5613,7 +5613,7 @@
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[BLOCK_8X8], 0, mv_sf->subpel_iters_per_step,
+ &cpi->fn_ptr[BLOCK_32X32], 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
0);
@@ -5626,20 +5626,20 @@
int width = 0, height = 0;
switch (block) {
case 0:
- width = grid_pos_col + MI_SIZE - ref_pos_col;
- height = grid_pos_row + MI_SIZE - ref_pos_row;
+ width = grid_pos_col + 4 * MI_SIZE - ref_pos_col;
+ height = grid_pos_row + 4 * MI_SIZE - ref_pos_row;
break;
case 1:
- width = ref_pos_col + MI_SIZE - grid_pos_col;
- height = grid_pos_row + MI_SIZE - ref_pos_row;
+ width = ref_pos_col + 4 * MI_SIZE - grid_pos_col;
+ height = grid_pos_row + 4 * MI_SIZE - ref_pos_row;
break;
case 2:
- width = grid_pos_col + MI_SIZE - ref_pos_col;
- height = ref_pos_row + MI_SIZE - grid_pos_row;
+ width = grid_pos_col + 4 * MI_SIZE - ref_pos_col;
+ height = ref_pos_row + 4 * MI_SIZE - grid_pos_row;
break;
case 3:
- width = ref_pos_col + MI_SIZE - grid_pos_col;
- height = ref_pos_row + MI_SIZE - grid_pos_row;
+ width = ref_pos_col + 4 * MI_SIZE - grid_pos_col;
+ height = ref_pos_row + 4 * MI_SIZE - grid_pos_row;
break;
default: assert(0);
}
@@ -5647,18 +5647,18 @@
return overlap_area = width * height;
}
-int round_floor(int ref_pos) {
+int round_floor(int ref_pos, int bsize_pix) {
int round;
if (ref_pos < 0)
- round = -(1 + (-ref_pos - 1) / MI_SIZE);
+ round = -(1 + (-ref_pos - 1) / bsize_pix);
else
- round = ref_pos / MI_SIZE;
+ round = ref_pos / bsize_pix;
return round;
}
void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
- int mi_row, int mi_col) {
+ int mi_row, int mi_col, const BLOCK_SIZE bsize) {
TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
MV mv = tpl_stats->mv.as_mv;
@@ -5668,21 +5668,27 @@
int ref_pos_row = mi_row * MI_SIZE + mv_row;
int ref_pos_col = mi_col * MI_SIZE + mv_col;
- // top-left on grid block location
- int grid_pos_row_base = round_floor(ref_pos_row) * MI_SIZE;
- int grid_pos_col_base = round_floor(ref_pos_col) * MI_SIZE;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int pix_num = bw * bh;
+
+ // top-left on grid block location in pixel
+ int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
+ int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
int block;
for (block = 0; block < 4; ++block) {
- int grid_pos_row = grid_pos_row_base + MI_SIZE * (block >> 1);
- int grid_pos_col = grid_pos_col_base + MI_SIZE * (block & 0x01);
+ int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
+ int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
int overlap_area = get_overlap_area(grid_pos_row, grid_pos_col,
ref_pos_row, ref_pos_col, block);
- int ref_mi_row = round_floor(grid_pos_row);
- int ref_mi_col = round_floor(grid_pos_col);
+ int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
+ int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
int64_t mc_flow = tpl_stats->mc_dep_cost -
(tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
@@ -5689,11 +5695,11 @@
tpl_stats->intra_cost;
ref_stats[ref_mi_row * ref_tpl_frame->stride + ref_mi_col].mc_flow +=
- (mc_flow * overlap_area) >> (MI_SIZE_LOG2 * 2);
+ (mc_flow * overlap_area) / pix_num;
ref_stats[ref_mi_row * ref_tpl_frame->stride + ref_mi_col].mc_ref_cost +=
- ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) >>
- (MI_SIZE_LOG2 * 2);
+ ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
+ pix_num;
assert(overlap_area >= 0);
}
}
@@ -5713,20 +5719,25 @@
int mi_row, mi_col;
const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];
- // TODO(jingning): Let's keep the buffer size to support 16x16 pixel block,
- // in case we would like to increase the operating block size.
#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
- DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
uint8_t *predictor;
#else
- DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
#endif
- DECLARE_ALIGNED(16, int16_t, src_diff[16 * 16]);
- DECLARE_ALIGNED(16, tran_low_t, coeff[16 * 16]);
+ DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
MODE_INFO mi_above, mi_left;
+ const BLOCK_SIZE bsize = BLOCK_32X32;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int pix_num = bw * bh;
+
// Setup scaling factor
#if CONFIG_VP9_HIGHBITDEPTH
vp9_setup_scale_factors_for_frame(
@@ -5761,12 +5772,12 @@
vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
tpl_frame->is_valid = 1;
- for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
// Motion estimation row boundary
x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
x->mv_limits.row_max =
(cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
- for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
int mb_y_offset =
mi_row * MI_SIZE * this_frame->y_stride + mi_col * MI_SIZE;
int best_rf_idx = -1;
@@ -5793,9 +5804,9 @@
src_stride = this_frame->y_stride;
dst = &predictor[0];
- dst_stride = MI_SIZE;
+ dst_stride = bw;
- xd->mi[0]->sb_type = BLOCK_8X8;
+ xd->mi[0]->sb_type = BLOCK_32X32;
xd->mi[0]->ref_frame[0] = INTRA_FRAME;
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
@@ -5804,16 +5815,16 @@
xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
- vp9_predict_intra_block(xd, b_width_log2_lookup[BLOCK_8X8], TX_8X8,
+ vp9_predict_intra_block(xd, b_width_log2_lookup[BLOCK_32X32], TX_32X32,
mode, src, src_stride, dst, dst_stride, 0, 0,
0);
- vpx_subtract_block(MI_SIZE, MI_SIZE, src_diff, MI_SIZE, src, src_stride,
- dst, dst_stride);
+ vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
+ dst_stride);
- vpx_hadamard_8x8(src_diff, MI_SIZE, coeff);
+ vpx_fdct32x32(src_diff, coeff, bw);
- intra_cost = vpx_satd(coeff, MI_SIZE * MI_SIZE);
+ intra_cost = vpx_satd(coeff, pix_num);
if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
}
@@ -5844,35 +5855,33 @@
vp9_highbd_build_inter_predictor(
CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]),
- MI_SIZE, &mv.as_mv, &sf, MI_SIZE, MI_SIZE, 0, kernel,
- MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
- vpx_highbd_subtract_block(MI_SIZE, MI_SIZE, src_diff, MI_SIZE,
- this_frame->y_buffer + mb_y_offset,
- this_frame->y_stride, &predictor[0],
- MI_SIZE, xd->bd);
+ bw, &mv.as_mv, &sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
+ vpx_highbd_subtract_block(
+ bh, bw, src_diff, bw, this_frame->y_buffer + mb_y_offset,
+ this_frame->y_stride, &predictor[0], bw, xd->bd);
} else {
vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
ref_frame[rf_idx]->y_stride, &predictor[0],
- MI_SIZE, &mv.as_mv, &sf, MI_SIZE, MI_SIZE,
- 0, kernel, MV_PRECISION_Q3,
- mi_col * MI_SIZE, mi_row * MI_SIZE);
- vpx_subtract_block(MI_SIZE, MI_SIZE, src_diff, MI_SIZE,
+ bw, &mv.as_mv, &sf, bw, bh, 0, kernel,
+ MV_PRECISION_Q3, mi_col * MI_SIZE,
+ mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
this_frame->y_buffer + mb_y_offset,
- this_frame->y_stride, &predictor[0], MI_SIZE);
+ this_frame->y_stride, &predictor[0], bw);
}
#else
- vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_stride, &predictor[0],
- MI_SIZE, &mv.as_mv, &sf, MI_SIZE, MI_SIZE, 0,
- kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
- mi_row * MI_SIZE);
- vpx_subtract_block(MI_SIZE, MI_SIZE, src_diff, MI_SIZE,
+ vp9_build_inter_predictor(
+ ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, &sf, bw,
+ bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
this_frame->y_buffer + mb_y_offset,
- this_frame->y_stride, &predictor[0], MI_SIZE);
+ this_frame->y_stride, &predictor[0], bw);
#endif
- vpx_hadamard_8x8(src_diff, MI_SIZE, coeff);
+ vpx_fdct32x32(src_diff, coeff, bw);
- inter_cost = vpx_satd(coeff, MI_SIZE * MI_SIZE);
+ inter_cost = vpx_satd(coeff, pix_num);
if (inter_cost < best_inter_cost) {
best_rf_idx = rf_idx;
@@ -5890,7 +5899,7 @@
tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
tpl_stats->mv.as_int = best_mv.as_int;
- tpl_model_update(cpi->tpl_stats, tpl_stats, mi_row, mi_col);
+ tpl_model_update(cpi->tpl_stats, tpl_stats, mi_row, mi_col, bsize);
(void)best_mv;
(void)best_rf_idx;
}
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -279,11 +279,11 @@
}
typedef struct TplDepStats {
- uint64_t intra_cost;
- uint64_t inter_cost;
- uint64_t mc_flow;
- uint64_t mc_dep_cost;
- uint64_t mc_ref_cost;
+ int64_t intra_cost;
+ int64_t inter_cost;
+ int64_t mc_flow;
+ int64_t mc_dep_cost;
+ int64_t mc_ref_cost;
int ref_frame_index;
int_mv mv;