ref: dbc5090b5e9cb262aa575fb5d41314ecfefff6eb
parent: 2a21b45fdcb82cac39ba36ab5cf55b433b45323a
parent: 12ec948490c61bf234febc84196216442529180b
author: Yunqing Wang <yunqingwang@google.com>
date: Fri Feb 3 20:02:29 EST 2017
Merge "Changes to facilitate multi-threading of encoding stage"
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -484,23 +484,31 @@
}
static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd,
- const TileInfo *const tile, vpx_writer *w,
- TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
- unsigned int *const max_mv_magnitude,
+ const TileInfo *const tile, vpx_writer *w, int tile_row,  // tile_row and tile_col select this tile's per-row token lists in cpi->tplist
+ int tile_col, unsigned int *const max_mv_magnitude,
int interp_filter_selected[MAX_REF_FRAMES]
[SWITCHABLE]) {
const VP9_COMMON *const cm = &cpi->common;
- int mi_row, mi_col;
+ int mi_row, mi_col, tile_sb_row;
+ TOKENEXTRA *tok = NULL;  // token cursor; reloaded at the top of every superblock row
+ TOKENEXTRA *tok_end = NULL;  // end of the current row's tokens (start + count)
set_partition_probs(cm, xd);
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
+ tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >>  // row offset from the tile's first row ...
+ MI_BLOCK_SIZE_LOG2;  // ... shifted down to index this tile's tplist entries (NOTE(review): mi_cols_aligned_to_sb is defined elsewhere; presumably rounds up to a superblock multiple)
+ tok = cpi->tplist[tile_row][tile_col][tile_sb_row].start;  // first token recorded for this row
+ tok_end = tok + cpi->tplist[tile_row][tile_col][tile_sb_row].count;  // bound handed to write_modes_sb
+
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col,  // the local cursor is passed by address so the callee can advance it
BLOCK_64X64, max_mv_magnitude, interp_filter_selected);
+
+ assert(tok == cpi->tplist[tile_row][tile_col][tile_sb_row].stop);  // every token recorded for this row must have been consumed
}
}
@@ -919,9 +927,8 @@
MACROBLOCKD *const xd = &data->xd;
vpx_start_encode(&data->bit_writer, data->dest);
write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
- &data->bit_writer, &data->tok, data->tok_end,
- &data->max_mv_magnitude, data->interp_filter_selected);
- assert(data->tok == data->tok_end);
+ &data->bit_writer, 0, data->tile_idx, &data->max_mv_magnitude,
+ data->interp_filter_selected);
vpx_stop_encode(&data->bit_writer);
return 1;
}
@@ -978,8 +985,6 @@
// Populate the worker data.
data->xd = cpi->td.mb.e_mbd;
data->tile_idx = tile_col;
- data->tok = cpi->tile_tok[0][tile_col];
- data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col];
data->max_mv_magnitude = cpi->max_mv_magnitude;
memset(data->interp_filter_selected, 0,
sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE);
@@ -1039,7 +1044,6 @@
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
vpx_writer residual_bc;
int tile_row, tile_col;
- TOKENEXTRA *tok_end;
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1058,11 +1062,7 @@
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_idx = tile_row * tile_cols + tile_col;
- TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
- tok_end = cpi->tile_tok[tile_row][tile_col] +
- cpi->tok_count[tile_row][tile_col];
-
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
else
@@ -1069,9 +1069,9 @@
vpx_start_encode(&residual_bc, data_ptr + total_size);
write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
- &tok, tok_end, &cpi->max_mv_magnitude,
+ tile_row, tile_col, &cpi->max_mv_magnitude,
cpi->interp_filter_selected);
- assert(tok == tok_end);
+
vpx_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
// size of this tile
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -20,8 +20,6 @@
typedef struct VP9BitstreamWorkerData {
uint8_t *dest;
int dest_size;
- TOKENEXTRA *tok;
- TOKENEXTRA *tok_end;
vpx_writer bit_writer;
int tile_idx;
unsigned int max_mv_magnitude;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4078,7 +4078,9 @@
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
+ TOKENLIST *tplist = cpi->tplist[0][0];
int tile_tok = 0;
+ int tplist_count = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
@@ -4109,10 +4111,44 @@
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = cpi->tile_tok[tile_row][tile_col];
tile_tok = allocated_tokens(*tile_info);
+
+ cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
+ tplist = cpi->tplist[tile_row][tile_col];
+ tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
}
}
}
+void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,  // Encodes the superblock row starting at mi_row in tile (tile_row, tile_col)
+ int tile_col, int mi_row) {  // and records the span of tokens it produced in cpi->tplist.
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];  // tile_data is indexed row-major by tile
+ const TileInfo *const tile_info = &this_tile->tile_info;
+ TOKENEXTRA *tok = NULL;  // set by get_start_tok, then advanced by the row encoder
+ int tile_sb_row;
+ int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;  // tile width in mi units, halved rounding up; only read by the assert below
+
+ tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>  // row offset from the tile's first row ...
+ MI_BLOCK_SIZE_LOG2;  // ... shifted down to index this tile's tplist entries
+ get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);  // tok <- where this row's tokens begin inside cpi->tile_tok[tile_row][tile_col]
+ cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok;
+
+ if (cpi->sf.use_nonrd_pick_mode)  // both encoders receive &tok; its value afterwards is stored as .stop
+ encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
+ else
+ encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
+
+ cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok;  // cursor position after the row has been encoded
+ cpi->tplist[tile_row][tile_col][tile_sb_row].count =  // number of tokens between start and stop
+ (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop -
+ cpi->tplist[tile_row][tile_col][tile_sb_row].start);
+ assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <=  // the row must not exceed the allowance computed by get_token_alloc
+ get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));  // NOTE(review): get_token_alloc is defined elsewhere; presumably takes (mb_rows, mb_cols) - confirm
+
+ (void)tile_mb_cols;  // keeps the variable referenced when assert() is compiled out
+}
+
void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
int tile_col) {
VP9_COMMON *const cm = &cpi->common;
@@ -4119,7 +4155,6 @@
const int tile_cols = 1 << cm->log2_tile_cols;
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
const TileInfo *const tile_info = &this_tile->tile_info;
- TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
const int mi_row_start = tile_info->mi_row_start;
const int mi_row_end = tile_info->mi_row_end;
int mi_row;
@@ -4130,16 +4165,8 @@
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
- for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
- if (cpi->sf.use_nonrd_pick_mode)
- encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
- else
- encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
- }
- cpi->tok_count[tile_row][tile_col] =
- (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
- assert(tok - cpi->tile_tok[tile_row][tile_col] <=
- allocated_tokens(*tile_info));
+ for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
+ vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
static void encode_tiles(VP9_COMP *cpi) {
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -495,6 +495,9 @@
vpx_free(cpi->tile_tok[0][0]);
cpi->tile_tok[0][0] = 0;
+ vpx_free(cpi->tplist[0][0]);
+ cpi->tplist[0][0] = NULL;
+
vp9_free_pc_tree(&cpi->td);
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
@@ -829,6 +832,7 @@
static void alloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
+ int sb_rows;
vp9_alloc_context_buffers(cm, cm->width, cm->height);
@@ -841,6 +845,12 @@
CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
}
+
+ sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ vpx_free(cpi->tplist[0][0]);
+ CHECK_MEM_ERROR(
+ cm, cpi->tplist[0][0],
+ vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
vp9_setup_pc_tree(&cpi->common, &cpi->td);
}
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -284,6 +284,12 @@
#endif
} RowMTInfo;
+typedef struct {  // Token span of one superblock row of a tile; filled in by vp9_encode_sb_row
+ TOKENEXTRA *start;  // first token of the row
+ TOKENEXTRA *stop;  // token cursor after the row was encoded (start + count)
+ unsigned int count;  // stop - start
+} TOKENLIST;
+
typedef struct MultiThreadHandle {
int allocated_tile_rows;
int allocated_tile_cols;
@@ -478,6 +484,7 @@
TOKENEXTRA *tile_tok[4][1 << 6];
uint32_t tok_count[4][1 << 6];
+ TOKENLIST *tplist[4][1 << 6];
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
@@ -784,6 +791,20 @@
int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
return get_token_alloc(tile_mb_rows, tile_mb_cols);
+}
+
+static INLINE void get_start_tok(VP9_COMP *cpi, int tile_row, int tile_col,  // Sets *tok to where the tokens of the row starting at mi_row begin
+ int mi_row, TOKENEXTRA **tok) {  // inside the token buffer of tile (tile_row, tile_col).
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];  // tile_data is indexed row-major by tile
+ const TileInfo *const tile_info = &this_tile->tile_info;
+
+ int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;  // tile width in mi units, halved rounding up
+ const int mb_row = (mi_row - tile_info->mi_row_start) >> 1;  // rows between the tile's top and mi_row, halved the same way
+
+ *tok =
+ cpi->tile_tok[tile_row][tile_col] + get_token_alloc(mb_row, tile_mb_cols);  // tile base plus the allowance for the rows above (NOTE(review): get_token_alloc is defined elsewhere - confirm its units)
}
int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);