ref: c1b024b48861dcd788a3419e556eb31bb727d5d1
parent: 5818014b691c1dc20f3597bbcbf165782b54eacb
author: Ritu Baldwa <ritu.baldwa@ittiam.com>
date: Wed Jan 23 04:39:06 EST 2019
Modify map read/write to sync logic in row_mt case. Adds condition-variable wait/signal instead of sched_yield. Change-Id: I49a760eacdd6b6ac690e797ea5f10febf6a1a084
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1645,32 +1645,36 @@
}
}
-static void map_write(RowMTWorkerData *row_mt_worker_data, int idx) {
+static void map_write(RowMTWorkerData *const row_mt_worker_data, int map_idx,
+ int sync_idx) {
#if CONFIG_MULTITHREAD
- pthread_mutex_lock(&row_mt_worker_data->map_mutex);
- row_mt_worker_data->recon_map[idx] = 1;
- pthread_mutex_unlock(&row_mt_worker_data->map_mutex);
+ pthread_mutex_lock(&row_mt_worker_data->recon_sync_mutex[sync_idx]);
+ row_mt_worker_data->recon_map[map_idx] = 1;
+ pthread_cond_signal(&row_mt_worker_data->recon_sync_cond[sync_idx]);
+ pthread_mutex_unlock(&row_mt_worker_data->recon_sync_mutex[sync_idx]);
#else
(void)row_mt_worker_data;
- (void)idx;
-#endif
+ (void)map_idx;
+ (void)sync_idx;
+#endif // CONFIG_MULTITHREAD
}
-static void map_read(RowMTWorkerData *row_mt_worker_data, int idx) {
+static void map_read(RowMTWorkerData *const row_mt_worker_data, int map_idx,
+ int sync_idx) {
#if CONFIG_MULTITHREAD
- volatile int8_t *map = row_mt_worker_data->recon_map + idx;
- pthread_mutex_lock(&row_mt_worker_data->map_mutex);
- // TODO(ritu.baldwa): Replace this with a condition variable
- while (!*map) {
- pthread_mutex_unlock(&row_mt_worker_data->map_mutex);
- sched_yield();
- pthread_mutex_lock(&row_mt_worker_data->map_mutex);
+ volatile int8_t *map = row_mt_worker_data->recon_map + map_idx;
+ pthread_mutex_t *const mutex =
+ &row_mt_worker_data->recon_sync_mutex[sync_idx];
+ pthread_mutex_lock(mutex);
+ while (!(*map)) {
+ pthread_cond_wait(&row_mt_worker_data->recon_sync_cond[sync_idx], mutex);
}
- pthread_mutex_unlock(&row_mt_worker_data->map_mutex);
+ pthread_mutex_unlock(mutex);
#else
(void)row_mt_worker_data;
- (void)idx;
-#endif
+ (void)map_idx;
+ (void)sync_idx;
+#endif // CONFIG_MULTITHREAD
}
static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) {
@@ -1699,10 +1703,10 @@
int terminate;
RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data;
const int all_parse_done = 1 << pbi->common.log2_tile_cols;
- pthread_mutex_lock(&row_mt_worker_data->recon_mutex);
+ pthread_mutex_lock(&row_mt_worker_data->recon_done_mutex);
row_mt_worker_data->num_tiles_done++;
terminate = all_parse_done == row_mt_worker_data->num_tiles_done;
- pthread_mutex_unlock(&row_mt_worker_data->recon_mutex);
+ pthread_mutex_unlock(&row_mt_worker_data->recon_done_mutex);
if (terminate) {
vp9_jobq_terminate(&row_mt_worker_data->jobq);
}
@@ -1729,7 +1733,8 @@
}
static void recon_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi,
- int mi_row, int is_last_row, VP9LfSync *lf_sync) {
+ int mi_row, int is_last_row, VP9LfSync *lf_sync,
+ int cur_tile_col) {
VP9_COMMON *const cm = &pbi->common;
RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -1749,7 +1754,8 @@
// Top Dependency
if (cur_sb_row) {
- map_read(row_mt_worker_data, ((cur_sb_row - 1) * sb_cols) + c);
+ map_read(row_mt_worker_data, ((cur_sb_row - 1) * sb_cols) + c,
+ ((cur_sb_row - 1) * tile_cols) + cur_tile_col);
}
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
@@ -1786,7 +1792,8 @@
}
}
}
- map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c);
+ map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c,
+ (cur_sb_row * tile_cols) + cur_tile_col);
}
}
@@ -1840,6 +1847,7 @@
const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+ const int tile_cols = 1 << cm->log2_tile_cols;
Job job;
LFWorkerData *lf_data = thread_data->lf_data;
VP9LfSync *lf_sync = thread_data->lf_sync;
@@ -1877,7 +1885,8 @@
for (mi_col = mi_col_start; mi_col < mi_col_end;
mi_col += MI_BLOCK_SIZE) {
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
- map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c);
+ map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c,
+ (cur_sb_row * tile_cols) + job.tile_col);
}
if (is_last_row) {
vp9_tile_done(pbi);
@@ -1888,7 +1897,8 @@
tile_data_recon->error_info.setjmp = 1;
tile_data_recon->xd.error_info = &tile_data_recon->error_info;
- recon_tile_row(tile_data_recon, pbi, mi_row, is_last_row, lf_sync);
+ recon_tile_row(tile_data_recon, pbi, mi_row, is_last_row, lf_sync,
+ job.tile_col);
if (corrupted)
vpx_internal_error(&tile_data_recon->error_info,
@@ -2756,13 +2766,14 @@
setup_tile_info(cm, rb);
if (pbi->row_mt == 1) {
int num_sbs = 1;
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
if (pbi->row_mt_worker_data == NULL) {
CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
#if CONFIG_MULTITHREAD
- pthread_mutex_init(&pbi->row_mt_worker_data->recon_mutex, NULL);
- pthread_mutex_init(&pbi->row_mt_worker_data->map_mutex, NULL);
+ pthread_mutex_init(&pbi->row_mt_worker_data->recon_done_mutex, NULL);
#endif
}
@@ -2769,8 +2780,6 @@
if (pbi->max_threads > 1) {
const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2;
- const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
- const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
num_sbs = sb_cols * sb_rows;
}
@@ -2778,7 +2787,7 @@
if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs,
- pbi->max_threads);
+ pbi->max_threads, sb_rows << cm->log2_tile_cols);
}
vp9_jobq_alloc(pbi);
}
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -56,10 +56,34 @@
}
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
- VP9_COMMON *cm, int num_sbs, int max_threads) {
+ VP9_COMMON *cm, int num_sbs, int max_threads,
+ int num_jobs) {
int plane;
const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
sizeof(*row_mt_worker_data->dqcoeff[0]);
+ row_mt_worker_data->num_jobs = num_jobs;
+#if CONFIG_MULTITHREAD
+ {
+ int i;
+ CHECK_MEM_ERROR(
+ cm, row_mt_worker_data->recon_sync_mutex,
+ vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs));
+ if (row_mt_worker_data->recon_sync_mutex) {
+ for (i = 0; i < num_jobs; ++i) {
+ pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(
+ cm, row_mt_worker_data->recon_sync_cond,
+ vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs));
+ if (row_mt_worker_data->recon_sync_cond) {
+ for (i = 0; i < num_jobs; ++i) {
+ pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL);
+ }
+ }
+ }
+#endif
row_mt_worker_data->num_sbs = num_sbs;
for (plane = 0; plane < 3; ++plane) {
CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
@@ -87,6 +111,23 @@
void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) {
if (row_mt_worker_data != NULL) {
int plane;
+#if CONFIG_MULTITHREAD
+ int i;
+ if (row_mt_worker_data->recon_sync_mutex != NULL) {
+ for (i = 0; i < row_mt_worker_data->num_jobs; ++i) {
+ pthread_mutex_destroy(&row_mt_worker_data->recon_sync_mutex[i]);
+ }
+ vpx_free(row_mt_worker_data->recon_sync_mutex);
+ row_mt_worker_data->recon_sync_mutex = NULL;
+ }
+ if (row_mt_worker_data->recon_sync_cond != NULL) {
+ for (i = 0; i < row_mt_worker_data->num_jobs; ++i) {
+ pthread_cond_destroy(&row_mt_worker_data->recon_sync_cond[i]);
+ }
+ vpx_free(row_mt_worker_data->recon_sync_cond);
+ row_mt_worker_data->recon_sync_cond = NULL;
+ }
+#endif
for (plane = 0; plane < 3; ++plane) {
vpx_free(row_mt_worker_data->eob[plane]);
row_mt_worker_data->eob[plane] = NULL;
@@ -193,8 +234,7 @@
vp9_jobq_deinit(&pbi->row_mt_worker_data->jobq);
vpx_free(pbi->row_mt_worker_data->jobq_buf);
#if CONFIG_MULTITHREAD
- pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_mutex);
- pthread_mutex_destroy(&pbi->row_mt_worker_data->map_mutex);
+ pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_done_mutex);
#endif
}
vpx_free(pbi->row_mt_worker_data);
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -74,9 +74,11 @@
JobQueueRowMt jobq;
size_t jobq_size;
int num_tiles_done;
+ int num_jobs;
#if CONFIG_MULTITHREAD
- pthread_mutex_t recon_mutex;
- pthread_mutex_t map_mutex;
+ pthread_mutex_t recon_done_mutex;
+ pthread_mutex_t *recon_sync_mutex;
+ pthread_cond_t *recon_sync_cond;
#endif
ThreadData *thread_data;
} RowMTWorkerData;
@@ -159,7 +161,8 @@
void vp9_decoder_remove(struct VP9Decoder *pbi);
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
- VP9_COMMON *cm, int num_sbs, int max_threads);
+ VP9_COMMON *cm, int num_sbs, int max_threads,
+ int num_jobs);
void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data);
static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,