shithub: libvpx

Download patch

ref: 351ec07a6da930b5751cc84586f15ed65f1f9033
parent: 4da98b0cc3a539d1be028d0c4c87d7cd3d718781
author: Ritu Baldwa <ritu.baldwa@ittiam.com>
date: Wed Oct 10 12:25:51 EDT 2018

Add Memory to Enable Row Decode

Row based multi-thread needs extra memory to store the parsed
co-efficients, partitions and eob. This commit adds memory for the same.

Change-Id: I13fa4a6ada2ec3048bc973e465055b832429388f

--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1984,6 +1984,28 @@
   setup_segmentation_dequant(cm);
 
   setup_tile_info(cm, rb);
+  if (pbi->row_mt == 1) {
+    int num_sbs = 1;
+
+    if (pbi->row_mt_worker_data == NULL) {
+      CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
+                      vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
+    }
+
+    if (pbi->max_threads > 1) {
+      const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+      const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2;
+      const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+      const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+
+      num_sbs = sb_cols * sb_rows;
+    }
+
+    if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
+      vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+      vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs);
+    }
+  }
   sz = vpx_rb_read_literal(rb, 16);
 
   if (sz == 0)
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -55,6 +55,43 @@
          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 }
 
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+                              VP9_COMMON *cm, int num_sbs) {
+  int plane;
+  const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
+                              sizeof(*row_mt_worker_data->dqcoeff[0]);
+  row_mt_worker_data->num_sbs = num_sbs;
+  for (plane = 0; plane < 3; ++plane) {
+    CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
+                    vpx_memalign(16, dqcoeff_size));
+    memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
+    CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
+                    vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
+                               sizeof(*row_mt_worker_data->eob[plane])));
+  }
+  CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
+                  vpx_calloc(num_sbs * PARTITIONS_PER_SB,
+                             sizeof(*row_mt_worker_data->partition)));
+  CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
+                  vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));
+}
+
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) {
+  if (row_mt_worker_data != NULL) {
+    int plane;
+    for (plane = 0; plane < 3; ++plane) {
+      vpx_free(row_mt_worker_data->eob[plane]);
+      row_mt_worker_data->eob[plane] = NULL;
+      vpx_free(row_mt_worker_data->dqcoeff[plane]);
+      row_mt_worker_data->dqcoeff[plane] = NULL;
+    }
+    vpx_free(row_mt_worker_data->partition);
+    row_mt_worker_data->partition = NULL;
+    vpx_free(row_mt_worker_data->recon_map);
+    row_mt_worker_data->recon_map = NULL;
+  }
+}
+
 static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
   if (!cm->mip) return 1;
@@ -140,6 +177,10 @@
     vp9_loop_filter_dealloc(&pbi->lf_row_sync);
   }
 
+  if (pbi->row_mt == 1) {
+    vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+    vpx_free(pbi->row_mt_worker_data);
+  }
   vp9_remove_common(&pbi->common);
   vpx_free(pbi);
 }
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -26,6 +26,10 @@
 extern "C" {
 #endif
 
+#define EOBS_PER_SB_LOG2 8
+#define DQCOEFFS_PER_SB_LOG2 12
+#define PARTITIONS_PER_SB 85
+
 typedef struct TileBuffer {
   const uint8_t *data;
   size_t size;
@@ -45,6 +49,14 @@
   struct vpx_internal_error_info error_info;
 } TileWorkerData;
 
+typedef struct RowMTWorkerData {
+  int num_sbs;
+  int *eob[MAX_MB_PLANE];
+  PARTITION_TYPE *partition;
+  tran_low_t *dqcoeff[MAX_MB_PLANE];
+  int8_t *recon_map;
+} RowMTWorkerData;
+
 typedef struct VP9Decoder {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
@@ -77,6 +89,7 @@
 
   int row_mt;
   int lpf_mt_opt;
+  RowMTWorkerData *row_mt_worker_data;
 } VP9Decoder;
 
 int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
@@ -113,6 +126,10 @@
 struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
 
 void vp9_decoder_remove(struct VP9Decoder *pbi);
+
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+                              VP9_COMMON *cm, int num_sbs);
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data);
 
 static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
                                       BufferPool *const pool) {