shithub: libvpx

Download patch

ref: 7cb4ab56eaa27b8b08b745dbc33d8c087050bd4e
parent: bfc2a7e3a04908e5d82f656bf1723ad4eddff986
author: Scott LaVarnway <slavarnway@google.com>
date: Thu Apr 7 07:44:27 EDT 2016

VP9: Combine TileData with TileWorkerData

Change-Id: I83536734a54ef7b85f90f56a51878d94fac7ff22

--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1460,7 +1460,7 @@
   TileBuffer tile_buffers[4][1 << 6];
   int tile_row, tile_col;
   int mi_row, mi_col;
-  TileData *tile_data = NULL;
+  TileWorkerData *tile_data = NULL;
 
   if (cm->lf.filter_level && !cm->skip_loop_filter &&
       pbi->lf_worker.data1 == NULL) {
@@ -1496,28 +1496,17 @@
 
   get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
 
-  if (pbi->tile_data == NULL ||
-      (tile_cols * tile_rows) != pbi->total_tiles) {
-    vpx_free(pbi->tile_data);
-    CHECK_MEM_ERROR(
-        cm,
-        pbi->tile_data,
-        vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
-    pbi->total_tiles = tile_rows * tile_cols;
-  }
-
   // Load all tile information into tile_data.
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
       const TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
-      tile_data = pbi->tile_data + tile_cols * tile_row + tile_col;
-      tile_data->cm = cm;
+      tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col;
       tile_data->xd = pbi->mb;
       tile_data->xd.corrupted = 0;
-      tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
-                             NULL : &cm->counts;
+      tile_data->xd.counts =
+          cm->frame_parallel_decoding_mode ? NULL : &cm->counts;
       vp9_zero(tile_data->dqcoeff);
-      vp9_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col);
+      vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col);
       setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
                           &tile_data->bit_reader, pbi->decrypt_cb,
                           pbi->decrypt_state);
@@ -1533,8 +1522,8 @@
       for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
         const int col = pbi->inv_tile_order ?
                         tile_cols - tile_col - 1 : tile_col;
-        tile_data = pbi->tile_data + tile_cols * tile_row + col;
-        vp9_tile_set_col(&tile, tile_data->cm, col);
+        tile_data = pbi->tile_worker_data + tile_cols * tile_row + col;
+        vp9_tile_set_col(&tile, cm, col);
         vp9_zero(tile_data->xd.left_context);
         vp9_zero(tile_data->xd.left_seg_context);
         for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
@@ -1586,7 +1575,7 @@
   }
 
   // Get last tile data.
-  tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
+  tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1;
 
   if (pbi->frame_parallel_decode)
     vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
@@ -1671,12 +1660,6 @@
     const int num_threads = pbi->max_threads;
     CHECK_MEM_ERROR(cm, pbi->tile_workers,
                     vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
-    // Ensure tile data offsets will be properly aligned. This may fail on
-    // platforms without DECLARE_ALIGNED().
-    assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
-    CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
-                    vpx_memalign(32, num_threads *
-                                 sizeof(*pbi->tile_worker_data)));
     for (n = 0; n < num_threads; ++n) {
       VPxWorker *const worker = &pbi->tile_workers[n];
       ++pbi->num_tile_workers;
@@ -1692,7 +1675,8 @@
   // Reset tile decoding hook
   for (n = 0; n < num_workers; ++n) {
     VPxWorker *const worker = &pbi->tile_workers[n];
-    TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
+    TileWorkerData *const tile_data =
+        &pbi->tile_worker_data[n + pbi->total_tiles];
     winterface->sync(worker);
     tile_data->xd = pbi->mb;
     tile_data->xd.counts =
@@ -2219,6 +2203,19 @@
     // Signal the main thread that context is ready.
     vp9_frameworker_signal_stats(worker);
     vp9_frameworker_unlock_stats(worker);
+  }
+
+  if (pbi->tile_worker_data == NULL ||
+      (tile_cols * tile_rows) != pbi->total_tiles) {
+    const int num_tile_workers = tile_cols * tile_rows +
+        ((pbi->max_threads > 1) ? pbi->max_threads : 0);
+    const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data);
+    // Ensure tile data offsets will be properly aligned. This may fail on
+    // platforms without DECLARE_ALIGNED().
+    assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
+    vpx_free(pbi->tile_worker_data);
+    CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size));
+    pbi->total_tiles = tile_rows * tile_cols;
   }
 
   if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -131,11 +131,12 @@
 
   vpx_get_worker_interface()->end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
-  vpx_free(pbi->tile_data);
+
   for (i = 0; i < pbi->num_tile_workers; ++i) {
     VPxWorker *const worker = &pbi->tile_workers[i];
     vpx_get_worker_interface()->end(worker);
   }
+
   vpx_free(pbi->tile_worker_data);
   vpx_free(pbi->tile_workers);
 
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -27,15 +27,6 @@
 extern "C" {
 #endif
 
-// TODO(hkuang): combine this with TileWorkerData.
-typedef struct TileData {
-  VP9_COMMON *cm;
-  vpx_reader bit_reader;
-  DECLARE_ALIGNED(16, MACROBLOCKD, xd);
-  /* dqcoeff are shared by all the planes. So planes must be decoded serially */
-  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
-} TileData;
-
 typedef struct TileBuffer {
   const uint8_t *data;
   size_t size;
@@ -74,8 +65,6 @@
   TileWorkerData *tile_worker_data;
   TileBuffer tile_buffers[64];
   int num_tile_workers;
-
-  TileData *tile_data;
   int total_tiles;
 
   VP9LfSync lf_row_sync;