shithub: libvpx

Download patch

ref: 753fd86e86ac727dccac88376260b8f54502f2a3
parent: 0bfab06084e44a62067aeabf43ba8087cee4a3ae
author: James Zern <jzern@google.com>
date: Thu Aug 30 05:58:02 EDT 2018

Revert "Loopfilter MultiThread Optimization"

This reverts commit dafe064289a917977439ab6f4f002b9946496084.

Corrupted files may cause the decoder to hang as row progress in the
loopfilter is used to progress each thread.

BUG=webm:1558

Change-Id: I0674ce9af14d3fb7b2da8124e7b600616c8e734a

--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -258,8 +258,6 @@
   PARTITION_CONTEXT *above_seg_context;
   ENTROPY_CONTEXT *above_context;
   int above_context_alloc_cols;
-
-  int lf_row;
 } VP9_COMMON;
 
 static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -229,26 +229,6 @@
                       workers, num_workers, lf_sync);
 }
 
-void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
-                     int num_workers) {
-  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
-
-  if (!frame_filter_level) return;
-
-  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
-      num_workers > lf_sync->num_workers) {
-    vp9_loop_filter_dealloc(lf_sync);
-    vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
-  }
-
-  // Initialize cur_sb_col to -1 for all SB rows.
-  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
-
-  memset(lf_sync->num_tiles_done, 0,
-         sizeof(*lf_sync->num_tiles_done) * sb_rows);
-  cm->lf_row = 0;
-}
-
 // Set up nsync by width.
 static INLINE int get_sync_range(int width) {
   // nsync numbers are picked by testing. For example, for 4k
@@ -286,25 +266,6 @@
         pthread_cond_init(&lf_sync->cond_[i], NULL);
       }
     }
-    pthread_mutex_init(&lf_sync->lf_mutex, NULL);
-
-    CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
-                    vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
-    if (lf_sync->recon_done_mutex) {
-      int i;
-      for (i = 0; i < rows; ++i) {
-        pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
-      }
-    }
-
-    CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
-                    vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
-    if (lf_sync->recon_done_cond) {
-      int i;
-      for (i = 0; i < rows; ++i) {
-        pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
-      }
-    }
   }
 #endif  // CONFIG_MULTITHREAD
 
@@ -315,11 +276,6 @@
   CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
                   vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
 
-  CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
-                  vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
-                                 mi_cols_aligned_to_sb(cm->mi_rows) >>
-                             MI_BLOCK_SIZE_LOG2));
-
   // Set up nsync.
   lf_sync->sync_range = get_sync_range(width);
 }
@@ -342,116 +298,13 @@
       }
       vpx_free(lf_sync->cond_);
     }
-    if (lf_sync->recon_done_mutex != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
-      }
-      vpx_free(lf_sync->recon_done_mutex);
-    }
-
-    pthread_mutex_destroy(&lf_sync->lf_mutex);
-    if (lf_sync->recon_done_cond != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
-      }
-      vpx_free(lf_sync->recon_done_cond);
-    }
 #endif  // CONFIG_MULTITHREAD
-
     vpx_free(lf_sync->lfdata);
     vpx_free(lf_sync->cur_sb_col);
-    vpx_free(lf_sync->num_tiles_done);
     // clear the structure as the source of this call may be a resize in which
     // case this call will be followed by an _alloc() which may fail.
     vp9_zero(*lf_sync);
   }
-}
-
-static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
-  int return_val = -1;
-  int cur_row;
-  const int max_rows = cm->mi_rows;
-
-#if CONFIG_MULTITHREAD
-  const int tile_cols = 1 << cm->log2_tile_cols;
-
-  pthread_mutex_lock(&lf_sync->lf_mutex);
-  if (cm->lf_row < max_rows) {
-    cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
-    return_val = cm->lf_row;
-    cm->lf_row += MI_BLOCK_SIZE;
-    if (cm->lf_row < max_rows) {
-      /* If this is not the last row, make sure the next row is also decoded.
-       * This is because the intra predict has to happen before loop filter */
-      cur_row += 1;
-    }
-  }
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
-
-  if (return_val == -1) return return_val;
-
-  pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
-  if (lf_sync->num_tiles_done[cur_row] < tile_cols) {
-    pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
-                      &lf_sync->recon_done_mutex[cur_row]);
-  }
-  pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
-#else
-  (void)lf_sync;
-  if (cm->lf_row < max_rows) {
-    cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
-    return_val = cm->lf_row;
-    cm->lf_row += MI_BLOCK_SIZE;
-    if (cm->lf_row < max_rows) {
-      /* If this is not the last row, make sure the next row is also decoded.
-       * This is because the intra predict has to happen before loop filter */
-      cur_row += 1;
-    }
-  }
-#endif  // CONFIG_MULTITHREAD
-
-  return return_val;
-}
-
-void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync,
-                         MACROBLOCKD *xd) {
-  int mi_row;
-  VP9_COMMON *cm = lf_data->cm;
-
-  while (!xd->corrupted && (mi_row = get_next_row(cm, lf_sync)) != -1 &&
-         mi_row < cm->mi_rows) {
-    lf_data->start = mi_row;
-    lf_data->stop = mi_row + MI_BLOCK_SIZE;
-
-    thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
-                            lf_data->start, lf_data->stop, lf_data->y_only,
-                            lf_sync);
-  }
-}
-
-void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row) {
-#if CONFIG_MULTITHREAD
-  pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
-  lf_sync->num_tiles_done[row] += 1;
-  if (num_tiles == lf_sync->num_tiles_done[row]) {
-    if (is_last_row) {
-      /* The last 2 rows wait on the last row to be done.
-       * So, we have to broadcast the signal in this case.
-       */
-      pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
-    } else {
-      pthread_cond_signal(&lf_sync->recon_done_cond[row]);
-    }
-  }
-  pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
-#else
-  (void)lf_sync;
-  (void)num_tiles;
-  (void)row;
-  (void)is_last_row;
-#endif  // CONFIG_MULTITHREAD
 }
 
 // Accumulate frame counts.
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -37,13 +37,6 @@
   // Row-based parallel loopfilter data
   LFWorkerData *lfdata;
   int num_workers;
-
-#if CONFIG_MULTITHREAD
-  pthread_mutex_t lf_mutex;
-  pthread_mutex_t *recon_done_mutex;
-  pthread_cond_t *recon_done_cond;
-#endif
-  int *num_tiles_done;
 } VP9LfSync;
 
 // Allocate memory for loopfilter row synchronization.
@@ -59,17 +52,6 @@
                               int frame_filter_level, int y_only,
                               int partial_frame, VPxWorker *workers,
                               int num_workers, VP9LfSync *lf_sync);
-
-// Multi-threaded loopfilter initialisations
-void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
-                     int frame_filter_level, int num_workers);
-
-void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync,
-                         MACROBLOCKD *xd);
-
-void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row);
-
-void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
 
 void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
                                  const struct FRAME_COUNTS *counts, int is_dec);
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1487,11 +1487,6 @@
   TileInfo *volatile tile = &tile_data->xd.tile;
   const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
   const uint8_t *volatile bit_reader_end = NULL;
-  VP9_COMMON *cm = &pbi->common;
-
-  LFWorkerData *lf_data = tile_data->lf_data;
-  VP9LfSync *lf_sync = tile_data->lf_sync;
-
   volatile int n = tile_data->buf_start;
   tile_data->error_info.setjmp = 1;
 
@@ -1524,13 +1519,6 @@
            mi_col += MI_BLOCK_SIZE) {
         decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
       }
-      if (cm->lf.filter_level && !cm->skip_loop_filter) {
-        const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
-        const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
-        const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
-        vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
-                    mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row);
-      }
     }
 
     if (buf->col == final_col) {
@@ -1538,11 +1526,6 @@
     }
   } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
 
-  if (!tile_data->xd.corrupted && cm->lf.filter_level &&
-      !cm->skip_loop_filter) {
-    vp9_loopfilter_rows(lf_data, lf_sync, &tile_data->xd);
-  }
-
   tile_data->data_end = bit_reader_end;
   return !tile_data->xd.corrupted;
 }
@@ -1559,8 +1542,6 @@
   VP9_COMMON *const cm = &pbi->common;
   const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
   const uint8_t *bit_reader_end = NULL;
-  VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
-  YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1587,12 +1568,6 @@
     }
   }
 
-  // Initialize LPF
-  if (cm->lf.filter_level && !cm->skip_loop_filter) {
-    vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
-                    pbi->num_tile_workers);
-  }
-
   // Reset tile decoding hook
   for (n = 0; n < num_workers; ++n) {
     VPxWorker *const worker = &pbi->tile_workers[n];
@@ -1599,14 +1574,6 @@
     TileWorkerData *const tile_data =
         &pbi->tile_worker_data[n + pbi->total_tiles];
     winterface->sync(worker);
-
-    if (cm->lf.filter_level && !cm->skip_loop_filter) {
-      tile_data->lf_sync = lf_row_sync;
-      tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
-      vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
-      tile_data->lf_data->y_only = 0;
-    }
-
     tile_data->xd = pbi->mb;
     tile_data->xd.counts =
         cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -2128,7 +2095,15 @@
   if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
     // Multi-threaded tile decoder
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
-    if (xd->corrupted) {
+    if (!xd->corrupted) {
+      if (!cm->skip_loop_filter) {
+        // If multiple threads are used to decode tiles, then we use those
+        // threads to do parallel loopfiltering.
+        vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
+                                 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+                                 &pbi->lf_row_sync);
+      }
+    } else {
       vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                          "Decode failed. Frame data is corrupted.");
     }
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -37,8 +37,6 @@
   int buf_start, buf_end;  // pbi->tile_buffers to decode, inclusive
   vpx_reader bit_reader;
   FRAME_COUNTS counts;
-  LFWorkerData *lf_data;
-  VP9LfSync *lf_sync;
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
--- a/vpx_util/vpx_thread.h
+++ b/vpx_util/vpx_thread.h
@@ -159,23 +159,6 @@
   return 0;
 }
 
-static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) {
-  int ok = 1;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
-  WakeAllConditionVariable(condition);
-#else
-  while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
-    // a thread is waiting in pthread_cond_wait: allow it to be notified
-    ok &= SetEvent(condition->signal_event_);
-    // wait until the event is consumed so the signaler cannot consume
-    // the event via its own pthread_cond_wait.
-    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
-           WAIT_OBJECT_0);
-  }
-#endif
-  return !ok;
-}
-
 static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
   int ok = 1;
 #ifdef USE_WINDOWS_CONDITION_VARIABLE