shithub: libvpx

Download patch

ref: e8b2750904fd78cf168fa196b38d35594b060e8a
parent: b758ba795a04d551caa3c1af2806cdf0c075803a
author: James Zern <jzern@google.com>
date: Sat Apr 6 13:07:13 EDT 2019

loop_filter_rows_mt: unify worker count calculation

Fixes a deadlock that occurs with an odd number of threads when the thread
count goes from less than the number of tiles to greater than it. The
previous calculations were out of sync, so going from, e.g., 8 tiles to 2
with 3 threads would result in scheduling only 2 workers, while
thread_loop_filter_rows() would expect 3.

BUG=webm:1618

Change-Id: I78c967a8c3c927d929e13c949808a5ef443ebacb

--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -193,6 +193,50 @@
   libvpx_test::TestMode encoding_mode_;
 };
 
+#if CONFIG_VP9_DECODER
+// The test parameters control VP9D_SET_LOOP_FILTER_OPT and the number of
+// decoder threads.
+class EndToEndTestLoopFilterThreading
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<bool, int> {
+ protected:
+  EndToEndTestLoopFilterThreading()
+      : EncoderTest(GET_PARAM(0)), use_loop_filter_opt_(GET_PARAM(1)) {}
+
+  virtual ~EndToEndTestLoopFilterThreading() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    cfg_.g_threads = 2;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.kf_min_dist = 1;
+    cfg_.kf_max_dist = 1;
+    dec_cfg_.threads = GET_PARAM(2);
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP8E_SET_CPUUSED, 8);
+    }
+    encoder->Control(VP9E_SET_TILE_COLUMNS, 4 - video->frame() % 5);
+  }
+
+  virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Decoder *decoder) {
+    if (video->frame() == 0) {
+      decoder->Control(VP9D_SET_LOOP_FILTER_OPT, use_loop_filter_opt_ ? 1 : 0);
+    }
+  }
+
+ private:
+  const bool use_loop_filter_opt_;
+};
+#endif  // CONFIG_VP9_DECODER
+
 TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
   cfg_.rc_target_bitrate = kBitrate;
   cfg_.g_error_resilient = 0;
@@ -255,6 +299,16 @@
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
 
+#if CONFIG_VP9_DECODER
+TEST_P(EndToEndTestLoopFilterThreading, TileCountChange) {
+  ::libvpx_test::RandomVideoSource video;
+  video.SetSize(4096, 2160);
+  video.set_limit(10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif  // CONFIG_VP9_DECODER
+
 VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
                           ::testing::ValuesIn(kEncodingModeVectors),
                           ::testing::ValuesIn(kTestVectors),
@@ -262,4 +316,9 @@
 
 VP9_INSTANTIATE_TEST_CASE(EndToEndTestAdaptiveRDThresh,
                           ::testing::Values(5, 6, 7), ::testing::Values(8, 9));
+
+#if CONFIG_VP9_DECODER
+VP9_INSTANTIATE_TEST_CASE(EndToEndTestLoopFilterThreading, ::testing::Bool(),
+                          ::testing::Range(2, 6));
+#endif  // CONFIG_VP9_DECODER
 }  // namespace
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <assert.h>
 #include <limits.h>
 #include "./vpx_config.h"
 #include "vpx_dsp/vpx_dsp_common.h"
@@ -92,7 +93,7 @@
     int y_only, VP9LfSync *const lf_sync) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
   const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
-  const int num_active_workers = VPXMIN(lf_sync->num_workers, lf_sync->rows);
+  const int num_active_workers = lf_sync->num_active_workers;
   int mi_row, mi_col;
   enum lf_path path;
   if (y_only)
@@ -104,6 +105,8 @@
   else
     path = LF_PATH_SLOW;
 
+  assert(num_active_workers > 0);
+
   for (mi_row = start; mi_row < stop;
        mi_row += num_active_workers * MI_BLOCK_SIZE) {
     MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
@@ -172,6 +175,7 @@
     vp9_loop_filter_dealloc(lf_sync);
     vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
   }
+  lf_sync->num_active_workers = num_workers;
 
   // Initialize cur_sb_col to -1 for all SB rows.
   memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
@@ -319,6 +323,7 @@
   CHECK_MEM_ERROR(cm, lf_sync->lfdata,
                   vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
   lf_sync->num_workers = num_workers;
+  lf_sync->num_active_workers = lf_sync->num_workers;
 
   CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
                   vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -36,7 +36,8 @@
 
   // Row-based parallel loopfilter data
   LFWorkerData *lfdata;
-  int num_workers;
+  int num_workers;         // number of allocated workers.
+  int num_active_workers;  // number of scheduled workers.
 
 #if CONFIG_MULTITHREAD
   pthread_mutex_t lf_mutex;