ref: e8b2750904fd78cf168fa196b38d35594b060e8a
parent: b758ba795a04d551caa3c1af2806cdf0c075803a
author: James Zern <jzern@google.com>
date: Sat Apr 6 13:07:13 EDT 2019
loop_filter_rows_mt: unify worker count calculation. Fixes a deadlock that occurs with an odd number of threads when the thread count goes from being less than the number of tiles to being greater than it. The previous calculations were out of sync, so going from, e.g., 8 tiles to 2 with 3 threads would result in scheduling only 2 workers, while thread_loop_filter_rows() would expect 3. BUG=webm:1618 Change-Id: I78c967a8c3c927d929e13c949808a5ef443ebacb
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -193,6 +193,50 @@
libvpx_test::TestMode encoding_mode_;
};
+#if CONFIG_VP9_DECODER
+// Fixture for decoder loop-filter threading tests. The test parameters control
+// VP9D_SET_LOOP_FILTER_OPT (bool) and the number of decoder threads (int).
+class EndToEndTestLoopFilterThreading
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<bool, int> {
+ protected:
+ EndToEndTestLoopFilterThreading()
+ : EncoderTest(GET_PARAM(0)), use_loop_filter_opt_(GET_PARAM(1)) {}
+
+ virtual ~EndToEndTestLoopFilterThreading() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(::libvpx_test::kRealTime);
+ cfg_.g_threads = 2;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.rc_target_bitrate = 500;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.kf_min_dist = 1;  // NOTE(review): min == max == 1 presumably makes
+ cfg_.kf_max_dist = 1;  // every frame a keyframe -- confirm.
+ dec_cfg_.threads = GET_PARAM(2);  // Decoder thread count under test.
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {  // One-time encoder setup on the first frame.
+ encoder->Control(VP8E_SET_CPUUSED, 8);
+ }
+ encoder->Control(VP9E_SET_TILE_COLUMNS, 4 - video->frame() % 5);  // 4, 3, 2, 1, 0, 4, ...
+ }
+
+ virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Decoder *decoder) {
+ if (video->frame() == 0) {  // Set the decoder control once, on the first frame.
+ decoder->Control(VP9D_SET_LOOP_FILTER_OPT, use_loop_filter_opt_ ? 1 : 0);
+ }
+ }
+
+ private:
+ const bool use_loop_filter_opt_;  // Taken from GET_PARAM(1).
+};
+#endif // CONFIG_VP9_DECODER
+
TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
cfg_.rc_target_bitrate = kBitrate;
cfg_.g_error_resilient = 0;
@@ -255,6 +299,16 @@
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
+#if CONFIG_VP9_DECODER
+TEST_P(EndToEndTestLoopFilterThreading, TileCountChange) {  // Tile-column setting varies per frame (see PreEncodeFrameHook).
+ ::libvpx_test::RandomVideoSource video;
+ video.SetSize(4096, 2160);
+ video.set_limit(10);  // NOTE(review): presumably caps the source at 10 frames -- confirm.
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif // CONFIG_VP9_DECODER
+
VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
::testing::ValuesIn(kEncodingModeVectors),
::testing::ValuesIn(kTestVectors),
@@ -262,4 +316,9 @@
VP9_INSTANTIATE_TEST_CASE(EndToEndTestAdaptiveRDThresh,
::testing::Values(5, 6, 7), ::testing::Values(8, 9));
+
+#if CONFIG_VP9_DECODER
+VP9_INSTANTIATE_TEST_CASE(EndToEndTestLoopFilterThreading, ::testing::Bool(),
+ ::testing::Range(2, 6));  // Decoder threads 2..5; Range() excludes its end.
+#endif // CONFIG_VP9_DECODER
} // namespace
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include <limits.h>
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
@@ -92,7 +93,7 @@
int y_only, VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
- const int num_active_workers = VPXMIN(lf_sync->num_workers, lf_sync->rows);
+ const int num_active_workers = lf_sync->num_active_workers;
int mi_row, mi_col;
enum lf_path path;
if (y_only)
@@ -104,6 +105,8 @@
else
path = LF_PATH_SLOW;
+ assert(num_active_workers > 0);
+
for (mi_row = start; mi_row < stop;
mi_row += num_active_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
@@ -172,6 +175,7 @@
vp9_loop_filter_dealloc(lf_sync);
vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
}
+ lf_sync->num_active_workers = num_workers;
// Initialize cur_sb_col to -1 for all SB rows.
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
@@ -319,6 +323,7 @@
CHECK_MEM_ERROR(cm, lf_sync->lfdata,
vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
lf_sync->num_workers = num_workers;
+ lf_sync->num_active_workers = lf_sync->num_workers;
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -36,7 +36,8 @@
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
- int num_workers;
+ int num_workers; // number of allocated workers.
+ int num_active_workers; // number of scheduled workers.
#if CONFIG_MULTITHREAD
pthread_mutex_t lf_mutex;