ref: d115cd8b124df6b0052da62dbdaf474069c6ff00
parent: f4837579d1b2fed66f01bd5d939b4cb8617de5e1
parent: a0ffa2794b9d3d831332f3956c2f88f3f3345aab
author: James Zern <jzern@google.com>
date: Mon Aug 5 12:07:09 EDT 2013
Merge changes I082959ab,Ib6932640

* changes:
  vp9/decoder: threaded row-based loop filter
  vp9/decoder: add thread worker
--- a/test/test.mk
+++ b/test/test.mk
@@ -89,6 +89,7 @@
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
--- /dev/null
+++ b/test/vp9_thread_test.cc
@@ -1,0 +1,109 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/decoder/vp9_thread.h"
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/webm_video_source.h"
+
+namespace {
+
+// Test fixture that owns a single VP9Worker. SetUp() passes it through
+// vp9_worker_init() and TearDown() through vp9_worker_end(), so every test
+// case begins with a freshly initialized worker object.
+class VP9WorkerThreadTest : public ::testing::Test {
+ protected:
+  virtual ~VP9WorkerThreadTest() {}
+
+  virtual void SetUp() { vp9_worker_init(&worker_); }
+
+  virtual void TearDown() { vp9_worker_end(&worker_); }
+
+  VP9Worker worker_;
+};
+
+// Worker hook shared by the tests below: writes 5 through 'data' and then
+// returns the int that 'return_value' points to, letting each test choose
+// whether the hook reports success (non-zero) or failure (zero).
+int ThreadHook(void* data, void* return_value) {
+  *reinterpret_cast<int*>(data) = 5;
+  const int* const result = reinterpret_cast<int*>(return_value);
+  return *result;
+}
+
+// Runs a succeeding hook twice on the same worker and checks, on each pass,
+// that the sync reports no error and that the hook wrote its marker value.
+TEST_F(VP9WorkerThreadTest, HookSuccess) {
+  EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
+
+  for (int pass = 0; pass < 2; ++pass) {
+    int hook_data = 0;
+    int return_value = 1;  // return successfully from the hook
+
+    EXPECT_TRUE(vp9_worker_reset(&worker_));
+
+    worker_.hook = ThreadHook;
+    worker_.data1 = &hook_data;
+    worker_.data2 = &return_value;
+    vp9_worker_launch(&worker_);
+
+    EXPECT_TRUE(vp9_worker_sync(&worker_));
+    EXPECT_FALSE(worker_.had_error);
+    EXPECT_EQ(5, hook_data);
+
+    EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
+  }
+}
+
+// Checks that a hook returning 0 is reported as an error by vp9_worker_sync()
+// and worker_.had_error, and that the worker can be reset and reused.
+TEST_F(VP9WorkerThreadTest, HookFailure) {
+ EXPECT_TRUE(vp9_worker_reset(&worker_));
+
+ int hook_data = 0;
+ int return_value = 0; // return failure from the hook
+ worker_.hook = ThreadHook;
+ worker_.data1 = &hook_data;
+ worker_.data2 = &return_value;
+
+ vp9_worker_launch(&worker_);
+ EXPECT_FALSE(vp9_worker_sync(&worker_));
+ EXPECT_TRUE(worker_.had_error);
+
+ // Ensure _reset() clears the error and _launch() can be called again.
+ // ThreadHook reads return_value through data2 when it runs, so changing it
+ // here makes the relaunched hook report success.
+ return_value = 1;
+ EXPECT_TRUE(vp9_worker_reset(&worker_));
+ EXPECT_FALSE(worker_.had_error);
+ vp9_worker_launch(&worker_);
+ EXPECT_TRUE(vp9_worker_sync(&worker_));
+ EXPECT_FALSE(worker_.had_error);
+}
+
+// Decodes vp90-2-03-size-226x226.webm with cfg.threads set to 2 and compares
+// the MD5 accumulated over every output image against a fixed digest.
+TEST(VP9DecodeMTTest, MTDecode) {
+ libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
+ video.Init();
+
+ vpx_codec_dec_cfg_t cfg = {0};
+ cfg.threads = 2;
+ libvpx_test::VP9Decoder decoder(cfg, 0);
+
+ libvpx_test::MD5 md5;
+ for (video.Begin(); video.cxdata(); video.Next()) {
+ const vpx_codec_err_t res =
+ decoder.DecodeFrame(video.cxdata(), video.frame_size());
+ // A decode failure aborts the test immediately (ASSERT, not EXPECT).
+ ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+
+ libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img = NULL;
+
+ // Get decompressed data
+ while ((img = dec_iter.Next())) {
+ md5.Add(img);
+ }
+ }
+ EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
+}
+
+} // namespace
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -383,3 +383,11 @@
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
+
+// Worker-hook wrapper around vp9_loop_filter_rows(). 'arg1' must point to an
+// LFWorkerData describing the job; 'arg2' is ignored. Always returns 1.
+int vp9_loop_filter_worker(void *arg1, void *arg2) {
+  LFWorkerData *const job = (LFWorkerData*)arg1;
+  (void)arg2;  // unused
+  vp9_loop_filter_rows(job->frame_buffer, job->cm, &job->xd,
+                       job->start, job->stop, job->y_only);
+  return 1;
+}
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -57,4 +57,18 @@
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
+
+// Arguments for one vp9_loop_filter_worker() call; every field is forwarded
+// to vp9_loop_filter_rows().
+typedef struct LoopFilterWorkerData {
+ const YV12_BUFFER_CONFIG *frame_buffer;
+ struct VP9Common *cm;
+ struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
+ // loopfilter. the planes are necessary as their state
+ // is changed during decode.
+ int start; // forwarded as the 'start' argument
+ int stop; // forwarded as the 'stop' argument
+ int y_only; // forwarded as the 'y_only' argument
+} LFWorkerData;
+
+// Operates on the rows described by LFWorkerData passed as 'arg1'.
+int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -34,6 +34,7 @@
#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
+#include "vp9/decoder/vp9_thread.h"
#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
@@ -583,10 +584,18 @@
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
+ const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ lf_data->cm = pc;
+ lf_data->xd = pbi->mb;
+ lf_data->y_only = 0;
+ }
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
@@ -601,17 +610,33 @@
}
if (pbi->do_loopfilter_inline) {
- YV12_BUFFER_CONFIG *const fb =
- &pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
- vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+ vp9_worker_sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = mi_row;
+ pbi->lf_worker.hook = vp9_loop_filter_worker;
+ vp9_worker_launch(&pbi->lf_worker);
+ } else {
+ YV12_BUFFER_CONFIG *const fb =
+ &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+ }
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ if (num_threads > 1) {
+ // TODO(jzern): since the loop filter is delayed one mb row, this will be
+ // forced to wait for the last row scheduled in the for loop.
+ vp9_worker_sync(&pbi->lf_worker);
+ }
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -141,6 +141,16 @@
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
+ if (pbi->oxcf.max_threads > 1) {
+ vp9_worker_init(&pbi->lf_worker);
+ pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
+ pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+ if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
+ vp9_remove_decompressor(pbi);
+ return NULL;
+ }
+ }
+
return pbi;
}
@@ -154,6 +164,8 @@
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
+ vp9_worker_end(&pbi->lf_worker);
+ vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -14,8 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
-
#include "vp9/decoder/vp9_onyxd.h"
+#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -38,6 +38,7 @@
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
+ VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_
--- /dev/null
+++ b/vp9/decoder/vp9_thread.c
@@ -1,0 +1,248 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
+
+
+#include <assert.h>
+#include <string.h> // for memset()
+#include "./vp9_thread.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+// Win32 stand-in for pthread_create(): starts 'start(arg)' on a new thread
+// via _beginthreadex() and stores its handle in '*thread'. 'attr' is ignored.
+// Returns 0 on success, 1 if no thread handle was obtained.
+static int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*), void* arg) {
+ (void)attr;
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;
+ // The result of SetThreadPriority() is not checked.
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+ return 0;
+}
+
+// Win32 stand-in for pthread_join(): blocks until 'thread' is signaled and
+// then closes its handle. 'value_ptr' is ignored. Returns 0 on success and 1
+// if either the wait or the close fails.
+static int pthread_join(pthread_t thread, void** value_ptr) {
+  (void)value_ptr;
+  if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) return 1;
+  return CloseHandle(thread) == 0;
+}
+
+// Mutex
+// Win32 stand-ins for the pthread mutex calls, implemented on top of a
+// CRITICAL_SECTION. Each wrapper always returns 0.
+
+// Initializes 'mutex'; 'mutexattr' is ignored.
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+ (void)mutexattr;
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+// Acquires 'mutex' via EnterCriticalSection().
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+ EnterCriticalSection(mutex);
+ return 0;
+}
+
+// Releases 'mutex' via LeaveCriticalSection().
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+// Destroys 'mutex' via DeleteCriticalSection().
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+// Closes the three handles that make up the emulated condition variable.
+// All three closes are attempted even if an earlier one fails. Returns 0 if
+// every CloseHandle() succeeded, 1 otherwise.
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+ int ok = 1;
+ ok &= (CloseHandle(condition->waiting_sem_) != 0);
+ ok &= (CloseHandle(condition->received_sem_) != 0);
+ ok &= (CloseHandle(condition->signal_event_) != 0);
+ return !ok;
+}
+
+// Creates the two semaphores and the auto-reset event that emulate a
+// condition variable. 'cond_attr' is ignored. Returns 0 on success; on
+// failure releases whatever was created and returns 1.
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+  (void)cond_attr;
+  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+  if (condition->waiting_sem_ == NULL ||
+      condition->received_sem_ == NULL ||
+      condition->signal_event_ == NULL) {
+    // Close only the handles that were actually created: CloseHandle() on a
+    // NULL handle fails and raises an exception when run under a debugger.
+    if (condition->waiting_sem_ != NULL) CloseHandle(condition->waiting_sem_);
+    if (condition->received_sem_ != NULL) CloseHandle(condition->received_sem_);
+    if (condition->signal_event_ != NULL) CloseHandle(condition->signal_event_);
+    return 1;
+  }
+  return 0;
+}
+
+// Wakes one thread blocked in pthread_cond_wait(), if there is one. Returns 0
+// on success (including when no thread is waiting) and 1 on failure.
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+  int ok = 1;
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    ok = (SetEvent(condition->signal_event_) != 0);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait.
+    // WAIT_OBJECT_0 means the waiter acknowledged the event, i.e. success.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+           WAIT_OBJECT_0);
+  }
+  return !ok;
+}
+
+// Emulated condition wait. Registers the caller as a waiter, releases
+// 'mutex', blocks until signal_event_ is set, acknowledges through
+// received_sem_, and re-acquires 'mutex' before returning.
+// Returns 0 on success, 1 on failure.
+static int pthread_cond_wait(pthread_cond_t* const condition,
+ pthread_mutex_t* const mutex) {
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ // (on this early failure path 'mutex' is returned still held)
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ // tell pthread_cond_signal, which blocks on received_sem_, that the event
+ // has been consumed
+ ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
+ pthread_mutex_lock(mutex);
+ return !ok;
+}
+
+#else // _WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
+
+//------------------------------------------------------------------------------
+
+// Body of the worker thread; 'ptr' is the owning VP9Worker. Each iteration
+// sleeps on condition_ while status_ is OK, then either runs the hook (WORK)
+// or leaves the loop (NOT_OK). The hook is invoked with mutex_ held.
+static THREADFN thread_loop(void *ptr) { // thread loop
+ VP9Worker* const worker = (VP9Worker*)ptr;
+ int done = 0;
+ while (!done) {
+ pthread_mutex_lock(&worker->mutex_);
+ while (worker->status_ == OK) { // wait in idling mode
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ if (worker->status_ == WORK) {
+ if (worker->hook) {
+ // a zero return from the hook is latched into had_error
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+ }
+ worker->status_ = OK;
+ } else if (worker->status_ == NOT_OK) { // finish the worker
+ done = 1;
+ }
+ // signal to the main thread that we're done (for Sync())
+ pthread_cond_signal(&worker->condition_);
+ pthread_mutex_unlock(&worker->mutex_);
+ }
+ return THREAD_RETURN(NULL); // Thread is finished
+}
+
+// main thread state control
+// Waits, under mutex_, until the worker's status_ is OK, then switches it to
+// 'new_status' and signals condition_. With new_status == OK the call only
+// waits for the current job to finish.
+static void change_state(VP9Worker* const worker,
+ VP9WorkerStatus new_status) {
+ // no-op when attempting to change state on a thread that didn't come up
+ if (worker->status_ < OK) return;
+
+ pthread_mutex_lock(&worker->mutex_);
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->condition_);
+ }
+ pthread_mutex_unlock(&worker->mutex_);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+
+// Zero-fills the whole worker object and marks it NOT_OK, i.e. not usable
+// until vp9_worker_reset() has been called on it.
+void vp9_worker_init(VP9Worker* const worker) {
+  memset(worker, 0, sizeof(VP9Worker));
+  worker->status_ = NOT_OK;
+}
+
+// Waits for any job in flight to finish (multithreaded builds only) and
+// returns 1 when had_error is clear, 0 otherwise.
+int vp9_worker_sync(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+  change_state(worker, OK);
+#endif
+  assert(worker->status_ <= OK);
+  return worker->had_error ? 0 : 1;
+}
+
+// Clears had_error and brings the worker to the OK state. A worker that is
+// NOT_OK gets its mutex, condition and thread created (multithreaded builds);
+// a worker that is busy is synced first. Returns 1 on success and 0 on
+// failure, in which case the worker stays NOT_OK and owns no resources.
+int vp9_worker_reset(VP9Worker* const worker) {
+  int ok = 1;
+  worker->had_error = 0;
+  if (worker->status_ < OK) {
+#if CONFIG_MULTITHREAD
+    if (pthread_mutex_init(&worker->mutex_, NULL)) {
+      return 0;
+    }
+    if (pthread_cond_init(&worker->condition_, NULL)) {
+      // the mutex was created above; do not leak it
+      pthread_mutex_destroy(&worker->mutex_);
+      return 0;
+    }
+    pthread_mutex_lock(&worker->mutex_);
+    ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
+    if (ok) worker->status_ = OK;
+    pthread_mutex_unlock(&worker->mutex_);
+    if (!ok) {
+      // status_ is still NOT_OK, so vp9_worker_end() will not release
+      // these; clean up here.
+      pthread_mutex_destroy(&worker->mutex_);
+      pthread_cond_destroy(&worker->condition_);
+    }
+#else
+    worker->status_ = OK;
+#endif
+  } else if (worker->status_ > OK) {
+    ok = vp9_worker_sync(worker);
+  }
+  assert(!ok || (worker->status_ == OK));
+  return ok;
+}
+
+// Starts the hook. Multithreaded builds hand the job to the worker thread by
+// switching it to WORK; otherwise the hook runs right here on the caller's
+// thread and a zero return is latched into had_error.
+void vp9_worker_launch(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+  change_state(worker, WORK);
+#else
+  if (worker->hook) {
+    const int hook_ok = worker->hook(worker->data1, worker->data2);
+    worker->had_error |= !hook_ok;
+  }
+#endif
+}
+
+// Shuts the worker down and leaves it NOT_OK. Does nothing (beyond the
+// assert) for a worker that is already NOT_OK.
+void vp9_worker_end(VP9Worker* const worker) {
+ if (worker->status_ >= OK) {
+#if CONFIG_MULTITHREAD
+ // switching to NOT_OK makes thread_loop() exit; then join and release
+ change_state(worker, NOT_OK);
+ pthread_join(worker->thread_, NULL);
+ pthread_mutex_destroy(&worker->mutex_);
+ pthread_cond_destroy(&worker->condition_);
+#else
+ worker->status_ = NOT_OK;
+#endif
+ }
+ assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
--- /dev/null
+++ b/vp9/decoder/vp9_thread.h
@@ -1,0 +1,93 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
+
+
+#ifndef VP9_DECODER_VP9_THREAD_H_
+#define VP9_DECODER_VP9_THREAD_H_
+
+#include "vpx_config.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+ HANDLE waiting_sem_;
+ HANDLE received_sem_;
+ HANDLE signal_event_;
+} pthread_cond_t;
+
+#else
+
+#include <pthread.h>
+
+#endif /* _WIN32 */
+#endif /* CONFIG_MULTITHREAD */
+
+// State of the worker thread object
+typedef enum {
+ NOT_OK = 0, // object is unusable
+ OK, // ready to work
+ WORK // busy finishing the current task
+} VP9WorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*VP9WorkerHook)(void*, void*);
+
+// Synchronize object used to launch job in the worker thread
+typedef struct {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t mutex_;
+ pthread_cond_t condition_;
+ pthread_t thread_;
+#endif
+ VP9WorkerStatus status_;
+ VP9WorkerHook hook; // hook to call
+ void* data1; // first argument passed to 'hook'
+ void* data2; // second argument passed to 'hook'
+ int had_error; // non-zero once any 'hook' call has returned 0; cleared
+ // by vp9_worker_reset()
+} VP9Worker;
+
+// Must be called first, before any other method.
+void vp9_worker_init(VP9Worker* const worker);
+// Must be called to initialize the object and spawn the thread. Re-entrant.
+// Will potentially launch the thread. Returns false in case of error.
+int vp9_worker_reset(VP9Worker* const worker);
+// Makes sure the previous work is finished. Returns true if worker->had_error
+// was not set and no error condition was triggered by the working thread.
+int vp9_worker_sync(VP9Worker* const worker);
+// Triggers the thread to call hook() with data1 and data2 argument. These
+// hook/data1/data2 can be changed at any time before calling this function,
+// but not be changed afterward until the next call to vp9_worker_sync().
+void vp9_worker_launch(VP9Worker* const worker);
+// Kill the thread and terminate the object. To use the object again, one
+// must call vp9_worker_reset() again.
+void vp9_worker_end(VP9Worker* const worker);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* VP9_DECODER_VP9_THREAD_H_ */
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -28,6 +28,8 @@
VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h
+VP9_DX_SRCS-yes += decoder/vp9_thread.c
+VP9_DX_SRCS-yes += decoder/vp9_thread.h
VP9_DX_SRCS-yes += decoder/vp9_treereader.h
VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c
VP9_DX_SRCS-yes += decoder/vp9_idct_blk.c
--
⑨