shithub: libvpx

--- a/vp8/common/threading.h

+++ b/vp8/common/threading.h

@@ -191,47 +191,18 @@

 #define x86_pause_hint()

 #endif

-#if defined(__has_feature)

-#if __has_feature(thread_sanitizer)

-#define USE_MUTEX_LOCK 1

-#endif

-#endif

 #include "vpx_util/vpx_thread.h"

+#include "vpx_util/vpx_atomics.h"

-static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {

-  (void)mutex;

-#if defined(USE_MUTEX_LOCK)

-  int ret;

-  pthread_mutex_lock(mutex);

-  ret = *p;

-  pthread_mutex_unlock(mutex);

-  return ret;

-#endif

-  return *p;

-}

-static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,

-                             const int *last_row_current_mb_col,

-                             const int nsync) {

-  while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {

+static INLINE void vp8_atomic_spin_wait(

+    int mb_col, const vpx_atomic_int *last_row_current_mb_col,

+    const int nsync) {

+  while (mb_col > (vpx_atomic_load_acquire(last_row_current_mb_col) - nsync)) {

     x86_pause_hint();

     thread_sleep(0);

-static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {

-  (void)mutex;

-#if defined(USE_MUTEX_LOCK)

-  pthread_mutex_lock(mutex);

-  *p = v;

-  pthread_mutex_unlock(mutex);

-  return;

-#endif

-  *p = v;

-}

-#undef USE_MUTEX_LOCK

 #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */

 #ifdef __cplusplus

--- a/vp8/decoder/decodeframe.c

+++ b/vp8/decoder/decodeframe.c

@@ -1205,7 +1205,8 @@

   pbi->frame_corrupt_residual = 0;

 #if CONFIG_MULTITHREAD

-  if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) {

+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) &&

+      pc->multi_token_partition != ONE_PARTITION) {

     unsigned int thread;

     vp8mt_decode_mb_rows(pbi, xd);

     vp8_yv12_extend_frame_borders(yv12_fb_new);

--- a/vp8/decoder/onyxd_int.h

+++ b/vp8/decoder/onyxd_int.h

@@ -68,7 +68,7 @@

 #if CONFIG_MULTITHREAD

   /* variable for threading */

-  int b_multithreaded_rd;

+  vpx_atomic_int b_multithreaded_rd;

   int max_threads;

   int current_mb_col_main;

   unsigned int decoding_thread_count;

@@ -76,9 +76,8 @@

   int mt_baseline_filter_level[MAX_MB_SEGMENTS];

   int sync_range;

-  int *mt_current_mb_col; /* Each row remembers its already decoded column. */

-  pthread_mutex_t *pmutex;

-  pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */

+  /* Each row remembers its already decoded column. */

+  vpx_atomic_int *mt_current_mb_col;

   unsigned char **mt_yabove_row; /* mb_rows x width */

   unsigned char **mt_uabove_row;

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -79,7 +79,8 @@

     if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;

-  for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;

+  for (i = 0; i < pc->mb_rows; ++i)

+    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);

 static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

@@ -247,12 +248,13 @@

 static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,

                               int start_mb_row) {

-  const int *last_row_current_mb_col;

-  int *current_mb_col;

+  const vpx_atomic_int *last_row_current_mb_col;

+  vpx_atomic_int *current_mb_col;

   int mb_row;

   VP8_COMMON *pc = &pbi->common;

   const int nsync = pbi->sync_range;

-  const int first_row_no_sync_above = pc->mb_cols + nsync;

+  const vpx_atomic_int first_row_no_sync_above =

+      VPX_ATOMIC_INIT(pc->mb_cols + nsync);

   int num_part = 1 << pbi->common.multi_token_partition;

   int last_mb_row = start_mb_row;

@@ -356,13 +358,11 @@

     for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {

       if (((mb_col - 1) % nsync) == 0) {

-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row];

-        protected_write(mutex, current_mb_col, mb_col - 1);

+        vpx_atomic_store_release(current_mb_col, mb_col - 1);

       if (mb_row && !(mb_col & (nsync - 1))) {

-        pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];

-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);

+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);

       /* Distance of MB to the various image edges.

@@ -548,7 +548,7 @@

     /* last MB of row is ready just after extension is done */

-    protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);

+    vpx_atomic_store_release(current_mb_col, mb_col + nsync);

     ++xd->mode_info_context; /* skip prediction column */

     xd->up_available = 1;

@@ -568,10 +568,10 @@

   ENTROPY_CONTEXT_PLANES mb_row_left_context;

   while (1) {

-    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break;

+    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;

     if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {

-      if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) {

+      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {

         break;

       } else {

         MACROBLOCKD *xd = &mbrd->mbd;

@@ -589,9 +589,8 @@

   int core_count = 0;

   unsigned int ithread;

-  pbi->b_multithreaded_rd = 0;

+  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);

   pbi->allocated_decoding_thread_count = 0;

-  pthread_mutex_init(&pbi->mt_mutex, NULL);

   /* limit decoding threads to the max number of token partitions */

   core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

@@ -602,7 +601,7 @@

   if (core_count > 1) {

-    pbi->b_multithreaded_rd = 1;

+    vpx_atomic_init(&pbi->b_multithreaded_rd, 1);

     pbi->decoding_thread_count = core_count - 1;

     CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);

@@ -648,16 +647,6 @@

 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {

   int i;

-  /* De-allocate mutex */

-  if (pbi->pmutex != NULL) {

-    for (i = 0; i < mb_rows; ++i) {

-      pthread_mutex_destroy(&pbi->pmutex[i]);

-    }

-    vpx_free(pbi->pmutex);

-    pbi->pmutex = NULL;

-  }

   vpx_free(pbi->mt_current_mb_col);

   pbi->mt_current_mb_col = NULL;

@@ -723,7 +712,7 @@

   int i;

   int uv_width;

-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {

+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {

     vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

     /* our internal buffers are always multiples of 16 */

@@ -741,18 +730,12 @@

     uv_width = width >> 1;

-    /* Allocate mutex */

-    CHECK_MEM_ERROR(pbi->pmutex,

-                    vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows));

-    if (pbi->pmutex) {

-      for (i = 0; i < pc->mb_rows; ++i) {

-        pthread_mutex_init(&pbi->pmutex[i], NULL);

-      }

-    }

+    /* Allocate a vpx_atomic_int for each mb row. */

+    CHECK_MEM_ERROR(pbi->mt_current_mb_col,

+                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));

+    for (i = 0; i < pc->mb_rows; ++i)

+      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);

-    /* Allocate an int for each mb row. */

-    CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

     /* Allocate memory for above_row buffers. */

     CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);

     for (i = 0; i < pc->mb_rows; ++i)

@@ -792,9 +775,9 @@

 void vp8_decoder_remove_threads(VP8D_COMP *pbi) {

   /* shutdown MB Decoding thread; */

-  if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {

+  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {

     int i;

-    protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);

+    vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);

     /* allow all threads to exit */

     for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {

@@ -824,7 +807,6 @@

     vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);

-  pthread_mutex_destroy(&pbi->mt_mutex);

 void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {

--- a/vp8/encoder/bitstream.c

+++ b/vp8/encoder/bitstream.c

@@ -1416,7 +1416,7 @@

     vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);

 #if CONFIG_MULTITHREAD

-    if (cpi->b_multi_threaded) {

+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {

       pack_mb_row_tokens(cpi, &cpi->bc[1]);

     } else {

       vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);

--- a/vp8/encoder/encodeframe.c

+++ b/vp8/encoder/encodeframe.c

@@ -341,11 +341,11 @@

 #if CONFIG_MULTITHREAD

   const int nsync = cpi->mt_sync_range;

-  const int rightmost_col = cm->mb_cols + nsync;

-  const int *last_row_current_mb_col;

-  int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

+  vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync);

+  const vpx_atomic_int *last_row_current_mb_col;

+  vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

-  if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) {

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) {

     last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];

   } else {

     last_row_current_mb_col = &rightmost_col;

@@ -415,15 +415,13 @@

     vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);

 #if CONFIG_MULTITHREAD

-    if (cpi->b_multi_threaded != 0) {

+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {

       if (((mb_col - 1) % nsync) == 0) {

-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row];

-        protected_write(mutex, current_mb_col, mb_col - 1);

+        vpx_atomic_store_release(current_mb_col, mb_col - 1);

       if (mb_row && !(mb_col & (nsync - 1))) {

-        pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];

-        sync_read(mutex, mb_col, last_row_current_mb_col, nsync);

+        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);

 #endif

@@ -563,8 +561,9 @@

                     xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

 #if CONFIG_MULTITHREAD

-  if (cpi->b_multi_threaded != 0) {

-    protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col);

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) {

+    vpx_atomic_store_release(current_mb_col,

+                             vpx_atomic_load_acquire(&rightmost_col));

 #endif

@@ -749,13 +748,14 @@

     vpx_usec_timer_start(&emr_timer);

 #if CONFIG_MULTITHREAD

-    if (cpi->b_multi_threaded) {

+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {

       int i;

       vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei,

                                 cpi->encoding_thread_count);

-      for (i = 0; i < cm->mb_rows; ++i) cpi->mt_current_mb_col[i] = -1;

+      for (i = 0; i < cm->mb_rows; ++i)

+        vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1);

       for (i = 0; i < cpi->encoding_thread_count; ++i) {

         sem_post(&cpi->h_event_start_encoding[i]);

--- a/vp8/encoder/ethreading.c

+++ b/vp8/encoder/ethreading.c

@@ -26,11 +26,11 @@

   VP8_COMMON *cm = &cpi->common;

   while (1) {

-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

     if (sem_wait(&cpi->h_event_start_lpf) == 0) {

       /* we're shutting down */

-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

       vp8_loopfilter_frame(cpi, cm);

@@ -48,7 +48,7 @@

   ENTROPY_CONTEXT_PLANES mb_row_left_context;

   while (1) {

-    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

+    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

     if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) {

       const int nsync = cpi->mt_sync_range;

@@ -66,7 +66,7 @@

       int *totalrate = &mbri->totalrate;

       /* we're shutting down */

-      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

+      if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break;

       xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1);

       xd->mode_info_stride = cm->mode_info_stride;

@@ -80,8 +80,8 @@

         int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;

         int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;

         int map_index = (mb_row * cm->mb_cols);

-        const int *last_row_current_mb_col;

-        int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

+        const vpx_atomic_int *last_row_current_mb_col;

+        vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

 #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)

         vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];

@@ -108,13 +108,11 @@

         /* for each macroblock col in image */

         for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {

           if (((mb_col - 1) % nsync) == 0) {

-            pthread_mutex_t *mutex = &cpi->pmutex[mb_row];

-            protected_write(mutex, current_mb_col, mb_col - 1);

+            vpx_atomic_store_release(current_mb_col, mb_col - 1);

           if (mb_row && !(mb_col & (nsync - 1))) {

-            pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];

-            sync_read(mutex, mb_col, last_row_current_mb_col, nsync);

+            vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);

 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING

@@ -286,7 +284,7 @@

         vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16,

                           xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

-        protected_write(&cpi->pmutex[mb_row], current_mb_col, mb_col + nsync);

+        vpx_atomic_store_release(current_mb_col, mb_col + nsync);

         /* this is to account for the border */

         xd->mode_info_context++;

@@ -490,12 +488,10 @@

 int vp8cx_create_encoder_threads(VP8_COMP *cpi) {

   const VP8_COMMON *cm = &cpi->common;

-  cpi->b_multi_threaded = 0;

+  vpx_atomic_init(&cpi->b_multi_threaded, 0);

   cpi->encoding_thread_count = 0;

   cpi->b_lpf_running = 0;

-  pthread_mutex_init(&cpi->mt_mutex, NULL);

   if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {

     int ithread;

     int th_count = cpi->oxcf.multi_threaded - 1;

@@ -526,7 +522,7 @@

     CHECK_MEM_ERROR(cpi->en_thread_data,

                     vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

-    cpi->b_multi_threaded = 1;

+    vpx_atomic_store_release(&cpi->b_multi_threaded, 1);

     cpi->encoding_thread_count = th_count;

/*

@@ -555,7 +551,7 @@

     if (rc) {

       /* shutdown other threads */

-      protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);

+      vpx_atomic_store_release(&cpi->b_multi_threaded, 0);

       for (--ithread; ithread >= 0; ithread--) {

         pthread_join(cpi->h_encoding_thread[ithread], 0);

         sem_destroy(&cpi->h_event_start_encoding[ithread]);

@@ -569,8 +565,6 @@

       vpx_free(cpi->mb_row_ei);

       vpx_free(cpi->en_thread_data);

-      pthread_mutex_destroy(&cpi->mt_mutex);

       return -1;

@@ -585,7 +579,7 @@

       if (rc) {

         /* shutdown other threads */

-        protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);

+        vpx_atomic_store_release(&cpi->b_multi_threaded, 0);

         for (--ithread; ithread >= 0; ithread--) {

           sem_post(&cpi->h_event_start_encoding[ithread]);

           sem_post(&cpi->h_event_end_encoding[ithread]);

@@ -603,8 +597,6 @@

         vpx_free(cpi->mb_row_ei);

         vpx_free(cpi->en_thread_data);

-        pthread_mutex_destroy(&cpi->mt_mutex);

         return -2;

@@ -613,9 +605,9 @@

 void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {

-  if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded)) {

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {

     /* shutdown other threads */

-    protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);

+    vpx_atomic_store_release(&cpi->b_multi_threaded, 0);

       int i;

@@ -643,6 +635,5 @@

     vpx_free(cpi->mb_row_ei);

     vpx_free(cpi->en_thread_data);

-  pthread_mutex_destroy(&cpi->mt_mutex);

 #endif

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -451,18 +451,6 @@

   cpi->mb.pip = 0;

 #if CONFIG_MULTITHREAD

-  /* De-allocate mutex */

-  if (cpi->pmutex != NULL) {

-    VP8_COMMON *const pc = &cpi->common;

-    int i;

-    for (i = 0; i < pc->mb_rows; ++i) {

-      pthread_mutex_destroy(&cpi->pmutex[i]);

-    }

-    vpx_free(cpi->pmutex);

-    cpi->pmutex = NULL;

-  }

   vpx_free(cpi->mt_current_mb_col);

   cpi->mt_current_mb_col = NULL;

 #endif

@@ -1153,9 +1141,6 @@

   int width = cm->Width;

   int height = cm->Height;

-#if CONFIG_MULTITHREAD

-  int prev_mb_rows = cm->mb_rows;

-#endif

   if (vp8_alloc_frame_buffers(cm, width, height)) {

     vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,

@@ -1247,26 +1232,11 @@

   if (cpi->oxcf.multi_threaded > 1) {

     int i;

-    /* De-allocate and re-allocate mutex */

-    if (cpi->pmutex != NULL) {

-      for (i = 0; i < prev_mb_rows; ++i) {

-        pthread_mutex_destroy(&cpi->pmutex[i]);

-      }

-      vpx_free(cpi->pmutex);

-      cpi->pmutex = NULL;

-    }

-    CHECK_MEM_ERROR(cpi->pmutex,

-                    vpx_malloc(sizeof(*cpi->pmutex) * cm->mb_rows));

-    if (cpi->pmutex) {

-      for (i = 0; i < cm->mb_rows; ++i) {

-        pthread_mutex_init(&cpi->pmutex[i], NULL);

-      }

-    }

     vpx_free(cpi->mt_current_mb_col);

     CHECK_MEM_ERROR(cpi->mt_current_mb_col,

                     vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

+    for (i = 0; i < cm->mb_rows; ++i)

+      vpx_atomic_init(&cpi->mt_current_mb_col[i], 0);

 #endif

@@ -3274,7 +3244,7 @@

 #if CONFIG_MULTITHREAD

-  if (cpi->b_multi_threaded) {

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {

     sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */

 #endif

@@ -4471,7 +4441,7 @@

 #endif

 #if CONFIG_MULTITHREAD

-  if (cpi->b_multi_threaded) {

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {

     /* start loopfilter in separate thread */

     sem_post(&cpi->h_event_start_lpf);

     cpi->b_lpf_running = 1;

@@ -4497,7 +4467,8 @@

 #if CONFIG_MULTITHREAD

   /* wait that filter_level is picked so that we can continue with stream

    * packing */

-  if (cpi->b_multi_threaded) sem_wait(&cpi->h_event_end_lpf);

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded))

+    sem_wait(&cpi->h_event_end_lpf);

 #endif

   /* build the bitstream */

@@ -5341,7 +5312,7 @@

 #if CONFIG_MULTITHREAD

   /* wait for the lpf thread done */

-  if (cpi->b_multi_threaded && cpi->b_lpf_running) {

+  if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) && cpi->b_lpf_running) {

     sem_wait(&cpi->h_event_end_lpf);

     cpi->b_lpf_running = 0;

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -518,11 +518,9 @@

 #if CONFIG_MULTITHREAD

   /* multithread data */

-  pthread_mutex_t *pmutex;

-  pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */

-  int *mt_current_mb_col;

+  vpx_atomic_int *mt_current_mb_col;

   int mt_sync_range;

-  int b_multi_threaded;

+  vpx_atomic_int b_multi_threaded;

   int encoding_thread_count;

   int b_lpf_running;

--- a/vp8/vp8_dx_iface.c

+++ b/vp8/vp8_dx_iface.c

@@ -415,7 +415,7 @@

 #endif

 #if CONFIG_MULTITHREAD

-        if (pbi->b_multithreaded_rd) {

+        if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {

           vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);

 #else

--- /dev/null

+++ b/vpx_util/vpx_atomics.h

@@ -1,0 +1,133 @@

+/*

+ *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef VPX_UTIL_VPX_ATOMICS_H_

+#define VPX_UTIL_VPX_ATOMICS_H_

+#include "./vpx_config.h"

+#ifdef __cplusplus

+extern "C" {

+#endif  // __cplusplus

+#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD

+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \

+    (defined(__cplusplus) && __cplusplus >= 201112L)

+// Where available, use <stdatomic.h>

+#include <stdatomic.h>

+#define VPX_USE_STD_ATOMIC

+#else

+// Look for built-ins.

+#if !defined(__has_builtin)

+#define __has_builtin(x) 0  // Compatibility with non-clang compilers.

+#endif                      // !defined(__has_builtin)

+#if (__has_builtin(__atomic_load_n)) || \

+    (defined(__GNUC__) &&               \

+     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))

+// For GCC >= 4.7 and Clang that support __atomic builtins, use those.

+#define VPX_USE_ATOMIC_BUILTINS

+#else

+// Use platform-specific asm barriers.

+#if defined(_MSC_VER)

+// TODO(pbos): This assumes that newer versions of MSVC are building with the

+// default /volatile:ms (or older, where this is always true). Consider adding

+// support for using <atomic> instead of stdatomic.h when building C++11 under

+// MSVC. It's unclear what to do for plain C under /volatile:iso (inline asm?),

+// there're no explicit Interlocked* functions for only storing or loading

+// (presumably because volatile has historically implied that on MSVC).

+//

+// For earlier versions of MSVC, or with the default /volatile:ms, volatile int

+// accesses have acquire/release semantics and require no barrier.

+#define vpx_atomic_memory_barrier() \

+  do {                              \

+  } while (0)

+#else

+#if ARCH_X86 || ARCH_X86_64

+// Use a compiler barrier on x86, no runtime penalty.

+#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory")

+#elif ARCH_ARM

+#define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory")

+#elif ARCH_MIPS

+#define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory")

+#else

+#error Unsupported architecture!

+#endif  // ARCH_X86 || ARCH_X86_64

+#endif  // defined(_MSC_VER)

+#endif  // atomic builtin availability check

+#endif  // stdatomic availability check

+// These are wrapped in a struct so that they are not easily accessed directly

+// on any platform (to discourage programmer errors by setting values directly).

+// This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT

+// (NOT memset) and accessed through vpx_atomic_ functions.

+typedef struct vpx_atomic_int {

+#if defined(VPX_USE_STD_ATOMIC)

+  atomic_int value;  // <stdatomic.h> path.

+#else

+  volatile int value;  // Builtin and barrier fallback paths.

+#endif  // defined(VPX_USE_STD_ATOMIC)

+} vpx_atomic_int;

+#if defined(VPX_USE_STD_ATOMIC)

+#define VPX_ATOMIC_INIT(num) \

+  { ATOMIC_VAR_INIT(num) }

+#else

+#define VPX_ATOMIC_INIT(num) \

+  { num }

+#endif  // defined(VPX_USE_STD_ATOMIC)

+// Initialize an atomic int (atomic_init or plain store); not thread safe.

+static INLINE void vpx_atomic_init(vpx_atomic_int *atomic, int value) {

+#if defined(VPX_USE_STD_ATOMIC)

+  atomic_init(&atomic->value, value);

+#else

+  atomic->value = value;

+#endif  // defined(VPX_USE_STD_ATOMIC)

+}

+static INLINE void vpx_atomic_store_release(vpx_atomic_int *atomic, int value) {

+#if defined(VPX_USE_STD_ATOMIC)

+  atomic_store_explicit(&atomic->value, value, memory_order_release);

+#elif defined(VPX_USE_ATOMIC_BUILTINS)

+  __atomic_store_n(&atomic->value, value, __ATOMIC_RELEASE);

+#else

+  vpx_atomic_memory_barrier();  // Barrier precedes the plain store.

+  atomic->value = value;

+#endif  // defined(VPX_USE_STD_ATOMIC)

+}

+static INLINE int vpx_atomic_load_acquire(const vpx_atomic_int *atomic) {

+#if defined(VPX_USE_STD_ATOMIC)

+  // Cast drops const without -Wcast-qual; uintptr_t needs <stdint.h> (confirm).

+  return atomic_load_explicit(

+      (atomic_int *)(uintptr_t)(const void *)&atomic->value,

+      memory_order_acquire);

+#elif defined(VPX_USE_ATOMIC_BUILTINS)

+  return __atomic_load_n(&atomic->value, __ATOMIC_ACQUIRE);

+#else

+  int v = atomic->value;

+  vpx_atomic_memory_barrier();  // Barrier follows the plain load.

+  return v;

+#endif  // defined(VPX_USE_STD_ATOMIC)

+}

+#undef VPX_USE_STD_ATOMIC

+#undef VPX_USE_ATOMIC_BUILTINS

+#undef vpx_atomic_memory_barrier

+#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */

+#ifdef __cplusplus

+}  // extern "C"

+#endif  // __cplusplus

+#endif  // VPX_UTIL_VPX_ATOMICS_H_

--- a/vpx_util/vpx_util.mk

+++ b/vpx_util/vpx_util.mk

@@ -8,6 +8,7 @@

 ##  be found in the AUTHORS file in the root of the source tree.

##

+UTIL_SRCS-yes += vpx_atomics.h

 UTIL_SRCS-yes += vpx_util.mk

 UTIL_SRCS-yes += vpx_thread.c

 UTIL_SRCS-yes += vpx_thread.h

--

⑨