shithub: libvpx

Download patch

ref: 4ffd8350be4c9de82da722f5b1672acabbaf9b51
parent: 0d245d42c448ee764385872f474adb3cee07a8d6
parent: bc837b223b3028f825314b9df623ba81ac445403
author: James Zern <jzern@google.com>
date: Wed Jul 5 19:53:22 EDT 2017

Merge changes from topic 'rm-dec-frame-parallel'

* changes:
  VP9_COMMON: rm frame_parallel_decode
  VP9Decoder: rm frame_parallel_decode
  vp9_dx: rm worker thread creation

--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -62,8 +62,7 @@
   cm->prev_seg_map_idx = 1;
 
   cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
-  if (!cm->frame_parallel_decode)
-    cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
+  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
 
   return 0;
 }
@@ -77,10 +76,7 @@
   }
 
   cm->current_frame_seg_map = NULL;
-
-  if (!cm->frame_parallel_decode) {
-    cm->last_frame_seg_map = NULL;
-  }
+  cm->last_frame_seg_map = NULL;
 }
 
 void vp9_free_ref_frame_buffers(BufferPool *pool) {
@@ -186,7 +182,7 @@
 
 void vp9_init_context_buffers(VP9_COMMON *cm) {
   cm->setup_mi(cm);
-  if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+  if (cm->last_frame_seg_map)
     memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
 }
 
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -428,7 +428,7 @@
   vp9_clearall_segfeatures(&cm->seg);
   cm->seg.abs_delta = SEGMENT_DELTADATA;
 
-  if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+  if (cm->last_frame_seg_map)
     memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
 
   if (cm->current_frame_seg_map)
@@ -457,7 +457,7 @@
   }
 
   // prev_mip will only be allocated in encoder.
-  if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
+  if (frame_is_intra_only(cm) && cm->prev_mip)
     memset(cm->prev_mip, 0,
            cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
 
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -235,10 +235,6 @@
   struct loopfilter lf;
   struct segmentation seg;
 
-  // TODO(hkuang): Remove this as it is the same as frame_parallel_decode
-  // in pbi.
-  int frame_parallel_decode;  // frame-based threading.
-
   // Context probabilities for reference frame prediction
   MV_REFERENCE_FRAME comp_fixed_ref;
   MV_REFERENCE_FRAME comp_var_ref[2];
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -490,8 +490,8 @@
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 static void dec_build_inter_predictors(
-    VPxWorker *const worker, MACROBLOCKD *xd, int plane, int bw, int bh, int x,
-    int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel,
+    MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h,
+    int mi_x, int mi_y, const InterpKernel *kernel,
     const struct scale_factors *sf, struct buf_2d *pre_buf,
     struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf,
     int is_scaled, int ref) {
@@ -593,12 +593,6 @@
       y_pad = 1;
     }
 
-    // Wait until reference block is ready. Pad 7 more pixels as last 7
-    // pixels of each superblock row can be changed by next superblock row.
-    if (worker != NULL)
-      vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
-                                                      << (plane == 0 ? 0 : 1));
-
     // Skip border extension if block is inside the frame.
     if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
         y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
@@ -617,14 +611,6 @@
                          w, h, ref, xs, ys);
       return;
     }
-  } else {
-    // Wait until reference block is ready. Pad 7 more pixels as last 7
-    // pixels of each superblock row can be changed by next superblock row.
-    if (worker != NULL) {
-      const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
-      vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
-                                                      << (plane == 0 ? 0 : 1));
-    }
   }
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -653,8 +639,6 @@
   const int is_compound = has_second_ref(mi);
   int ref;
   int is_scaled;
-  VPxWorker *const fwo =
-      pbi->frame_parallel_decode ? pbi->frame_worker_owner : NULL;
 
   for (ref = 0; ref < 1 + is_compound; ++ref) {
     const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
@@ -686,10 +670,10 @@
         for (y = 0; y < num_4x4_h; ++y) {
           for (x = 0; x < num_4x4_w; ++x) {
             const MV mv = average_split_mvs(pd, mi, ref, i++);
-            dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 4 * x,
-                                       4 * y, 4, 4, mi_x, mi_y, kernel, sf,
-                                       pre_buf, dst_buf, &mv, ref_frame_buf,
-                                       is_scaled, ref);
+            dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y,
+                                       4, 4, mi_x, mi_y, kernel, sf, pre_buf,
+                                       dst_buf, &mv, ref_frame_buf, is_scaled,
+                                       ref);
           }
         }
       }
@@ -703,7 +687,7 @@
         const int n4w_x4 = 4 * num_4x4_w;
         const int n4h_x4 = 4 * num_4x4_h;
         struct buf_2d *const pre_buf = &pd->pre[ref];
-        dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
+        dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
                                    n4h_x4, mi_x, mi_y, kernel, sf, pre_buf,
                                    dst_buf, &mv, ref_frame_buf, is_scaled, ref);
       }
@@ -1473,11 +1457,6 @@
           winterface->execute(&pbi->lf_worker);
         }
       }
-      // After loopfiltering, the last 7 row pixels in each superblock row may
-      // still be changed by the longest loopfilter of the next superblock
-      // row.
-      if (pbi->frame_parallel_decode)
-        vp9_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2);
     }
   }
 
@@ -1493,8 +1472,6 @@
   // Get last tile data.
   tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1;
 
-  if (pbi->frame_parallel_decode)
-    vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
   return vpx_reader_find_end(&tile_data->bit_reader);
 }
 
@@ -1793,10 +1770,6 @@
     cm->lf.filter_level = 0;
     cm->show_frame = 1;
 
-    if (pbi->frame_parallel_decode) {
-      for (i = 0; i < REF_FRAMES; ++i)
-        cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
-    }
     return 0;
   }
 
@@ -2088,24 +2061,6 @@
 
   if (cm->lf.filter_level && !cm->skip_loop_filter) {
     vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
-  }
-
-  // If encoded in frame parallel mode, frame context is ready after decoding
-  // the frame header.
-  if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) {
-    VPxWorker *const worker = pbi->frame_worker_owner;
-    FrameWorkerData *const frame_worker_data = worker->data1;
-    if (cm->refresh_frame_context) {
-      context_updated = 1;
-      cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
-    }
-    vp9_frameworker_lock_stats(worker);
-    pbi->cur_buf->row = -1;
-    pbi->cur_buf->col = -1;
-    frame_worker_data->frame_context_ready = 1;
-    // Signal the main thread that context is ready.
-    vp9_frameworker_signal_stats(worker);
-    vp9_frameworker_unlock_stats(worker);
   }
 
   if (pbi->tile_worker_data == NULL ||
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -455,12 +455,6 @@
   }
 }
 
-static void fpm_sync(void *const data, int mi_row) {
-  VP9Decoder *const pbi = (VP9Decoder *)data;
-  vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
-                       mi_row << MI_BLOCK_SIZE_LOG2);
-}
-
 // This macro is used to add a motion vector mv_ref list if it isn't
 // already in the list.  If it's the second motion vector or early_break
 // it will also skip all additional processing and jump to Done!
@@ -500,8 +494,7 @@
                             PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
                             const POSITION *const mv_ref_search,
                             int_mv *mv_ref_list, int mi_row, int mi_col,
-                            int block, int is_sub8x8, find_mv_refs_sync sync,
-                            void *const data) {
+                            int block, int is_sub8x8) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
   int different_ref_found = 0;
@@ -557,23 +550,8 @@
     }
   }
 
-// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
-// on windows platform. The sync here is unnecessary if use_prev_frame_mvs
-// is 0. But after removing it, there will be hang in the unit test on windows
-// due to several threads waiting for a thread's signal.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
-  if (cm->frame_parallel_decode && sync != NULL) {
-    sync(data, mi_row);
-  }
-#endif
-
   // Check the last frame's mode and mv info.
   if (prev_frame_mvs) {
-    // Synchronize here for frame parallel decode if sync function is provided.
-    if (cm->frame_parallel_decode && sync != NULL) {
-      sync(data, mi_row);
-    }
-
     if (prev_frame_mvs->ref_frame[0] == ref_frame) {
       ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
     } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
@@ -652,7 +630,7 @@
 
   refmv_count =
       dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
-                       mv_list, mi_row, mi_col, block, 1, NULL, NULL);
+                       mv_list, mi_row, mi_col, block, 1);
 
   switch (block) {
     case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break;
@@ -750,9 +728,8 @@
         const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
         int refmv_count;
 
-        refmv_count =
-            dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs,
-                             mi_row, mi_col, -1, 0, fpm_sync, (void *)pbi);
+        refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
+                                       tmp_mvs, mi_row, mi_col, -1, 0);
 
         dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
                               refmv_count);
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -254,11 +254,9 @@
   pbi->hold_ref_buf = 0;
   cm->frame_to_show = get_frame_new_buffer(cm);
 
-  if (!pbi->frame_parallel_decode || !cm->show_frame) {
-    lock_buffer_pool(pool);
-    --frame_bufs[cm->new_fb_idx].ref_count;
-    unlock_buffer_pool(pool);
-  }
+  lock_buffer_pool(pool);
+  --frame_bufs[cm->new_fb_idx].ref_count;
+  unlock_buffer_pool(pool);
 
   // Invalidate these references until the next frame starts.
   for (ref_index = 0; ref_index < 3; ref_index++)
@@ -292,9 +290,7 @@
   pbi->ready_for_new_data = 0;
 
   // Check if the previous frame was a frame without any references to it.
-  // Release frame buffer if not decoding in frame parallel mode.
-  if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 &&
-      frame_bufs[cm->new_fb_idx].ref_count == 0)
+  if (cm->new_fb_idx >= 0 && frame_bufs[cm->new_fb_idx].ref_count == 0)
     pool->release_fb_cb(pool->cb_priv,
                         &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
   // Find a free frame buffer. Return error if can not find any.
@@ -309,18 +305,7 @@
   cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
 
   pbi->hold_ref_buf = 0;
-  if (pbi->frame_parallel_decode) {
-    VPxWorker *const worker = pbi->frame_worker_owner;
-    vp9_frameworker_lock_stats(worker);
-    frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
-    // Reset decoding progress.
-    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
-    pbi->cur_buf->row = -1;
-    pbi->cur_buf->col = -1;
-    vp9_frameworker_unlock_stats(worker);
-  } else {
-    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
-  }
+  pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
 
   if (setjmp(cm->error.jmp)) {
     const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
@@ -377,31 +362,14 @@
   if (!cm->show_existing_frame) {
     cm->last_show_frame = cm->show_frame;
     cm->prev_frame = cm->cur_frame;
-    if (cm->seg.enabled && !pbi->frame_parallel_decode)
-      vp9_swap_current_and_last_seg_map(cm);
+    if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm);
   }
 
   // Update progress in frame parallel decode.
-  if (pbi->frame_parallel_decode) {
-    // Need to lock the mutex here as another thread may
-    // be accessing this buffer.
-    VPxWorker *const worker = pbi->frame_worker_owner;
-    FrameWorkerData *const frame_worker_data = worker->data1;
-    vp9_frameworker_lock_stats(worker);
-
-    if (cm->show_frame) {
-      cm->current_video_frame++;
-    }
-    frame_worker_data->frame_decoded = 1;
-    frame_worker_data->frame_context_ready = 1;
-    vp9_frameworker_signal_stats(worker);
-    vp9_frameworker_unlock_stats(worker);
-  } else {
-    cm->last_width = cm->width;
-    cm->last_height = cm->height;
-    if (cm->show_frame) {
-      cm->current_video_frame++;
-    }
+  cm->last_width = cm->width;
+  cm->last_height = cm->height;
+  if (cm->show_frame) {
+    cm->current_video_frame++;
   }
 
   cm->error.setjmp = 0;
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -53,8 +53,6 @@
 
   int refresh_frame_flags;
 
-  int frame_parallel_decode;  // frame-based threading.
-
   // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
   // the same.
   RefCntBuffer *cur_buf;  //  Current decoding frame buffer.
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -347,13 +347,7 @@
     frame_worker_data->pbi->max_threads = ctx->cfg.threads;
 
     frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
-    frame_worker_data->pbi->frame_parallel_decode = 0;
-    frame_worker_data->pbi->common.frame_parallel_decode = 0;
     worker->hook = (VPxWorkerHook)frame_worker_hook;
-    if (!winterface->reset(worker)) {
-      set_error_detail(ctx, "Frame Worker thread creation failed");
-      return VPX_CODEC_MEM_ERROR;
-    }
   }
 
   // If postprocessing was enabled by the application and a