shithub: libvpx

Download patch

ref: 7f31bfeddbe4a0309b2a79129c1d526ba5725864
parent: a60dd5c83a48f705db6060bc10d1b4cc5d75c53b
author: James Zern <jzern@google.com>
date: Tue Oct 18 18:46:06 EDT 2016

Revert "vp9_bitstream: Encode tiles in parallel"

This reverts commit 9e8efa5b189a5abf78b1bcbc8076893728129d1e.

this change causes ubsan warnings, failures in
vpxenc_vp9_webm_rt_multithread_tiled

BUG=webm:1309

Change-Id: I020c7be985c771bfff4b3de1afe51cc8edb980da

--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -915,120 +915,6 @@
   }
 }
 
-static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
-  MACROBLOCKD *const xd = &data->xd;
-  vpx_start_encode(&data->bit_writer, data->dest);
-  write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
-              &data->bit_writer, &data->tok, data->tok_end,
-              &data->max_mv_magnitude, data->interp_filter_selected);
-  assert(data->tok == data->tok_end);
-  vpx_stop_encode(&data->bit_writer);
-  return 1;
-}
-
-void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
-  if (cpi->vp9_bitstream_worker_data) {
-    int i;
-    for (i = 1; i < cpi->num_workers; ++i) {
-      vpx_free(cpi->vp9_bitstream_worker_data[i].dest);
-    }
-    vpx_free(cpi->vp9_bitstream_worker_data);
-    cpi->vp9_bitstream_worker_data = NULL;
-  }
-}
-
-static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
-  int i;
-  cpi->vp9_bitstream_worker_data =
-      vpx_calloc(cpi->num_workers, sizeof(*cpi->vp9_bitstream_worker_data));
-  if (!cpi->vp9_bitstream_worker_data) return 1;
-  for (i = 1; i < cpi->num_workers; ++i) {
-    cpi->vp9_bitstream_worker_data[i].dest_size =
-        cpi->oxcf.width * cpi->oxcf.height;
-    cpi->vp9_bitstream_worker_data[i].dest =
-        vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size);
-    if (!cpi->vp9_bitstream_worker_data[i].dest) return 1;
-  }
-  return 0;
-}
-
-static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
-  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
-  VP9_COMMON *const cm = &cpi->common;
-  const int tile_cols = 1 << cm->log2_tile_cols;
-  const int num_workers = cpi->num_workers;
-  size_t total_size = 0;
-  int tile_col = 0;
-
-  if (!cpi->vp9_bitstream_worker_data ||
-      cpi->vp9_bitstream_worker_data[1].dest_size >
-          (cpi->oxcf.width * cpi->oxcf.height)) {
-    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
-    if (encode_tiles_buffer_alloc(cpi)) return 0;
-  }
-
-  while (tile_col < tile_cols) {
-    int i, j;
-    for (i = 0; i < num_workers && tile_col < tile_cols; ++i) {
-      VPxWorker *const worker = &cpi->workers[i];
-      VP9BitstreamWorkerData *const data = &cpi->vp9_bitstream_worker_data[i];
-
-      // Populate the worker data.
-      data->xd = cpi->td.mb.e_mbd;
-      data->tile_idx = tile_col;
-      data->tok = cpi->tile_tok[0][tile_col];
-      data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col];
-      data->max_mv_magnitude = cpi->max_mv_magnitude;
-      memset(data->interp_filter_selected, 0,
-             sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE);
-
-      // First thread can directly write into the output buffer.
-      if (i == 0) {
-        data->dest = data_ptr + total_size + 4;
-      }
-      worker->data1 = cpi;
-      worker->data2 = data;
-      worker->hook = (VPxWorkerHook)encode_tile_worker;
-      worker->had_error = 0;
-
-      if (i < num_workers - 1) {
-        winterface->launch(worker);
-      } else {
-        winterface->execute(worker);
-      }
-      ++tile_col;
-    }
-    for (j = 0; j < i; ++j) {
-      VPxWorker *const worker = &cpi->workers[j];
-      VP9BitstreamWorkerData *const data =
-          (VP9BitstreamWorkerData *)worker->data2;
-      uint32_t tile_size;
-      int k;
-
-      if (!winterface->sync(worker)) return 0;
-      tile_size = data->bit_writer.pos;
-
-      // Aggregate per-thread bitstream stats.
-      cpi->max_mv_magnitude =
-          VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude);
-      for (k = 0; k < SWITCHABLE; ++k) {
-        cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k];
-      }
-
-      // Prefix the size of the tile on all but the last.
-      if (tile_col != tile_cols || j < i - 1) {
-        mem_put_be32(data_ptr + total_size, tile_size);
-        total_size += 4;
-      }
-      if (j > 0) {
-        memcpy(data_ptr + total_size, data->dest, tile_size);
-      }
-      total_size += tile_size;
-    }
-  }
-  return total_size;
-}
-
 static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -1042,14 +928,6 @@
   memset(cm->above_seg_context, 0,
          sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
 
-  // Encoding tiles in parallel is done only for realtime mode now. In other
-  // modes the speed up is insignificant and requires further testing to ensure
-  // that it does not make the overall process worse in any case.
-  if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 &&
-      tile_cols > 1) {
-    return encode_tiles_mt(cpi, data_ptr);
-  }
-
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       int tile_idx = tile_row * tile_cols + tile_col;
@@ -1077,6 +955,7 @@
       total_size += residual_bc.pos;
     }
   }
+
   return total_size;
 }
 
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -17,27 +17,9 @@
 
 #include "vp9/encoder/vp9_encoder.h"
 
-typedef struct VP9BitstreamWorkerData {
-  uint8_t *dest;
-  int dest_size;
-  TOKENEXTRA *tok;
-  TOKENEXTRA *tok_end;
-  vpx_writer bit_writer;
-  int tile_idx;
-  unsigned int max_mv_magnitude;
-  // The size of interp_filter_selected in VP9_COMP is actually
-  // MAX_REFERENCE_FRAMES x SWITCHABLE. But when encoding tiles, all we ever do
-  // is increment the very first index (index 0) for the first dimension. Hence
-  // this is sufficient.
-  int interp_filter_selected[1][SWITCHABLE];
-  DECLARE_ALIGNED(16, MACROBLOCKD, xd);
-} VP9BitstreamWorkerData;
-
 int vp9_get_refresh_mask(VP9_COMP *cpi);
 
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
-
-void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi);
 
 static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
   return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2030,10 +2030,7 @@
   vpx_free(cpi->tile_thr_data);
   vpx_free(cpi->workers);
 
-  if (cpi->num_workers > 1) {
-    vp9_loop_filter_dealloc(&cpi->lf_row_sync);
-    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
-  }
+  if (cpi->num_workers > 1) vp9_loop_filter_dealloc(&cpi->lf_row_sync);
 
   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
 
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -601,7 +601,6 @@
   VPxWorker *workers;
   struct EncWorkerData *tile_thr_data;
   VP9LfSync lf_row_sync;
-  struct VP9BitstreamWorkerData *vp9_bitstream_worker_data;
 
   int keep_level_stats;
   Vp9LevelInfo level_info;