shithub: libvpx

Download patch

ref: 0993bef7e99ed1935dfdc245cb2db34f73f3bc32
parent: 30104207fda5ef23d1c3c3f161c297511524530b
author: Yunqing Wang <yunqingwang@google.com>
date: Tue Nov 25 11:53:47 EST 2014

vp9_ethread: calculate and save the tok starting address for tiles

Each tile's tok starting address is calculated before the encoding
process. These addresses are stored so that the same calculation
does not have to be repeated when packing the bitstream.

Change-Id: I0a3be0301f002260c19a850303f2f73ebc47aa50

--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -934,30 +934,22 @@
   VP9_COMMON *const cm = &cpi->common;
   vp9_writer residual_bc;
   int tile_row, tile_col;
-  TOKENEXTRA *tok[4][1 << 6], *tok_end;
+  TOKENEXTRA *tok_end;
   size_t total_size = 0;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
-  TOKENEXTRA *pre_tok = cpi->tok;
-  int tile_tok = 0;
 
   vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
              mi_cols_aligned_to_sb(cm->mi_cols));
 
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      int tile_idx = tile_row * tile_cols + tile_col;
-      tok[tile_row][tile_col] = pre_tok + tile_tok;
-      pre_tok = tok[tile_row][tile_col];
-      tile_tok = allocated_tokens(cpi->tile_data[tile_idx].tile_info);
-    }
-  }
-
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       int tile_idx = tile_row * tile_cols + tile_col;
-      tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
+      TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
 
+      tok_end = cpi->tile_tok[tile_row][tile_col] +
+          cpi->tok_count[tile_row][tile_col];
+
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
         vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
       else
@@ -964,8 +956,8 @@
         vp9_start_encode(&residual_bc, data_ptr + total_size);
 
       write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
-                  &residual_bc, &tok[tile_row][tile_col], tok_end);
-      assert(tok[tile_row][tile_col] == tok_end);
+                  &residual_bc, &tok, tok_end);
+      assert(tok == tok_end);
       vp9_stop_encode(&residual_bc);
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
         // size of this tile
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3422,24 +3422,12 @@
          cm->show_frame;
 }
 
-static void tile_data_init(TileDataEnc *tile_data) {
-  int i, j;
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    for (j = 0; j < MAX_MODES; ++j) {
-      tile_data->thresh_freq_fact[i][j] = 32;
-      tile_data->mode_map[i][j] = j;
-    }
-  }
-}
-
-static void encode_tiles(VP9_COMP *cpi) {
+static void init_tile_data(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
-
   int tile_col, tile_row;
-  TOKENEXTRA *tok[4][1 << 6];
-  TOKENEXTRA *pre_tok = cpi->tok;
+  TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
   int tile_tok = 0;
 
   if (cpi->tile_data == NULL) {
@@ -3446,8 +3434,17 @@
     CHECK_MEM_ERROR(cm, cpi->tile_data,
         vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
-      for (tile_col = 0; tile_col < tile_cols; ++tile_col)
-        tile_data_init(&cpi->tile_data[tile_row * tile_cols + tile_col]);
+      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+        TileDataEnc *tile_data =
+            &cpi->tile_data[tile_row * tile_cols + tile_col];
+        int i, j;
+        for (i = 0; i < BLOCK_SIZES; ++i) {
+          for (j = 0; j < MAX_MODES; ++j) {
+            tile_data->thresh_freq_fact[i][j] = 32;
+            tile_data->mode_map[i][j] = j;
+          }
+        }
+      }
   }
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -3456,32 +3453,41 @@
           &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
       vp9_tile_init(tile_info, cm, tile_row, tile_col);
 
-      tok[tile_row][tile_col] = pre_tok + tile_tok;
-      pre_tok = tok[tile_row][tile_col];
+      cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
+      pre_tok = cpi->tile_tok[tile_row][tile_col];
       tile_tok = allocated_tokens(*tile_info);
     }
   }
+}
 
+static void encode_tiles(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  const int tile_rows = 1 << cm->log2_tile_rows;
+  int tile_col, tile_row;
+
+  init_tile_data(cpi);
+
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
       const TileInfo * const tile_info =
           &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
-      TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
+      TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
       int mi_row;
-      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      TileDataEnc *this_tile =
+          &cpi->tile_data[tile_row * tile_cols + tile_col];
 
       for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
            mi_row += MI_BLOCK_SIZE) {
         if (cpi->sf.use_nonrd_pick_mode)
-          encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row,
-                              &tok[tile_row][tile_col]);
+          encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
         else
-          encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row,
-                           &tok[tile_row][tile_col]);
+          encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
       }
       cpi->tok_count[tile_row][tile_col] =
-          (unsigned int)(tok[tile_row][tile_col] - old_tok);
-      assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*tile_info));
+          (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
+      assert(tok - cpi->tile_tok[tile_row][tile_col] <=
+          allocated_tokens(*tile_info));
     }
   }
 }
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -247,8 +247,8 @@
   vp9_free_frame_buffer(&cpi->alt_ref_buffer);
   vp9_lookahead_destroy(cpi->lookahead);
 
-  vpx_free(cpi->tok);
-  cpi->tok = 0;
+  vpx_free(cpi->tile_tok[0][0]);
+  cpi->tile_tok[0][0] = 0;
 
   vp9_free_pc_tree(&cpi->td);
 
@@ -543,11 +543,12 @@
 
   vp9_alloc_context_buffers(cm, cm->width, cm->height);
 
-  vpx_free(cpi->tok);
+  vpx_free(cpi->tile_tok[0][0]);
 
   {
     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
-    CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
+    CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
+        vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
   }
 
   vp9_setup_pc_tree(&cpi->common, &cpi->td);
@@ -1800,7 +1801,6 @@
 #endif
 
   dealloc_compressor_data(cpi);
-  vpx_free(cpi->tok);
 
   for (i = 0; i < sizeof(cpi->mbgraph_stats) /
                   sizeof(cpi->mbgraph_stats[0]); ++i) {
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -297,7 +297,7 @@
 
   YV12_BUFFER_CONFIG last_frame_uf;
 
-  TOKENEXTRA *tok;
+  TOKENEXTRA *tile_tok[4][1 << 6];
   unsigned int tok_count[4][1 << 6];
 
   // Ambient reconstruction err target for force key frames