shithub: libvpx

Download patch

ref: 3ffa41aae32bdf2372b144cc7a1e65a921501551
parent: ded951793c5f13d62b608f4f964a4c5f1f633150
parent: 7b9ca3caa78bd08fbc4dac8a05396be6c7240143
author: James Zern <jzern@google.com>
date: Mon Oct 28 08:45:11 EDT 2013

Merge changes If9b16f7d,I75aab21c,I9cbb768c,If5cea3d3,I96940657,I025595d8,Ie0bc3935,I3ebb172d

* changes:
  vp9: remove partition+entropy contexts from common
  vp9: add above/left_context to MACROBLOCKD
  vp9: add above/left_seg_context to MACROBLOCKD
  vp9: add above/left_context to encoder
  vp9: add above/left_seg_context to encoder
  vp9: pass entropy context directly to set_skip_context
  vp9: pass context directly to partition functions
  vp9/decode: add alloc_tile_storage()

--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -41,17 +41,12 @@
 
   vpx_free(cm->mip);
   vpx_free(cm->prev_mip);
-  vpx_free(cm->above_seg_context);
   vpx_free(cm->last_frame_seg_map);
   vpx_free(cm->mi_grid_base);
   vpx_free(cm->prev_mi_grid_base);
 
-  vpx_free(cm->above_context[0]);
-  for (i = 0; i < MAX_MB_PLANE; i++)
-    cm->above_context[i] = 0;
   cm->mip = NULL;
   cm->prev_mip = NULL;
-  cm->above_seg_context = NULL;
   cm->last_frame_seg_map = NULL;
   cm->mi_grid_base = NULL;
   cm->prev_mi_grid_base = NULL;
@@ -85,7 +80,7 @@
 }
 
 int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
-  int i, mi_cols;
+  int i;
 
   const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
   const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
@@ -140,21 +135,6 @@
 
   setup_mi(cm);
 
-  // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
-  // information is exposed at this level
-  mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
-
-  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
-  // block where mi unit size is 8x8.
-  cm->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE *
-                                         (2 * mi_cols), 1);
-  if (!cm->above_context[0])
-    goto fail;
-
-  cm->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
-  if (!cm->above_seg_context)
-    goto fail;
-
   // Create the segmentation map structure and set to 0.
   cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
   if (!cm->last_frame_seg_map)
@@ -186,17 +166,11 @@
 }
 
 void vp9_update_frame_size(VP9_COMMON *cm) {
-  int i, mi_cols;
   const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2);
   const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2);
 
   set_mb_mi(cm, aligned_width, aligned_height);
   setup_mi(cm);
-
-  mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
-  for (i = 1; i < MAX_MB_PLANE; i++)
-    cm->above_context[i] =
-        cm->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
 
   // Initialize the previous frame segment map to 0.
   if (cm->last_frame_seg_map)
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -226,6 +226,13 @@
   unsigned char ab_index;   // index of 4x4 block inside the 8x8 block
 
   int q_index;
+
+  /* Y,U,V,(A) */
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
+
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT left_seg_context[8];
 } MACROBLOCKD;
 
 
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -185,14 +185,6 @@
   struct loopfilter lf;
   struct segmentation seg;
 
-  /* Y,U,V */
-  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
-  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
-
-  // partition contexts
-  PARTITION_CONTEXT *above_seg_context;
-  PARTITION_CONTEXT left_seg_context[8];
-
   // Context probabilities for reference frame prediction
   int allow_comp_inter_inter;
   MV_REFERENCE_FRAME comp_fixed_ref;
@@ -254,15 +246,18 @@
   return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
 }
 
-static INLINE void set_skip_context(VP9_COMMON *cm, MACROBLOCKD *xd,
-                                    int mi_row, int mi_col) {
+static INLINE void set_skip_context(
+    MACROBLOCKD *xd,
+    ENTROPY_CONTEXT *above_context[MAX_MB_PLANE],
+    ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16],
+    int mi_row, int mi_col) {
   const int above_idx = mi_col * 2;
   const int left_idx = (mi_row * 2) & 15;
   int i;
   for (i = 0; i < MAX_MB_PLANE; i++) {
     struct macroblockd_plane *const pd = &xd->plane[i];
-    pd->above_context = cm->above_context[i] + (above_idx >> pd->subsampling_x);
-    pd->left_context = cm->left_context[i] + (left_idx >> pd->subsampling_y);
+    pd->above_context = above_context[i] + (above_idx >> pd->subsampling_x);
+    pd->left_context = left_context[i] + (left_idx >> pd->subsampling_y);
   }
 }
 
@@ -313,12 +308,14 @@
   return cm->frame_type == KEY_FRAME || cm->intra_only;
 }
 
-static INLINE void update_partition_context(VP9_COMMON *cm,
-                                            int mi_row, int mi_col,
-                                            BLOCK_SIZE sb_type,
-                                            BLOCK_SIZE sb_size) {
-  PARTITION_CONTEXT *above_ctx = cm->above_seg_context + mi_col;
-  PARTITION_CONTEXT *left_ctx = cm->left_seg_context + (mi_row & MI_MASK);
+static INLINE void update_partition_context(
+    PARTITION_CONTEXT *above_seg_context,
+    PARTITION_CONTEXT left_seg_context[8],
+    int mi_row, int mi_col,
+    BLOCK_SIZE sb_type,
+    BLOCK_SIZE sb_size) {
+  PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
+  PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
 
   const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
   const int bwl = b_width_log2(sb_type);
@@ -337,11 +334,13 @@
   vpx_memset(left_ctx, pcvalue[bhl == bsl], bs);
 }
 
-static INLINE int partition_plane_context(const VP9_COMMON *cm,
-                                          int mi_row, int mi_col,
-                                          BLOCK_SIZE sb_type) {
-  const PARTITION_CONTEXT *above_ctx = cm->above_seg_context + mi_col;
-  const PARTITION_CONTEXT *left_ctx = cm->left_seg_context + (mi_row & MI_MASK);
+static INLINE int partition_plane_context(
+    const PARTITION_CONTEXT *above_seg_context,
+    const PARTITION_CONTEXT left_seg_context[8],
+    int mi_row, int mi_col,
+    BLOCK_SIZE sb_type) {
+  const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK);
 
   int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
   int above = 0, left = 0, i;
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -199,6 +199,43 @@
     xd->plane[i].dequant = cm->uv_dequant[q_index];
 }
 
+// Allocate storage for each tile column.
+// TODO(jzern): when max_threads <= 1 the same storage could be used for each
+// tile.
+static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
+  VP9_COMMON *const cm = &pbi->common;
+  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  int i, tile_col;
+
+  CHECK_MEM_ERROR(cm, pbi->mi_streams,
+                  vpx_realloc(pbi->mi_streams, tile_cols *
+                              sizeof(*pbi->mi_streams)));
+  for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+    vp9_get_tile_col_offsets(cm, tile_col);
+    pbi->mi_streams[tile_col] =
+        &cm->mi[cm->mi_rows * cm->cur_tile_mi_col_start];
+  }
+
+  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
+  // block where mi unit size is 8x8.
+  CHECK_MEM_ERROR(cm, pbi->above_context[0],
+                  vpx_realloc(pbi->above_context[0],
+                              sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
+                              2 * aligned_mi_cols));
+  for (i = 1; i < MAX_MB_PLANE; ++i) {
+    pbi->above_context[i] = pbi->above_context[0] +
+                            i * sizeof(*pbi->above_context[0]) *
+                            2 * aligned_mi_cols;
+  }
+
+  // This is sized based on the entire frame. Each tile operates within its
+  // column bounds.
+  CHECK_MEM_ERROR(cm, pbi->above_seg_context,
+                  vpx_realloc(pbi->above_seg_context,
+                              sizeof(*pbi->above_seg_context) *
+                              aligned_mi_cols));
+}
+
 static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
   MACROBLOCKD* const xd = arg;
@@ -311,7 +348,7 @@
   // cannot be used.
   xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
 
-  set_skip_context(cm, xd, mi_row, mi_col);
+  set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -387,6 +424,7 @@
 static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
                             vp9_reader* r, BLOCK_SIZE bsize, int index) {
   VP9_COMMON *const cm = &pbi->common;
+  MACROBLOCKD *const xd = &pbi->mb;
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
   PARTITION_TYPE partition = PARTITION_NONE;
   BLOCK_SIZE subsize;
@@ -401,7 +439,8 @@
     int pl;
     const int idx = check_bsize_coverage(hbs, cm->mi_rows, cm->mi_cols,
                                          mi_row, mi_col);
-    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+    pl = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
+                                 mi_row, mi_col, bsize);
 
     if (idx == 0)
       partition = treed_read(r, vp9_partition_tree,
@@ -447,7 +486,8 @@
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
-    update_partition_context(cm, mi_row, mi_col, subsize, bsize);
+    update_partition_context(xd->above_seg_context, xd->left_seg_context,
+                             mi_row, mi_col, subsize, bsize);
 }
 
 static void setup_token_decoder(const uint8_t *data,
@@ -690,14 +730,24 @@
   setup_display_size(cm, rb);
 }
 
-static void decode_tile(VP9D_COMP *pbi, vp9_reader *r, int tile_col) {
+static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
+                               int tile_col) {
+  int i;
+  xd->mi_stream = pbi->mi_streams[tile_col];
+
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    xd->above_context[i] = pbi->above_context[i];
+  }
+  // see note in alloc_tile_storage().
+  xd->above_seg_context = pbi->above_seg_context;
+}
+
+static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
   const int num_threads = pbi->oxcf.max_threads;
   VP9_COMMON *const cm = &pbi->common;
   int mi_row, mi_col;
   MACROBLOCKD *xd = &pbi->mb;
 
-  xd->mi_stream = pbi->mi_streams[tile_col];
-
   if (pbi->do_loopfilter_inline) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
     lf_data->frame_buffer = get_frame_new_buffer(cm);
@@ -711,8 +761,8 @@
   for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end;
        mi_row += MI_BLOCK_SIZE) {
     // For a SB there are 2 left contexts, each pertaining to a MB row within
-    vp9_zero(cm->left_context);
-    vp9_zero(cm->left_seg_context);
+    vp9_zero(xd->left_context);
+    vp9_zero(xd->left_seg_context);
     for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
          mi_col += MI_BLOCK_SIZE)
       decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64, 0);
@@ -768,6 +818,7 @@
   vp9_reader residual_bc;
 
   VP9_COMMON *const cm = &pbi->common;
+  MACROBLOCKD *const xd = &pbi->mb;
 
   const uint8_t *const data_end = pbi->source + pbi->source_sz;
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
@@ -777,11 +828,12 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cm->above_context[0], 0,
-             sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE * (2 * aligned_mi_cols));
+  vpx_memset(pbi->above_context[0], 0,
+             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
+             2 * aligned_mi_cols);
 
-  vpx_memset(cm->above_seg_context, 0,
-             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
+  vpx_memset(pbi->above_seg_context, 0,
+             sizeof(*pbi->above_seg_context) * aligned_mi_cols);
 
   if (pbi->oxcf.inv_tile_order) {
     const uint8_t *data_ptr2[4][1 << 6];
@@ -811,7 +863,8 @@
         setup_token_decoder(data_ptr2[tile_row][tile_col], data_end,
                             data_end - data_ptr2[tile_row][tile_col],
                             &cm->error, &residual_bc);
-        decode_tile(pbi, &residual_bc, tile_col);
+        setup_tile_context(pbi, xd, tile_col);
+        decode_tile(pbi, &residual_bc);
         if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1)
           bc_bak = residual_bc;
       }
@@ -840,7 +893,8 @@
         }
 
         setup_token_decoder(data, data_end, size, &cm->error, &residual_bc);
-        decode_tile(pbi, &residual_bc, tile_col);
+        setup_tile_context(pbi, xd, tile_col);
+        decode_tile(pbi, &residual_bc);
         data += size;
       }
     }
@@ -1092,7 +1146,6 @@
   const int keyframe = cm->frame_type == KEY_FRAME;
   YV12_BUFFER_CONFIG *new_fb = get_frame_new_buffer(cm);
   const int tile_cols = 1 << cm->log2_tile_cols;
-  int tile_col;
 
   if (!first_partition_size) {
     if (!keyframe) {
@@ -1132,14 +1185,7 @@
   xd->mi_8x8 = cm->mi_grid_visible;
   xd->mode_info_stride = cm->mode_info_stride;
 
-  CHECK_MEM_ERROR(cm, pbi->mi_streams,
-                  vpx_realloc(pbi->mi_streams, tile_cols *
-                              sizeof(*pbi->mi_streams)));
-  for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-    vp9_get_tile_col_offsets(cm, tile_col);
-    pbi->mi_streams[tile_col] =
-        &cm->mi[cm->mi_rows * cm->cur_tile_mi_col_start];
-  }
+  alloc_tile_storage(pbi, tile_cols);
 
   cm->fc = cm->frame_contexts[cm->frame_context_idx];
 
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -156,6 +156,8 @@
   vp9_worker_end(&pbi->lf_worker);
   vpx_free(pbi->lf_worker.data1);
   vpx_free(pbi->mi_streams);
+  vpx_free(pbi->above_context[0]);
+  vpx_free(pbi->above_seg_context);
   vpx_free(pbi);
 }
 
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -43,6 +43,9 @@
   /* Each tile column has its own MODE_INFO stream. This array indexes them by
      tile column index. */
   MODE_INFO **mi_streams;
+
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  PARTITION_CONTEXT *above_seg_context;
 } VP9D_COMP;
 
 #endif  // VP9_DECODER_VP9_ONYXD_INT_H_
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -618,7 +618,8 @@
     int pl;
     const int idx = check_bsize_coverage(bs, cm->mi_rows, cm->mi_cols,
                                          mi_row, mi_col);
-    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                                 mi_row, mi_col, bsize);
     // encode the partition information
     if (idx == 0)
       write_token(bc, vp9_partition_tree,
@@ -661,7 +662,8 @@
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
-    update_partition_context(cm, mi_row, mi_col, subsize, bsize);
+    update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
+                             mi_row, mi_col, subsize, bsize);
 }
 
 static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
@@ -677,7 +679,7 @@
   for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end;
        mi_row += 8, mi_8x8 += 8 * mis) {
     m_8x8 = mi_8x8;
-    vp9_zero(cm->left_seg_context);
+    vp9_zero(cpi->left_seg_context);
     for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
          mi_col += MI_BLOCK_SIZE, m_8x8 += MI_BLOCK_SIZE) {
       write_modes_sb(cpi, m_8x8, bc, tok, tok_end, mi_row, mi_col,
@@ -1201,7 +1203,7 @@
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
 
-  vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+  vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) *
              mi_cols_aligned_to_sb(cm->mi_cols));
 
   tok[0][0] = cpi->tok;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -499,7 +499,7 @@
   const int idx_map = mb_row * cm->mb_cols + mb_col;
   const struct segmentation *const seg = &cm->seg;
 
-  set_skip_context(cm, xd, mi_row, mi_col);
+  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
 
   // Activity map pointer
   x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
@@ -711,7 +711,6 @@
                             ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                             PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                             BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   int p;
@@ -721,21 +720,21 @@
   int mi_height = num_8x8_blocks_high_lookup[bsize];
   for (p = 0; p < MAX_MB_PLANE; p++) {
     vpx_memcpy(
-        cm->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
+        cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
         a + num_4x4_blocks_wide * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     vpx_memcpy(
-        cm->left_context[p]
+        cpi->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
         l + num_4x4_blocks_high * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
         xd->plane[p].subsampling_y);
   }
-  vpx_memcpy(cm->above_seg_context + mi_col, sa,
-             sizeof(PARTITION_CONTEXT) * mi_width);
-  vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
-             sizeof(PARTITION_CONTEXT) * mi_height);
+  vpx_memcpy(cpi->above_seg_context + mi_col, sa,
+             sizeof(*cpi->above_seg_context) * mi_width);
+  vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl,
+             sizeof(cpi->left_seg_context[0]) * mi_height);
 }
 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
                          ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
@@ -742,7 +741,6 @@
                          ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                          PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                          BLOCK_SIZE bsize) {
-  const VP9_COMMON *const cm = &cpi->common;
   const MACROBLOCK *const x = &cpi->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
   int p;
@@ -755,20 +753,20 @@
   for (p = 0; p < MAX_MB_PLANE; ++p) {
     vpx_memcpy(
         a + num_4x4_blocks_wide * p,
-        cm->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
+        cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     vpx_memcpy(
         l + num_4x4_blocks_high * p,
-        cm->left_context[p]
+        cpi->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
         xd->plane[p].subsampling_y);
   }
-  vpx_memcpy(sa, cm->above_seg_context + mi_col,
-             sizeof(PARTITION_CONTEXT) * mi_width);
-  vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
-             sizeof(PARTITION_CONTEXT) * mi_height);
+  vpx_memcpy(sa, cpi->above_seg_context + mi_col,
+             sizeof(*cpi->above_seg_context) * mi_width);
+  vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK),
+             sizeof(cpi->left_seg_context[0]) * mi_height);
 }
 
 static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
@@ -818,7 +816,8 @@
 
   c1 = BLOCK_4X4;
   if (bsize >= BLOCK_8X8) {
-    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                                 mi_row, mi_col, bsize);
     c1 = *(get_sb_partitioning(x, bsize));
   }
   partition = partition_lookup[bsl][c1];
@@ -861,7 +860,8 @@
   }
 
   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
-    update_partition_context(cm, mi_row, mi_col, c1, bsize);
+    update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
+                             mi_row, mi_col, c1, bsize);
 }
 
 // Check to see if the given partition size is allowed for a specified number
@@ -1052,7 +1052,9 @@
       pick_sb_modes(cpi, mi_row, mi_col, &none_rate, &none_dist, bsize,
                     get_block_context(x, bsize), INT64_MAX);
 
-      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+      pl = partition_plane_context(cpi->above_seg_context,
+                                   cpi->left_seg_context,
+                                   mi_row, mi_col, bsize);
       none_rate += x->partition_cost[pl][PARTITION_NONE];
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1143,7 +1145,8 @@
       assert(0);
   }
 
-  pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+  pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                               mi_row, mi_col, bsize);
   if (last_part_rate < INT_MAX)
     last_part_rate += x->partition_cost[pl][partition];
 
@@ -1193,10 +1196,13 @@
 
       split_rate += rt;
       split_dist += dt;
-      pl = partition_plane_context(cm, mi_row + y_idx, mi_col + x_idx, bsize);
+      pl = partition_plane_context(cpi->above_seg_context,
+                                   cpi->left_seg_context,
+                                   mi_row + y_idx, mi_col + x_idx, bsize);
       split_rate += x->partition_cost[pl][PARTITION_NONE];
     }
-    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                                 mi_row, mi_col, bsize);
     if (split_rate < INT_MAX) {
       split_rate += x->partition_cost[pl][PARTITION_SPLIT];
 
@@ -1525,7 +1531,9 @@
                   get_block_context(x, bsize), best_rd);
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
-        pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+        pl = partition_plane_context(cpi->above_seg_context,
+                                     cpi->left_seg_context,
+                                     mi_row, mi_col, bsize);
         this_rate += x->partition_cost[pl][PARTITION_NONE];
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
@@ -1585,7 +1593,9 @@
       }
     }
     if (sum_rd < best_rd && i == 4) {
-      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+      pl = partition_plane_context(cpi->above_seg_context,
+                                   cpi->left_seg_context,
+                                   mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1641,7 +1651,9 @@
       }
     }
     if (sum_rd < best_rd) {
-      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+      pl = partition_plane_context(cpi->above_seg_context,
+                                   cpi->left_seg_context,
+                                   mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_HORZ];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1683,7 +1695,9 @@
       }
     }
     if (sum_rd < best_rd) {
-      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
+      pl = partition_plane_context(cpi->above_seg_context,
+                                   cpi->left_seg_context,
+                                   mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_VERT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1734,7 +1748,8 @@
     cpi->set_ref_frame_mask = 1;
     pick_sb_modes(cpi, mi_row, mi_col, &r, &d, BLOCK_64X64,
                   get_block_context(x, BLOCK_64X64), INT64_MAX);
-    pl = partition_plane_context(cm, mi_row, mi_col, BLOCK_64X64);
+    pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+                                 mi_row, mi_col, BLOCK_64X64);
     r += x->partition_cost[pl][PARTITION_NONE];
 
     *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
@@ -1750,8 +1765,8 @@
   int mi_col;
 
   // Initialize the left context for the new SB row
-  vpx_memset(&cm->left_context, 0, sizeof(cm->left_context));
-  vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
+  vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+  vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
 
   // Code each SB in the row
   for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
@@ -1856,10 +1871,11 @@
 
   // Note: this memset assumes above_context[0], [1] and [2]
   // are allocated as part of the same buffer.
-  vpx_memset(cm->above_context[0], 0,
-             sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * aligned_mi_cols);
-  vpx_memset(cm->above_seg_context, 0,
-             sizeof(PARTITION_CONTEXT) * aligned_mi_cols);
+  vpx_memset(cpi->above_context[0], 0,
+             sizeof(*cpi->above_context[0]) *
+             2 * aligned_mi_cols * MAX_MB_PLANE);
+  vpx_memset(cpi->above_seg_context, 0,
+             sizeof(*cpi->above_seg_context) * aligned_mi_cols);
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -312,6 +312,12 @@
   cpi->mb_activity_map = 0;
   vpx_free(cpi->mb_norm_activity_map);
   cpi->mb_norm_activity_map = 0;
+
+  vpx_free(cpi->above_context[0]);
+  cpi->above_context[0] = NULL;
+
+  vpx_free(cpi->above_seg_context);
+  cpi->above_seg_context = NULL;
 }
 
 // Computes a q delta (in "q index" terms) to get from a starting q value
@@ -1040,6 +1046,19 @@
   CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map,
                   vpx_calloc(sizeof(unsigned int),
                              cm->mb_rows * cm->mb_cols));
+
+  // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
+  // block where mi unit size is 8x8.
+  vpx_free(cpi->above_context[0]);
+  CHECK_MEM_ERROR(cm, cpi->above_context[0],
+                  vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) *
+                             MAX_MB_PLANE,
+                             sizeof(*cpi->above_context[0])));
+
+  vpx_free(cpi->above_seg_context);
+  CHECK_MEM_ERROR(cm, cpi->above_seg_context,
+                  vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols),
+                             sizeof(*cpi->above_seg_context)));
 }
 
 
@@ -1070,6 +1089,15 @@
       vp9_init3smotion_compensation(&cpi->mb, y_stride);
     } else if (cpi->sf.search_method == DIAMOND) {
       vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+    }
+  }
+
+  {
+    int i;
+    for (i = 1; i < MAX_MB_PLANE; ++i) {
+      cpi->above_context[i] = cpi->above_context[0] +
+                              i * sizeof(*cpi->above_context[0]) * 2 *
+                              mi_cols_aligned_to_sb(cm->mi_cols);
     }
   }
 }
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -675,6 +675,13 @@
   // Debug / test stats
   int64_t mode_test_hits[BLOCK_SIZES];
 #endif
+
+  /* Y,U,V,(A) */
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
+
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT left_seg_context[8];
 } VP9_COMP;
 
 static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {