shithub: libvpx

Download patch

ref: ff2b8aa2c9f95a8456306d2a9ce28803eb90b38a
parent: 41a8a95bd1ca23cb8ab4bfbbe2fc1e219ea5243a
author: Jingning Han <jingning@google.com>
date: Tue Apr 23 06:12:18 EDT 2013

Contextual entropy coding of partition syntax

This commit enables selecting probability models for recursive block
partition information syntax, depending on its above/left partition
information, as well as the current block size. These conditional
probability models are reasonably stationary and consistent across
frames, hence the backward adaptive approach is used to maintain and
update the contextual models.

It achieves coding performance gains (on top of enabling rectangular
block sizes):
derf:   0.242%
yt:     0.391%
hd:     0.376%
stdhd:  0.645%

Change-Id: Ie513d9673337f0d27abd65fb566b711d0844ec2e

--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -57,11 +57,12 @@
   vpx_free(oci->above_context);
   vpx_free(oci->mip);
   vpx_free(oci->prev_mip);
+  vpx_free(oci->above_seg_context);
 
   oci->above_context = 0;
   oci->mip = 0;
   oci->prev_mip = 0;
-
+  oci->above_seg_context = 0;
 }
 
 int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
@@ -130,12 +131,23 @@
   oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
 
   oci->above_context =
-    vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * (3 + oci->mb_cols), 1);
+    vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * mb_cols_aligned_to_sb(oci), 1);
 
   if (!oci->above_context) {
     vp9_free_frame_buffers(oci);
     return 1;
   }
+
+  oci->above_seg_context =
+    vpx_calloc(sizeof(PARTITION_CONTEXT) * mb_cols_aligned_to_sb(oci), 1);
+
+  if (!oci->above_seg_context) {
+    vp9_free_frame_buffers(oci);
+    return 1;
+  }
+
+  vp9_update_mode_info_border(oci, oci->mip);
+  vp9_update_mode_info_in_image(oci, oci->mi);
 
   return 0;
 }
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -54,6 +54,8 @@
   ENTROPY_CONTEXT v[2];
 } ENTROPY_CONTEXT_PLANES;
 
+typedef char PARTITION_CONTEXT;
+
 static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
                                            ENTROPY_CONTEXT b) {
   return (a != 0) + (b != 0);
@@ -245,11 +247,6 @@
   return mb_height_log2(sb_type) + 2;
 }
 
-static INLINE int partition_plane(BLOCK_SIZE_TYPE sb_type) {
-  assert(mb_width_log2(sb_type) == mb_height_log2(sb_type));
-  return (mb_width_log2(sb_type) - 1);
-}
-
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
 #if CONFIG_COMP_INTERINTRA_PRED
@@ -377,6 +374,10 @@
   ENTROPY_CONTEXT_PLANES *above_context;
   ENTROPY_CONTEXT_PLANES *left_context;
 
+  // partition contexts
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT *left_seg_context;
+
   /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
   unsigned char segmentation_enabled;
 
@@ -448,6 +449,69 @@
   int q_index;
 
 } MACROBLOCKD;
+
+static INLINE void update_partition_context(MACROBLOCKD *xd,
+                                            BLOCK_SIZE_TYPE sb_type,
+                                            BLOCK_SIZE_TYPE sb_size) {
+  int bsl = mb_width_log2(sb_size), bs = 1 << bsl;
+  int bwl = mb_width_log2(sb_type);
+  int bhl = mb_height_log2(sb_type);
+  int boffset = mb_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+  int i;
+  // skip macroblock partition
+  if (bsl == 0)
+    return;
+
+  // update the partition context at the end notes. set partition bits
+  // of block sizes larger than the current one to be one, and partition
+  // bits of smaller block sizes to be zero.
+  if ((bwl == bsl) && (bhl == bsl)) {
+    for (i = 0; i < bs; i++)
+      xd->left_seg_context[i] = ~(0xf << boffset);
+    for (i = 0; i < bs; i++)
+      xd->above_seg_context[i] = ~(0xf << boffset);
+#if CONFIG_SBSEGMENT
+  } else if ((bwl == bsl) && (bhl < bsl)) {
+    for (i = 0; i < bs; i++)
+      xd->left_seg_context[i] = ~(0xe << boffset);
+    for (i = 0; i < bs; i++)
+      xd->above_seg_context[i] = ~(0xf << boffset);
+  }  else if ((bwl < bsl) && (bhl == bsl)) {
+    for (i = 0; i < bs; i++)
+      xd->left_seg_context[i] = ~(0xf << boffset);
+    for (i = 0; i < bs; i++)
+      xd->above_seg_context[i] = ~(0xe << boffset);
+#endif
+  } else if ((bwl < bsl) && (bhl < bsl)) {
+    for (i = 0; i < bs; i++)
+      xd->left_seg_context[i] = ~(0xe << boffset);
+    for (i = 0; i < bs; i++)
+      xd->above_seg_context[i] = ~(0xe << boffset);
+  } else {
+    assert(0);
+  }
+}
+
+static INLINE int partition_plane_context(MACROBLOCKD *xd,
+                                          BLOCK_SIZE_TYPE sb_type) {
+  int bsl = mb_width_log2(sb_type), bs = 1 << bsl;
+  int above = 0, left = 0, i;
+  int boffset = mb_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+
+  assert(mb_width_log2(sb_type) == mb_height_log2(sb_type));
+  assert(bsl >= 0);
+  assert(boffset >= 0);
+
+  for (i = 0; i < bs; i++)
+    above |= (xd->above_seg_context[i] & (1 << boffset));
+  for (i = 0; i < bs; i++)
+    left |= (xd->left_seg_context[i] & (1 << boffset));
+
+  above = (above > 0);
+  left  = (left > 0);
+
+  return (left * 2 + above) + (bsl - 1) * PARTITION_PLOFFSET;
+}
 
 #define ACTIVE_HT   110                // quantization stepsize threshold
 
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -152,13 +152,22 @@
 const vp9_prob vp9_mbsplit_probs [VP9_NUMMBSPLITS - 1] = { 110, 111, 150};
 
 #if CONFIG_SBSEGMENT
-const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = {
-  {110, 111, 150},
-  {110, 111, 150},
+const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS]
+                                  [PARTITION_TYPES - 1] = {
+  {202, 162, 107},
+  {16,  2,   169},
+  {3,   246,  19},
+  {104, 90,  134},
+  {183, 70,  109},
+  {30,  14,  162},
+  {67,  208,  22},
+  {4,   17,   5},
 };
 #else
-const vp9_prob vp9_partition_probs[PARTITION_PLANES][PARTITION_TYPES - 1] = {
-  {200}, {200},
+const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS]
+                                  [PARTITION_TYPES - 1] = {
+  {200}, {200}, {200}, {200},
+  {200}, {200}, {200}, {200},
 };
 #endif
 
@@ -660,7 +669,7 @@
                                         interintra_prob, factor);
   }
 #endif
-  for (i = 0; i < PARTITION_PLANES; i++)
+  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
     update_mode_probs(PARTITION_TYPES, vp9_partition_tree,
                       fc->partition_counts[i], fc->pre_partition_prob[i],
                       fc->partition_prob[i], 0);
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -44,6 +44,7 @@
   PARTITION_TYPES
 } PARTITION_TYPE;
 
-#define PARTITION_PLANES 2  // number of probability models
+#define PARTITION_PLOFFSET   4  // number of probability models per block size
+#define NUM_PARTITION_CONTEXTS (2 * PARTITION_PLOFFSET)
 
 #endif  // VP9_COMMON_VP9_ENUMS_H_
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -68,7 +68,7 @@
   vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
   vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
   vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
-  vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
+  vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
   vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
   vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
@@ -90,7 +90,7 @@
   vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1];
   vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
   vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1];
-  vp9_prob pre_partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
+  vp9_prob pre_partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
   unsigned int bmode_counts[VP9_NKF_BINTRAMODES];
   unsigned int ymode_counts[VP9_YMODES];   /* interframe intra mode probs */
   unsigned int sb_ymode_counts[VP9_I32X32_MODES];
@@ -98,7 +98,7 @@
   unsigned int i8x8_mode_counts[VP9_I8X8_MODES];   /* interframe intra probs */
   unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS];
   unsigned int mbsplit_counts[VP9_NUMMBSPLITS];
-  unsigned int partition_counts[PARTITION_PLANES][PARTITION_TYPES];
+  unsigned int partition_counts[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
   vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
   vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
@@ -251,6 +251,10 @@
   ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
   ENTROPY_CONTEXT_PLANES left_context[4];  /* (up to) 4 contexts "" */
 
+  // partition contexts
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT left_seg_context[4];
+
   /* keyframe block modes are predicted by their above, left neighbors */
 
   vp9_prob kf_bmode_prob[VP9_KF_BINTRAMODES]
@@ -331,6 +335,10 @@
   *idx = new_idx;
 
   buf[new_idx]++;
+}
+
+static int mb_cols_aligned_to_sb(VP9_COMMON *cm) {
+  return (cm->mb_cols + 3) & ~3;
 }
 
 // TODO(debargha): merge the two functions
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -479,7 +479,7 @@
   } else {
     nmv_context *const nmvc = &pbi->common.fc.nmvc;
     MACROBLOCKD *const xd = &pbi->mb;
-    int i, j;
+    int i;
 
     if (cm->mcomp_filter_type == SWITCHABLE)
       read_switchable_interp_probs(pbi, r);
@@ -512,11 +512,6 @@
     if (vp9_read_bit(r))
       for (i = 0; i < VP9_I32X32_MODES - 1; ++i)
         cm->fc.sb_ymode_prob[i] = vp9_read_prob(r);
-
-    for (j = 0; j < PARTITION_PLANES; j++)
-      if (vp9_read_bit(r))
-        for (i = 0; i < PARTITION_TYPES - 1; i++)
-          cm->fc.partition_prob[j][i] = vp9_read_prob(r);
 
     read_nmvprobs(r, nmvc, xd->allow_high_precision_mv);
   }
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -796,6 +796,8 @@
   xd->prev_mode_info_context = cm->prev_mi + mb_idx;
   xd->above_context = cm->above_context + mb_col;
   xd->left_context = cm->left_context + mb_row % 4;
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  xd->left_seg_context  = cm->left_seg_context + (mb_row & 3);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -865,10 +867,14 @@
     return;
 
   if (bsize > BLOCK_SIZE_MB16X16) {
+    int pl;
     // read the partition information
+    xd->left_seg_context = pc->left_seg_context + (mb_row & 3);
+    xd->above_seg_context = pc->above_seg_context + mb_col;
+    pl = partition_plane_context(xd, bsize);
     partition = treed_read(r, vp9_partition_tree,
-                           pc->fc.partition_prob[bsl - 1]);
-    pc->fc.partition_counts[bsl - 1][partition]++;
+                           pc->fc.partition_prob[pl]);
+    pc->fc.partition_counts[pl][partition]++;
   }
 
   switch (partition) {
@@ -907,6 +913,13 @@
     default:
       assert(0);
   }
+  // update partition context
+  if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
+    return;
+
+  xd->left_seg_context = pc->left_seg_context + (mb_row & 3);
+  xd->above_seg_context = pc->above_seg_context + mb_col;
+  update_partition_context(xd, subsize, bsize);
 }
 
 /* Decode a row of Superblocks (4x4 region of MBs) */
@@ -918,6 +931,7 @@
        mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
     // For a SB there are 2 left contexts, each pertaining to a MB row within
     vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+    vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context));
     for (mb_col = pc->cur_tile_mb_col_start;
          mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
       decode_modes_sb(pbi, mb_row, mb_col, r, BLOCK_SIZE_SB64X64);
@@ -1359,7 +1373,10 @@
   pc->tile_rows    = 1 << pc->log2_tile_rows;
 
   vpx_memset(pc->above_context, 0,
-             sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
+             sizeof(ENTROPY_CONTEXT_PLANES) * mb_cols_aligned_to_sb(pc));
+
+  vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+                                       mb_cols_aligned_to_sb(pc));
 
   if (pbi->oxcf.inv_tile_order) {
     const int n_cols = pc->tile_columns;
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1176,6 +1176,7 @@
                            int mb_row, int mb_col,
                            BLOCK_SIZE_TYPE bsize) {
   VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
   const int mis = cm->mode_info_stride;
   int bwl, bhl;
 #if CONFIG_SBSEGMENT
@@ -1210,22 +1211,32 @@
   else
     assert(0);
 
-  if (bsize > BLOCK_SIZE_MB16X16)
+  if (bsize > BLOCK_SIZE_MB16X16) {
+    int pl;
+    xd->left_seg_context = cm->left_seg_context + (mb_row & 3);
+    xd->above_seg_context = cm->above_seg_context + mb_col;
+    pl = partition_plane_context(xd, bsize);
     // encode the partition information
-    write_token(bc, vp9_partition_tree, cm->fc.partition_prob[bsl - 1],
+    write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
                 vp9_partition_encodings + partition);
+  }
 
   switch (partition) {
     case PARTITION_NONE:
+      subsize = bsize;
       write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
       break;
 #if CONFIG_SBSEGMENT
     case PARTITION_HORZ:
+      subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 :
+                                                BLOCK_SIZE_SB32X16;
       write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
       if ((mb_row + bh) < cm->mb_rows)
         write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mb_row + bh, mb_col);
       break;
     case PARTITION_VERT:
+      subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 :
+                                                BLOCK_SIZE_SB16X32;
       write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
       if ((mb_col + bw) < cm->mb_cols)
         write_modes_b(cpi, m + bw, bc, tok, tok_end, mb_row, mb_col + bw);
@@ -1249,6 +1260,14 @@
     default:
       assert(0);
   }
+
+  // update partition context
+  if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
+    return;
+
+  xd->left_seg_context = cm->left_seg_context + (mb_row & 3);
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  update_partition_context(xd, subsize, bsize);
 }
 
 static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
@@ -1259,9 +1278,13 @@
   int mb_row, mb_col;
 
   m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis;
+  vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+                                       mb_cols_aligned_to_sb(c));
+
   for (mb_row = c->cur_tile_mb_row_start;
        mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) {
     m = m_ptr;
+    vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context));
     for (mb_col = c->cur_tile_mb_col_start;
          mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4)
       write_modes_sb(cpi, m, bc, tok, tok_end, mb_row, mb_col,
@@ -2252,14 +2275,6 @@
       }
     }
     update_mbintra_mode_probs(cpi, &header_bc);
-
-    for (i = 0; i < PARTITION_PLANES; i++) {
-      vp9_prob Pnew[PARTITION_TYPES - 1];
-      unsigned int bct[PARTITION_TYPES - 1][2];
-      update_mode(&header_bc, PARTITION_TYPES, vp9_partition_encodings,
-                  vp9_partition_tree, Pnew, pc->fc.partition_prob[i], bct,
-                  (unsigned int *)cpi->partition_count[i]);
-    }
 
     vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
   }
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -163,7 +163,7 @@
   PICK_MODE_CONTEXT sb64x32_context[2];
 #endif
   PICK_MODE_CONTEXT sb64_context;
-  int partition_cost[PARTITION_PLANES][PARTITION_TYPES];
+  int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
   void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
   void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -551,6 +551,10 @@
   xd->above_context = cm->above_context + mb_col;
   xd->left_context  = cm->left_context + (mb_row & 3);
 
+  // partition contexts
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  xd->left_seg_context  = cm->left_seg_context + (mb_row & 3);
+
   // Activity map pointer
   x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
   x->active_ptr = cpi->active_map + idx_map;
@@ -749,7 +753,12 @@
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB32X32;
+  int pl;
 
+  xd->left_seg_context  = cm->left_seg_context + (mb_row & 0x03);
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  pl = partition_plane_context(xd, bsize);
+
   if (is_sb == BLOCK_SIZE_SB32X32) {
     set_offsets(cpi, mb_row, mb_col, bsize);
     update_state(cpi, &x->sb32_context[xd->sb_index],
@@ -759,7 +768,7 @@
                       output_enabled, mb_row, mb_col, bsize);
     if (output_enabled) {
       update_stats(cpi, mb_row, mb_col);
-      cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++;
+      cpi->partition_count[pl][PARTITION_NONE]++;
 
       (*tp)->token = EOSB_TOKEN;
       (*tp)++;
@@ -769,7 +778,7 @@
     int i;
 
     if (output_enabled)
-      cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++;
+      cpi->partition_count[pl][PARTITION_VERT]++;
     for (i = 0; i < 2 && mb_col + i != cm->mb_cols; i++) {
       set_offsets(cpi, mb_row, mb_col + i, BLOCK_SIZE_SB16X32);
       update_state(cpi, &x->sb16x32_context[xd->sb_index][i],
@@ -787,7 +796,7 @@
     int i;
 
     if (output_enabled)
-      cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++;
+      cpi->partition_count[pl][PARTITION_HORZ]++;
     for (i = 0; i < 2 && mb_row + i != cm->mb_rows; i++) {
       set_offsets(cpi, mb_row + i, mb_col, BLOCK_SIZE_SB32X16);
       update_state(cpi, &x->sb32x16_context[xd->sb_index][i],
@@ -805,7 +814,7 @@
   } else {
     int i;
     if (output_enabled)
-      cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++;
+      cpi->partition_count[pl][PARTITION_SPLIT]++;
 
     for (i = 0; i < 4; i++) {
       const int x_idx = i & 1, y_idx = i >> 1;
@@ -834,6 +843,10 @@
     }
   }
 
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  xd->left_seg_context  = cm->left_seg_context + (mb_row & 3);
+  update_partition_context(xd, is_sb, BLOCK_SIZE_SB32X32);
+
   // debug output
 #if DBG_PRNT_SEGMAP
   {
@@ -853,7 +866,12 @@
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_SB64X64;
+  int pl;
 
+  xd->left_seg_context  = cm->left_seg_context + (mb_row & 3);
+  xd->above_seg_context = cm->above_seg_context + mb_col;
+  pl = partition_plane_context(xd, bsize);
+
   if (is_sb[0] == BLOCK_SIZE_SB64X64) {
     set_offsets(cpi, mb_row, mb_col, bsize);
     update_state(cpi, &x->sb64_context, bsize, 1);
@@ -863,12 +881,13 @@
 
     (*tp)->token = EOSB_TOKEN;
     (*tp)++;
-    cpi->partition_count[partition_plane(bsize)][PARTITION_NONE]++;
+
+    cpi->partition_count[pl][PARTITION_NONE]++;
 #if CONFIG_SBSEGMENT
   } else if (is_sb[0] == BLOCK_SIZE_SB32X64) {
     int i;
 
-    cpi->partition_count[partition_plane(bsize)][PARTITION_VERT]++;
+    cpi->partition_count[pl][PARTITION_VERT]++;
     for (i = 0; i < 2 && mb_col + i * 2 != cm->mb_cols; i++) {
       set_offsets(cpi, mb_row, mb_col + i * 2, BLOCK_SIZE_SB32X64);
       update_state(cpi, &x->sb32x64_context[i], BLOCK_SIZE_SB32X64, 1);
@@ -882,7 +901,7 @@
   } else if (is_sb[0] == BLOCK_SIZE_SB64X32) {
     int i;
 
-    cpi->partition_count[partition_plane(bsize)][PARTITION_HORZ]++;
+    cpi->partition_count[pl][PARTITION_HORZ]++;
     for (i = 0; i < 2 && mb_row + i * 2 != cm->mb_rows; i++) {
       set_offsets(cpi, mb_row + i * 2, mb_col, BLOCK_SIZE_SB64X32);
       update_state(cpi, &x->sb64x32_context[i], BLOCK_SIZE_SB64X32, 1);
@@ -896,7 +915,7 @@
 #endif
   } else {
     int i;
-    cpi->partition_count[partition_plane(bsize)][PARTITION_SPLIT]++;
+    cpi->partition_count[pl][PARTITION_SPLIT]++;
     for (i = 0; i < 4; i++) {
       const int x_idx = i & 1, y_idx = i >> 1;
 
@@ -910,6 +929,12 @@
                 is_sb[i]);
     }
   }
+
+  if (is_sb[0] > BLOCK_SIZE_SB32X32) {
+    xd->above_seg_context = cm->above_seg_context + mb_col;
+    xd->left_seg_context  = cm->left_seg_context + (mb_row & 3);
+    update_partition_context(xd, is_sb[0], BLOCK_SIZE_SB64X64);
+  }
 }
 
 static void encode_sb_row(VP9_COMP *cpi,
@@ -919,10 +944,11 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  int mb_col;
+  int mb_col, pl;
 
   // Initialize the left context for the new SB row
   vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
+  vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
 
   // Code each SB in the row
   for (mb_col = cm->cur_tile_mb_col_start;
@@ -932,10 +958,13 @@
     int sb64_rate = 0, sb64_dist = 0;
     int sb64_skip = 0;
     ENTROPY_CONTEXT_PLANES l[4], a[4];
+    PARTITION_CONTEXT seg_l[4], seg_a[4];
     TOKENEXTRA *tp_orig = *tp;
 
     memcpy(&a, cm->above_context + mb_col, sizeof(a));
     memcpy(&l, cm->left_context, sizeof(l));
+    memcpy(&seg_a, cm->above_seg_context + mb_col, sizeof(seg_a));
+    memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
     for (i = 0; i < 4; i++) {
       const int x_idx = (i & 1) << 1, y_idx = i & 2;
       int sb32_rate = 0, sb32_dist = 0;
@@ -982,8 +1011,10 @@
       vpx_memcpy(cm->left_context + y_idx, l2, sizeof(l2));
       vpx_memcpy(cm->above_context + mb_col + x_idx, a2, sizeof(a2));
 
-      sb32_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
-                                    [PARTITION_SPLIT];
+      xd->left_seg_context  = cm->left_seg_context + (y_idx & 3);
+      xd->above_seg_context = cm->above_seg_context + mb_col + x_idx;
+      pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
+      sb32_rate += x->partition_cost[pl][PARTITION_SPLIT];
 
       if (cpi->sf.splitmode_breakout) {
         sb32_skip = splitmodes_used;
@@ -1015,8 +1046,10 @@
           d += d2;
         }
 
-        r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
-                              [PARTITION_HORZ];
+        xd->left_seg_context  = cm->left_seg_context + (y_idx & 3);
+        xd->above_seg_context = cm->above_seg_context + mb_col + x_idx;
+        pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
+        r += x->partition_cost[pl][PARTITION_HORZ];
 
         /* is this better than MB coding? */
         if (RDCOST(x->rdmult, x->rddiv, r, d) <
@@ -1054,8 +1087,10 @@
           d += d2;
         }
 
-        r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
-                              [PARTITION_VERT];
+        xd->left_seg_context  = cm->left_seg_context + (y_idx & 3);
+        xd->above_seg_context = cm->above_seg_context + mb_col + x_idx;
+        pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
+        r += x->partition_cost[pl][PARTITION_VERT];
 
         /* is this better than MB coding? */
         if (RDCOST(x->rdmult, x->rddiv, r, d) <
@@ -1078,9 +1113,12 @@
         pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
                       tp, &r, &d, BLOCK_SIZE_SB32X32,
                       &x->sb32_context[xd->sb_index]);
-        r += x->partition_cost[partition_plane(BLOCK_SIZE_SB32X32)]
-                              [PARTITION_NONE];
 
+        xd->left_seg_context  = cm->left_seg_context + (y_idx & 3);
+        xd->above_seg_context = cm->above_seg_context + mb_col + x_idx;
+        pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32);
+        r += x->partition_cost[pl][PARTITION_NONE];
+
         if (RDCOST(x->rdmult, x->rddiv, r, d) <
                 RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
           sb32_rate = r;
@@ -1109,9 +1147,13 @@
 
     memcpy(cm->above_context + mb_col, &a, sizeof(a));
     memcpy(cm->left_context, &l, sizeof(l));
+    memcpy(cm->above_seg_context + mb_col, &seg_a, sizeof(seg_a));
+    memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
 
-    sb64_rate += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
-                                  [PARTITION_SPLIT];
+    xd->left_seg_context  = cm->left_seg_context;
+    xd->above_seg_context = cm->above_seg_context + mb_col;
+    pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
+    sb64_rate += x->partition_cost[pl][PARTITION_SPLIT];
 
 #if CONFIG_SBSEGMENT
     // check 64x32
@@ -1137,8 +1179,10 @@
         d += d2;
       }
 
-      r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
-                            [PARTITION_HORZ];
+      xd->left_seg_context  = cm->left_seg_context;
+      xd->above_seg_context = cm->above_seg_context + mb_col;
+      pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
+      r += x->partition_cost[pl][PARTITION_HORZ];
 
       /* is this better than MB coding? */
       if (RDCOST(x->rdmult, x->rddiv, r, d) <
@@ -1175,8 +1219,10 @@
         d += d2;
       }
 
-      r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
-                            [PARTITION_VERT];
+      xd->left_seg_context  = cm->left_seg_context;
+      xd->above_seg_context = cm->above_seg_context + mb_col;
+      pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
+      r += x->partition_cost[pl][PARTITION_VERT];
 
       /* is this better than MB coding? */
       if (RDCOST(x->rdmult, x->rddiv, r, d) <
@@ -1197,9 +1243,12 @@
 
       pick_sb_modes(cpi, mb_row, mb_col, tp, &r, &d,
                     BLOCK_SIZE_SB64X64, &x->sb64_context);
-      r += x->partition_cost[partition_plane(BLOCK_SIZE_SB64X64)]
-                            [PARTITION_NONE];
 
+      xd->left_seg_context  = cm->left_seg_context;
+      xd->above_seg_context = cm->above_seg_context + mb_col;
+      pl = partition_plane_context(xd, BLOCK_SIZE_SB64X64);
+      r += x->partition_cost[pl][PARTITION_NONE];
+
       if (RDCOST(x->rdmult, x->rddiv, r, d) <
               RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
         sb64_rate = r;
@@ -1268,7 +1317,9 @@
 #endif
 
   vpx_memset(cm->above_context, 0,
-             sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
+             sizeof(ENTROPY_CONTEXT_PLANES) * mb_cols_aligned_to_sb(cm));
+  vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) *
+                                       mb_cols_aligned_to_sb(cm));
 }
 
 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -103,7 +103,7 @@
   vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
   vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
   vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
-  vp9_prob partition_prob[PARTITION_PLANES][PARTITION_TYPES - 1];
+  vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
   vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
                                  [VP9_SWITCHABLE_FILTERS - 1];
@@ -457,7 +457,7 @@
   int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS];
   int mbsplit_count[VP9_NUMMBSPLITS];
   int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
-  unsigned int partition_count[PARTITION_PLANES][PARTITION_TYPES];
+  unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 #if CONFIG_COMP_INTERINTRA_PRED
   unsigned int interintra_count[2];
   unsigned int interintra_select_count[2];
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -260,7 +260,7 @@
   fill_token_costs(cpi->mb.token_costs[TX_32X32],
                    cpi->common.fc.coef_probs_32x32, TX_32X32);
 
-  for (i = 0; i < 2; i++)
+  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
     vp9_cost_tokens(cpi->mb.partition_cost[i],
                     cpi->common.fc.partition_prob[i],
                     vp9_partition_tree);