shithub: libvpx

Download patch

ref: 7ac5ac52f917380ca3ed51757efad0b157bb08e9
parent: 0812c121e76d28af6865b9408e2cc7e37ed0390a
author: Jingning Han <jingning@google.com>
date: Tue May 21 17:28:42 EDT 2013

Merge 4x4 block level partition into codebase

Move 4x4/4x8/8x4 partition coding out of experimental list.

This commit fixes the unit test failures. It also resolves the
merge conflicts between the 4x4 block-level partition and the
iterative motion search for comp_inter_inter.

Change-Id: I898671f0631f5ddc4f5cc68d4c62ead7de9c5a58

--- a/configure
+++ b/configure
@@ -246,7 +246,6 @@
     multiple_arf
     non420
     alpha
-    ab4x4
 "
 CONFIG_LIST="
     external_build
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -158,7 +158,7 @@
 
 union b_mode_info {
   struct {
-    B_PREDICTION_MODE first;
+    MB_PREDICTION_MODE first;
   } as_mode;
   int_mv as_mv[2];  // first, second inter predictor motion vectors
 };
@@ -174,13 +174,9 @@
 
 static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
   switch (sb_type) {
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB4X8:
-#endif
     case BLOCK_SIZE_AB4X4: return 0;
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB8X4:
-#endif
     case BLOCK_SIZE_SB8X8:
     case BLOCK_SIZE_SB8X16: return 1;
     case BLOCK_SIZE_SB16X8:
@@ -198,13 +194,9 @@
 
 static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
   switch (sb_type) {
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB8X4:
-#endif
     case BLOCK_SIZE_AB4X4: return 0;
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB4X8:
-#endif
     case BLOCK_SIZE_SB8X8:
     case BLOCK_SIZE_SB16X8: return 1;
     case BLOCK_SIZE_SB8X16:
@@ -222,11 +214,9 @@
 
 static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
   int a = b_width_log2(sb_type) - 1;
-#if CONFIG_AB4X4
   // align 4x4 block to mode_info
   if (a < 0)
     a = 0;
-#endif
   assert(a >= 0);
   return a;
 }
@@ -233,10 +223,8 @@
 
 static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
   int a = b_height_log2(sb_type) - 1;
-#if CONFIG_AB4X4
   if (a < 0)
     a = 0;
-#endif
   assert(a >= 0);
   return a;
 }
@@ -413,9 +401,7 @@
   int sb_index;   // index of 32x32 block inside the 64x64 block
   int mb_index;   // index of 16x16 block inside the 32x32 block
   int b_index;    // index of 8x8 block inside the 16x16 block
-#if CONFIG_AB4X4
   int ab_index;   // index of 4x4 block inside the 8x8 block
-#endif
   int q_index;
 
 } MACROBLOCKD;
@@ -435,12 +421,10 @@
     case BLOCK_SIZE_SB8X16:
     case BLOCK_SIZE_SB8X8:
       return &xd->b_index;
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB8X4:
     case BLOCK_SIZE_SB4X8:
     case BLOCK_SIZE_AB4X4:
       return &xd->ab_index;
-#endif
     default:
       assert(0);
       return NULL;
@@ -456,12 +440,6 @@
   int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
   int i;
 
-#if !CONFIG_AB4X4
-  // skip 8x8 block partition
-  if (bsl == 0)
-    return;
-#endif
-
   // update the partition context at the end notes. set partition bits
   // of block sizes larger than the current one to be one, and partition
   // bits of smaller block sizes to be zero.
@@ -508,11 +486,7 @@
   above = (above > 0);
   left  = (left > 0);
 
-#if CONFIG_AB4X4
   return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
-#else
-  return (left * 2 + above) + (bsl - 1) * PARTITION_PLOFFSET;
-#endif
 }
 
 static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
@@ -529,10 +503,8 @@
         subsize = BLOCK_SIZE_SB32X16;
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB16X8;
-#if CONFIG_AB4X4
       else if (bsize == BLOCK_SIZE_SB8X8)
         subsize = BLOCK_SIZE_SB8X4;
-#endif
       else
         assert(0);
       break;
@@ -543,10 +515,8 @@
         subsize = BLOCK_SIZE_SB16X32;
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB8X16;
-#if CONFIG_AB4X4
       else if (bsize == BLOCK_SIZE_SB8X8)
         subsize = BLOCK_SIZE_SB4X8;
-#endif
       else
         assert(0);
       break;
@@ -557,10 +527,8 @@
         subsize = BLOCK_SIZE_MB16X16;
       else if (bsize == BLOCK_SIZE_MB16X16)
         subsize = BLOCK_SIZE_SB8X8;
-#if CONFIG_AB4X4
       else if (bsize == BLOCK_SIZE_SB8X8)
         subsize = BLOCK_SIZE_AB4X4;
-#endif
       else
         assert(0);
       break;
@@ -571,39 +539,39 @@
 }
 
 // convert MB_PREDICTION_MODE to B_PREDICTION_MODE
-static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
+static MB_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
   switch (mode) {
-    case DC_PRED: return B_DC_PRED;
-    case V_PRED: return B_V_PRED;
-    case H_PRED: return B_H_PRED;
-    case TM_PRED: return B_TM_PRED;
-    case D45_PRED: return B_D45_PRED;
-    case D135_PRED: return B_D135_PRED;
-    case D117_PRED: return B_D117_PRED;
-    case D153_PRED: return B_D153_PRED;
-    case D27_PRED: return B_D27_PRED;
-    case D63_PRED: return B_D63_PRED;
+    case DC_PRED: return DC_PRED;
+    case V_PRED: return V_PRED;
+    case H_PRED: return H_PRED;
+    case TM_PRED: return TM_PRED;
+    case D45_PRED: return D45_PRED;
+    case D135_PRED: return D135_PRED;
+    case D117_PRED: return D117_PRED;
+    case D153_PRED: return D153_PRED;
+    case D27_PRED: return D27_PRED;
+    case D63_PRED: return D63_PRED;
     default:
        assert(0);
-       return B_MODE_COUNT;  // Dummy value
+       return MB_MODE_COUNT;  // Dummy value
   }
 }
 
 // transform mapping
-static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
+static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) {
   switch (bmode) {
-    case B_TM_PRED :
-    case B_D135_PRED :
+    case TM_PRED :
+    case D135_PRED :
       return ADST_ADST;
 
-    case B_V_PRED :
-    case B_D117_PRED :
-    case B_D63_PRED:
+    case V_PRED :
+    case D117_PRED :
+    case D63_PRED:
       return ADST_DCT;
 
-    case B_H_PRED :
-    case B_D153_PRED :
-    case B_D27_PRED :
+    case H_PRED :
+    case D153_PRED :
+    case D27_PRED :
       return DCT_ADST;
 
     default:
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -106,12 +106,10 @@
 const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS]
                                   [PARTITION_TYPES - 1] = {
   // FIXME(jingning,rbultje) put real probabilities here
-#if CONFIG_AB4X4
   {202, 162, 107},
   {16,  2,   169},
   {3,   246,  19},
   {104, 90,  134},
-#endif
   {202, 162, 107},
   {16,  2,   169},
   {3,   246,  19},
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -22,10 +22,8 @@
 
 typedef enum BLOCK_SIZE_TYPE {
   BLOCK_SIZE_AB4X4,
-#if CONFIG_AB4X4
   BLOCK_SIZE_SB4X8,
   BLOCK_SIZE_SB8X4,
-#endif
   BLOCK_SIZE_SB8X8,
   BLOCK_SIZE_SB8X16,
   BLOCK_SIZE_SB16X8,
@@ -48,10 +46,6 @@
 } PARTITION_TYPE;
 
 #define PARTITION_PLOFFSET   4  // number of probability models per block size
-#if CONFIG_AB4X4
 #define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
-#else
-#define NUM_PARTITION_CONTEXTS (3 * PARTITION_PLOFFSET)
-#endif
 
 #endif  // VP9_COMMON_VP9_ENUMS_H_
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -141,7 +141,7 @@
       (cur_mb->bmi + b - 2)->as_mv[0].as_int;
 }
 
-static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
+static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
   // FIXME(rbultje, jingning): temporary hack because jenkins doesn't
   // understand this condition. This will go away soon.
   if (b == 0 || b == 2) {
@@ -160,7 +160,7 @@
   return (cur_mb->bmi + b - 1)->as_mode.first;
 }
 
-static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
+static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
                                           int b, int mi_stride) {
   if (!(b >> 1)) {
     /* On top edge, get from MB above us */
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -467,6 +467,12 @@
 prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
 specialize vp9_sad8x8x8 sse4
 
+prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
+specialize vp9_sad8x4x8
+
+prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
+specialize vp9_sad4x8x8
+
 prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array"
 specialize vp9_sad4x4x8 sse4
 
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -119,7 +119,6 @@
     m->mbmi.mb_skip_coeff = vp9_read(r, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
 
   // luma mode
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
     const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis);
     const MB_PREDICTION_MODE L = xd->left_available ?
@@ -128,26 +127,14 @@
   } else {
      m->mbmi.mode = I4X4_PRED;
   }
-#else
-  m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_SB8X8 ?
-      read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]):
-      read_kf_mb_ymode(r, cm->kf_ymode_prob[cm->kf_ymode_probs_index]);
-#endif
 
   m->mbmi.ref_frame = INTRA_FRAME;
 
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
-#else
-  if (m->mbmi.mode == I4X4_PRED) {
-#endif
     int idx, idy;
     int bw = 1 << b_width_log2(m->mbmi.sb_type);
     int bh = 1 << b_height_log2(m->mbmi.sb_type);
 
-#if !CONFIG_AB4X4
-    bw = 1, bh = 1;
-#endif
     for (idy = 0; idy < 2; idy += bh) {
       for (idx = 0; idx < 2; idx += bw) {
         int ib = idy * 2 + idx;
@@ -169,12 +156,7 @@
 
   if (cm->txfm_mode == TX_MODE_SELECT &&
     !(m->mbmi.mb_skip_coeff && m->mbmi.ref_frame != INTRA_FRAME)
-#if CONFIG_AB4X4
-      && m->mbmi.sb_type >= BLOCK_SIZE_SB8X8
-#else
-      && m->mbmi.mode != I4X4_PRED
-#endif
-      ) {
+      && m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
     const int allow_16x16 = m->mbmi.sb_type >= BLOCK_SIZE_MB16X16;
     const int allow_32x32 = m->mbmi.sb_type >= BLOCK_SIZE_SB32X32;
     m->mbmi.txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
@@ -186,12 +168,7 @@
              m->mbmi.mode <= TM_PRED) {
     m->mbmi.txfm_size = TX_16X16;
   } else if (cm->txfm_mode >= ALLOW_8X8 &&
-#if CONFIG_AB4X4
-             m->mbmi.sb_type >= BLOCK_SIZE_SB8X8
-#else
-             m->mbmi.mode != I4X4_PRED
-#endif
-             ) {
+             m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
     m->mbmi.txfm_size = TX_8X8;
   } else {
     m->mbmi.txfm_size = TX_4X4;
@@ -631,16 +608,10 @@
       if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
         mbmi->mode = ZEROMV;
       } else {
-#if CONFIG_AB4X4
         if (bsize >= BLOCK_SIZE_SB8X8)
           mbmi->mode = read_sb_mv_ref(r, mv_ref_p);
         else
           mbmi->mode = SPLITMV;
-#else
-        mbmi->mode = bsize > BLOCK_SIZE_SB8X8 ?
-                                   read_sb_mv_ref(r, mv_ref_p)
-                                 : read_mv_ref(r, mv_ref_p);
-#endif
         vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]);
       }
 
@@ -706,9 +677,6 @@
     mbmi->uv_mode = DC_PRED;
     switch (mbmi->mode) {
       case SPLITMV:
-#if !CONFIG_AB4X4
-        bw = 1, bh = 1;
-#endif
         mbmi->need_to_clamp_mvs = 0;
         for (idy = 0; idy < 2; idy += bh) {
           for (idx = 0; idx < 2; idx += bw) {
@@ -848,7 +816,6 @@
     // required for left and above block mv
     mv0->as_int = 0;
 
-#if CONFIG_AB4X4
     if (bsize >= BLOCK_SIZE_SB8X8) {
       mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
       cm->fc.sb_ymode_counts[mbmi->mode]++;
@@ -855,26 +822,10 @@
     } else {
       mbmi->mode = I4X4_PRED;
     }
-#else
-    if (bsize > BLOCK_SIZE_SB8X8) {
-      mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
-      cm->fc.sb_ymode_counts[mbmi->mode]++;
-    } else {
-      mbmi->mode = read_ymode(r, cm->fc.ymode_prob);
-      cm->fc.ymode_counts[mbmi->mode]++;
-    }
-#endif
 
     // If MB mode is I4X4_PRED read the block modes
-#if CONFIG_AB4X4
     if (bsize < BLOCK_SIZE_SB8X8) {
-#else
-    if (mbmi->mode == I4X4_PRED) {
-#endif
       int idx, idy;
-#if !CONFIG_AB4X4
-      bw = 1, bh = 1;
-#endif
       for (idy = 0; idy < 2; idy += bh) {
         for (idx = 0; idx < 2; idx += bw) {
           int ib = idy * 2 + idx, k;
@@ -893,15 +844,9 @@
     cm->fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
   }
 
-#if CONFIG_AB4X4
-  if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
-      bsize >= BLOCK_SIZE_SB8X8) {
-#else
   if (cm->txfm_mode == TX_MODE_SELECT &&
       (mbmi->mb_skip_coeff == 0 || mbmi->ref_frame == INTRA_FRAME) &&
-      ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != I4X4_PRED) ||
-       (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
-#endif
+      bsize >= BLOCK_SIZE_SB8X8) {
     const int allow_16x16 = bsize >= BLOCK_SIZE_MB16X16;
     const int allow_32x32 = bsize >= BLOCK_SIZE_SB32X32;
     mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
@@ -909,21 +854,9 @@
              cm->txfm_mode >= ALLOW_32X32) {
     mbmi->txfm_size = TX_32X32;
   } else if (cm->txfm_mode >= ALLOW_16X16 &&
-             bsize >= BLOCK_SIZE_MB16X16
-#if !CONFIG_AB4X4
-      && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
-       (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))
-#endif
-       ) {
+             bsize >= BLOCK_SIZE_MB16X16) {
     mbmi->txfm_size = TX_16X16;
-  } else if (cm->txfm_mode >= ALLOW_8X8 &&
-#if CONFIG_AB4X4
-      (bsize >= BLOCK_SIZE_SB8X8))
-#else
-      (!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) &&
-       !(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV)))
-#endif
-  {
+  } else if (cm->txfm_mode >= ALLOW_8X8 && (bsize >= BLOCK_SIZE_SB8X8)) {
     mbmi->txfm_size = TX_8X8;
   } else {
     mbmi->txfm_size = TX_4X4;
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -486,23 +486,17 @@
                            vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
   MACROBLOCKD *const xd = &pbi->mb;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index > 0)
       return;
-#endif
   set_offsets(pbi, bsize, mi_row, mi_col);
   vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r);
   set_refs(pbi, mi_row, mi_col);
 
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
-    decode_sb_intra(pbi, xd, mi_row, mi_col, r, bsize);
-#if CONFIG_AB4X4
+    decode_sb_intra(pbi, xd, mi_row, mi_col, r, (bsize < BLOCK_SIZE_SB8X8) ?
+                                     BLOCK_SIZE_SB8X8 : bsize);
   else if (bsize < BLOCK_SIZE_SB8X8)
-#else
-  else if (bsize == BLOCK_SIZE_SB8X8 &&
-      xd->mode_info_context->mbmi.mode == SPLITMV)
-#endif
     decode_atom(pbi, xd, mi_row, mi_col, r, BLOCK_SIZE_SB8X8);
   else
     decode_sb(pbi, xd, mi_row, mi_col, r, bsize);
@@ -522,17 +516,11 @@
   if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols)
     return;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index != 0)
       return;
-#endif
 
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
-  if (bsize > BLOCK_SIZE_SB8X8) {
-#endif
     int pl;
     // read the partition information
     xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK);
@@ -573,13 +561,8 @@
       assert(0);
   }
   // update partition context
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8 &&
       (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
-#else
-  if (bsize > BLOCK_SIZE_SB8X8 &&
-      (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
-#endif
     set_partition_seg_context(pc, xd, mi_row, mi_col);
     update_partition_context(xd, subsize, bsize);
   }
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -369,12 +369,6 @@
   write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m);
 }
 
-#if !CONFIG_AB4X4
-static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
-  write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m);
-}
-#endif
-
 static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
 }
@@ -722,27 +716,13 @@
     active_section = 6;
 #endif
 
-#if CONFIG_AB4X4
     if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8)
       write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
-#else
-    if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
-      write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
-    else
-      write_ymode(bc, mode, pc->fc.ymode_prob);
-#endif
 
-#if CONFIG_AB4X4
     if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
-#else
-    if (mode == I4X4_PRED) {
-#endif
       int idx, idy;
       int bw = 1 << b_width_log2(mi->sb_type);
       int bh = 1 << b_height_log2(mi->sb_type);
-#if !CONFIG_AB4X4
-      bw = 1, bh = 1;
-#endif
       for (idy = 0; idy < 2; idy += bh)
         for (idx = 0; idx < 2; idx += bw)
           write_sb_ymode(bc, m->bmi[idy * 2 + idx].as_mode.first,
@@ -761,16 +741,8 @@
 
     // If segment skip is not enabled code the mode.
     if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
-#if CONFIG_AB4X4
       if (mi->sb_type >= BLOCK_SIZE_SB8X8)
         write_sb_mv_ref(bc, mode, mv_ref_p);
-#else
-      if (mi->sb_type > BLOCK_SIZE_SB8X8) {
-        write_sb_mv_ref(bc, mode, mv_ref_p);
-      } else {
-        write_mv_ref(bc, mode, mv_ref_p);
-      }
-#endif
       vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
     }
 
@@ -817,9 +789,6 @@
         int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl;
         int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl;
         int idx, idy;
-#if !CONFIG_AB4X4
-        bw = 1, bh = 1;
-#endif
         for (idy = 0; idy < 2; idy += bh) {
           for (idx = 0; idx < 2; idx += bw) {
             j = idy * 2 + idx;
@@ -859,21 +828,9 @@
     }
   }
 
-#if CONFIG_AB4X4
-  if (((rf == INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8) ||
-       (rf != INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8)) &&
-      pc->txfm_mode == TX_MODE_SELECT &&
-      !(skip_coeff || vp9_segfeature_active(xd, segment_id,
-                                            SEG_LVL_SKIP)))
-#else
-  if (((rf == INTRA_FRAME && mode != I4X4_PRED) ||
-       (rf != INTRA_FRAME && mode != SPLITMV)) &&
-      pc->txfm_mode == TX_MODE_SELECT &&
+  if (mi->sb_type >= BLOCK_SIZE_SB8X8 && pc->txfm_mode == TX_MODE_SELECT &&
       !(rf != INTRA_FRAME &&
-        (skip_coeff || vp9_segfeature_active(xd, segment_id,
-                                            SEG_LVL_SKIP))))
-#endif
-  {
+        (skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
     TX_SIZE sz = mi->txfm_size;
     // FIXME(rbultje) code ternary symbol once all experiments are merged
     vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
@@ -905,7 +862,6 @@
     vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
   }
 
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
     const B_PREDICTION_MODE A = above_block_mode(m, 0, mis);
     const B_PREDICTION_MODE L = xd->left_available ?
@@ -912,24 +868,11 @@
                                  left_block_mode(m, 0) : DC_PRED;
     write_kf_bmode(bc, ym, c->kf_bmode_prob[A][L]);
   }
-#else
-  if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
-    sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
-  else
-    kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
-#endif
 
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
-#else
-  if (ym == I4X4_PRED) {
-#endif
     int idx, idy;
     int bw = 1 << b_width_log2(m->mbmi.sb_type);
     int bh = 1 << b_height_log2(m->mbmi.sb_type);
-#if !CONFIG_AB4X4
-    bw = 1, bh = 1;
-#endif
     for (idy = 0; idy < 2; idy += bh) {
       for (idx = 0; idx < 2; idx += bw) {
         int i = idy * 2 + idx;
@@ -944,14 +887,8 @@
 
   write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
 
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT &&
-      !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
-#else
-  if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT &&
-    !(m->mbmi.ref_frame != INTRA_FRAME && (skip_coeff ||
-      vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
-#endif
+      !((skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
     TX_SIZE sz = m->mbmi.txfm_size;
     // FIXME(rbultje) code ternary symbol once all experiments are merged
     vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
@@ -969,11 +906,9 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
-#if CONFIG_AB4X4
   if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8)
     if (xd->ab_index > 0)
       return;
-#endif
   xd->mode_info_context = m;
   set_mi_row_col(&cpi->common, xd, mi_row,
                  1 << mi_height_log2(m->mbmi.sb_type),
@@ -1026,17 +961,11 @@
   else
     assert(0);
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index > 0)
       return;
-#endif
 
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
-  if (bsize > BLOCK_SIZE_SB8X8) {
-#endif
     int pl;
     xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
     xd->above_seg_context = cm->above_seg_context + mi_col;
@@ -1078,13 +1007,8 @@
   }
 
   // update partition context
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8 &&
       (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
-#else
-  if (bsize > BLOCK_SIZE_SB8X8 &&
-      (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
-#endif
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     update_partition_context(xd, subsize, bsize);
   }
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -140,11 +140,9 @@
 
   // TODO(jingning): Need to refactor the structure arrays that buffers the
   // coding mode decisions of each partition type.
-#if CONFIG_AB4X4
   PICK_MODE_CONTEXT ab4x4_context[4][4][4];
   PICK_MODE_CONTEXT sb8x4_context[4][4][4];
   PICK_MODE_CONTEXT sb4x8_context[4][4][4];
-#endif
   PICK_MODE_CONTEXT sb8x8_context[4][4][4];
   PICK_MODE_CONTEXT sb8x16_context[4][4][2];
   PICK_MODE_CONTEXT sb16x8_context[4][4][2];
@@ -158,9 +156,7 @@
   PICK_MODE_CONTEXT sb64_context;
   int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
-#if CONFIG_AB4X4
   BLOCK_SIZE_TYPE b_partitioning[4][4][4];
-#endif
   BLOCK_SIZE_TYPE mb_partitioning[4][4];
   BLOCK_SIZE_TYPE sb_partitioning[4];
   BLOCK_SIZE_TYPE sb64_partitioning;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -619,11 +619,9 @@
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index != 0)
       return;
-#endif
 
   set_offsets(cpi, mi_row, mi_col, bsize);
   xd->mode_info_context->mbmi.sb_type = bsize;
@@ -708,7 +706,6 @@
       return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
     case BLOCK_SIZE_SB8X8:
       return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB8X4:
       return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
     case BLOCK_SIZE_SB4X8:
@@ -715,7 +712,6 @@
       return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
     case BLOCK_SIZE_AB4X4:
       return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
-#endif
     default:
       assert(0);
       return NULL;
@@ -732,10 +728,8 @@
       return &x->sb_partitioning[xd->sb_index];
     case BLOCK_SIZE_MB16X16:
       return &x->mb_partitioning[xd->sb_index][xd->mb_index];
-#if CONFIG_AB4X4
     case BLOCK_SIZE_SB8X8:
       return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
-#endif
     default:
       assert(0);
       return NULL;
@@ -785,11 +779,9 @@
   if (sub_index != -1)
     *(get_sb_index(xd, bsize)) = sub_index;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index > 0)
       return;
-#endif
   set_offsets(cpi, mi_row, mi_col, bsize);
   update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
   encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
@@ -816,13 +808,8 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-#if CONFIG_AB4X4
   c1 = BLOCK_SIZE_AB4X4;
-  if (bsize >= BLOCK_SIZE_SB8X8)
-#else
-  if (bsize > BLOCK_SIZE_SB8X8)
-#endif
-  {
+  if (bsize >= BLOCK_SIZE_SB8X8) {
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     pl = partition_plane_context(xd, bsize);
     c1 = *(get_sb_partitioning(x, bsize));
@@ -831,13 +818,8 @@
   bwl = b_width_log2(c1), bhl = b_height_log2(c1);
 
   if (bsl == bwl && bsl == bhl) {
-#if CONFIG_AB4X4
     if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
         cpi->partition_count[pl][PARTITION_NONE]++;
-#else
-    if (output_enabled && bsize > BLOCK_SIZE_SB8X8)
-      cpi->partition_count[pl][PARTITION_NONE]++;
-#endif
     encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
   } else if (bsl == bhl && bsl > bwl) {
     if (output_enabled)
@@ -868,13 +850,8 @@
     }
   }
 
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8 &&
       (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) {
-#else
-  if (bsize > BLOCK_SIZE_SB8X8 &&
-      (bsize == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
-#endif
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     update_partition_context(xd, c1, bsize);
   }
@@ -900,7 +877,6 @@
   BLOCK_SIZE_TYPE subsize;
   int srate = INT_MAX, sdist = INT_MAX;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
     if (xd->ab_index != 0) {
       *rate = 0;
@@ -907,7 +883,6 @@
       *dist = 0;
       return;
     }
-#endif
   assert(mi_height_log2(bsize) == mi_width_log2(bsize));
 
   // buffer the above/left context information of the block in search.
@@ -925,11 +900,7 @@
              sizeof(PARTITION_CONTEXT) * ms);
 
   // PARTITION_SPLIT
-#if CONFIG_AB4X4
   if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
-  if (bsize >= BLOCK_SIZE_MB16X16) {
-#endif
     int r4 = 0, d4 = 0;
     subsize = get_subsize(bsize, PARTITION_SPLIT);
     *(get_sb_partitioning(x, bsize)) = subsize;
@@ -951,12 +922,8 @@
     }
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
     if (r4 < INT_MAX)
       r4 += x->partition_cost[pl][PARTITION_SPLIT];
-#else
-    r4 += x->partition_cost[pl][PARTITION_SPLIT];
-#endif
     assert(r4 >= 0);
     assert(d4 >= 0);
     srate = r4;
@@ -966,11 +933,7 @@
 
   // PARTITION_HORZ
   if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) &&
-#if CONFIG_AB4X4
       (bsize >= BLOCK_SIZE_SB8X8)) {
-#else
-      (bsize >= BLOCK_SIZE_MB16X16)) {
-#endif
     int r2, d2;
     int mb_skip = 0;
     subsize = get_subsize(bsize, PARTITION_HORZ);
@@ -993,12 +956,8 @@
     }
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
     if (r2 < INT_MAX)
       r2 += x->partition_cost[pl][PARTITION_HORZ];
-#else
-    r2 += x->partition_cost[pl][PARTITION_HORZ];
-#endif
     if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
          RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
       srate = r2;
@@ -1010,11 +969,7 @@
 
   // PARTITION_VERT
   if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) &&
-#if CONFIG_AB4X4
       (bsize >= BLOCK_SIZE_SB8X8)) {
-#else
-      (bsize >= BLOCK_SIZE_MB16X16)) {
-#endif
     int r2, d2;
     int mb_skip = 0;
     subsize = get_subsize(bsize, PARTITION_VERT);
@@ -1036,12 +991,8 @@
     }
     set_partition_seg_context(cm, xd, mi_row, mi_col);
     pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
     if (r2 < INT_MAX)
       r2 += x->partition_cost[pl][PARTITION_VERT];
-#else
-    r2 += x->partition_cost[pl][PARTITION_VERT];
-#endif
     if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
          RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
       srate = r2;
@@ -1056,11 +1007,7 @@
     int r, d;
     pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
                   get_block_context(x, bsize));
-#if CONFIG_AB4X4
     if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
-    if (bsize >= BLOCK_SIZE_MB16X16) {
-#endif
       set_partition_seg_context(cm, xd, mi_row, mi_col);
       pl = partition_plane_context(xd, bsize);
       r += x->partition_cost[pl][PARTITION_NONE];
@@ -1070,11 +1017,7 @@
         RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
       srate = r;
       sdist = d;
-#if CONFIG_AB4X4
       if (bsize >= BLOCK_SIZE_SB8X8)
-#else
-      if (bsize >= BLOCK_SIZE_MB16X16)
-#endif
         *(get_sb_partitioning(x, bsize)) = bsize;
     }
   }
@@ -1601,11 +1544,7 @@
   const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
   const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
 
-#if CONFIG_AB4X4
   if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
-#else
-  if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) {
-#endif
     ++cpi->sb_ymode_count[m];
   } else {
     ++cpi->ymode_count[m];
@@ -1615,9 +1554,6 @@
     int idx, idy;
     int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type);
     int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type);
-#if !CONFIG_AB4X4
-    bw = 1, bh = 1;
-#endif
     for (idy = 0; idy < 2; idy += bh) {
       for (idx = 0; idx < 2; idx += bw) {
         int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode.first;
@@ -1698,17 +1634,11 @@
     vp9_update_zbin_extra(cpi, x);
   }
 
-#if CONFIG_AB4X4
-  if (mbmi->ref_frame == INTRA_FRAME &&
-      bsize < BLOCK_SIZE_SB8X8) {
-    vp9_encode_intra_block_y(cm, x, BLOCK_SIZE_SB8X8);
-    vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_SB8X8);
-    vp9_encode_sbuv(cm, x, BLOCK_SIZE_SB8X8);
-#else
-  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-    vp9_encode_intra_block_y(cm, x, bsize);
-    vp9_encode_intra_block_uv(cm, x, bsize);
-#endif
+  if (mbmi->ref_frame == INTRA_FRAME) {
+    vp9_encode_intra_block_y(cm, x, (bsize < BLOCK_SIZE_SB8X8) ?
+                                    BLOCK_SIZE_SB8X8 : bsize);
+    vp9_encode_intra_block_uv(cm, x, (bsize < BLOCK_SIZE_SB8X8) ?
+                                     BLOCK_SIZE_SB8X8 : bsize);
     if (output_enabled)
       sum_intra_stats(cpi, x);
   } else {
@@ -1730,12 +1660,7 @@
                                                            : bsize);
   }
 
-#if CONFIG_AB4X4
-  if (mbmi->ref_frame == INTRA_FRAME &&
-      bsize < BLOCK_SIZE_SB8X8) {
-#else
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-#endif
     vp9_tokenize_sb(cpi, xd, t, !output_enabled,
                     (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
   } else if (!x->skip) {
@@ -1783,11 +1708,7 @@
           sz = TX_16X16;
         if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
           sz = TX_8X8;
-#if CONFIG_AB4X4
         if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
-#else
-        if (sz == TX_8X8 && mbmi->mode == SPLITMV)
-#endif
           sz = TX_4X4;
       } else if (mbmi->mode != I4X4_PRED) {
         sz = mbmi->txfm_size;
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -573,16 +573,9 @@
   int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl;
   int idx, idy;
 
-#if CONFIG_AB4X4
   if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
-#else
-  if (mbmi->mode == SPLITMV) {
-#endif
     int i;
     PARTITION_INFO *pi = x->partition_info;
-#if !CONFIG_AB4X4
-    bw = 1, bh = 1;
-#endif
     for (idy = 0; idy < 2; idy += bh) {
       for (idx = 0; idx < 2; idx += bw) {
         i = idy * 2 + idx;
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -33,18 +33,11 @@
                   x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree);
 
   // TODO(rbultje) separate tables for superblock costing?
-#if CONFIG_AB4X4
   vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.sb_ymode_prob,
                   vp9_sb_ymode_tree);
   vp9_cost_tokens(c->mb.mbmode_cost[0],
                   x->sb_kf_ymode_prob[c->common.kf_ymode_probs_index],
                   vp9_sb_ymode_tree);
-#else
-  vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree);
-  vp9_cost_tokens(c->mb.mbmode_cost[0],
-                  x->kf_ymode_prob[c->common.kf_ymode_probs_index],
-                  vp9_kf_ymode_tree);
-#endif
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
                   x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
   vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1639,12 +1639,12 @@
 
   BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
       vp9_sub_pixel_avg_variance8x4, NULL, NULL,
-      NULL, NULL, NULL,
+      NULL, NULL, vp9_sad8x4x8,
       vp9_sad8x4x4d)
 
   BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
       vp9_sub_pixel_avg_variance4x8, NULL, NULL,
-      NULL, NULL, NULL,
+      NULL, NULL, vp9_sad4x8x8,
       vp9_sad4x8x4d)
 
   BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -582,11 +582,7 @@
                                      int *bestrate, int *bestratey,
                                      int *bestdistortion,
                                      BLOCK_SIZE_TYPE bsize) {
-#if CONFIG_AB4X4
   MB_PREDICTION_MODE mode;
-#else
-  B_PREDICTION_MODE mode;
-#endif
   MACROBLOCKD *xd = &x->e_mbd;
   int64_t best_rd = INT64_MAX;
   int rate = 0;
@@ -606,19 +602,12 @@
   DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
 
   assert(ib < 4);
-#if !CONFIG_AB4X4
-  bw = 1, bh = 1;
-#endif
 
   vpx_memcpy(ta, a, sizeof(ta));
   vpx_memcpy(tl, l, sizeof(tl));
   xd->mode_info_context->mbmi.txfm_size = TX_4X4;
 
-#if CONFIG_AB4X4
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
-#else
-  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
-#endif
     int64_t this_rd;
     int ratey = 0;
 
@@ -732,11 +721,7 @@
   int bw = 1 << b_width_log2(bsize);
   int bh = 1 << b_height_log2(bsize);
   int idx, idy;
-#if CONFIG_AB4X4
   int cost = 0;
-#else
-  int cost = mb->mbmode_cost[xd->frame_type][I4X4_PRED];
-#endif
   int distortion = 0;
   int tot_rate_y = 0;
   int64_t total_rd = 0;
@@ -749,10 +734,6 @@
   xd->mode_info_context->mbmi.mode = I4X4_PRED;
   bmode_costs = mb->inter_bmode_costs;
 
-#if !CONFIG_AB4X4
-  bw = 1, bh = 1;
-#endif
-
   for (idy = 0; idy < 2; idy += bh) {
     for (idx = 0; idx < 2; idx += bw) {
       MODE_INFO *const mic = xd->mode_info_context;
@@ -812,12 +793,10 @@
   TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
   int i;
 
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8) {
     x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
     return best_rd;
   }
-#endif
 
   for (i = 0; i < NB_TXFM_MODES; i++)
     txfm_cache[i] = INT64_MAX;
@@ -973,11 +952,9 @@
   MB_MODE_INFO * mbmi = &mic->mbmi;
   const int mis = xd->mode_info_stride;
   int i, cost = 0, thismvcost = 0;
-#if CONFIG_AB4X4
   int idx, idy;
   int bw = 1 << b_width_log2(mbmi->sb_type);
   int bh = 1 << b_height_log2(mbmi->sb_type);
-#endif
 
   /* We have to be careful retrieving previously-encoded motion vectors.
    Ones from this macroblock have to be pulled from the BLOCKD array
@@ -1061,7 +1038,6 @@
     x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
     if (mbmi->second_ref_frame > 0)
       x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
-#if CONFIG_AB4X4
     for (idy = 0; idy < bh; ++idy) {
       for (idx = 0; idx < bw; ++idx) {
         vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
@@ -1071,7 +1047,6 @@
                    sizeof(x->partition_info->bmi[i]));
       }
     }
-#endif
   }
 
   cost += thismvcost;
@@ -1092,9 +1067,6 @@
   int bwl = b_width_log2(bsize), bw = 1 << bwl;
   int bhl = b_height_log2(bsize), bh = 1 << bhl;
   int idx, idy;
-#if !CONFIG_AB4X4
-  bw = 1, bh = 1;
-#endif
 
   *labelyrate = 0;
   *distortion = 0;
@@ -1262,18 +1234,10 @@
   ENTROPY_CONTEXT t_above[4], t_left[4];
   ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
 
-#if !CONFIG_AB4X4
-  bh = 1, bw = 1;
-#endif
-
   vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
   vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
 
-#if CONFIG_AB4X4
   v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
-#else
-  v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
-#endif
 
   // 64 makes this threshold really big effectively
   // making it so that we very rarely check mvs on
@@ -1282,19 +1246,13 @@
   label_mv_thresh = 1 * bsi->mvthresh / label_count;
 
   // Segmentation method overheads
-#if !CONFIG_AB4X4
-  rate += vp9_cost_mv_ref(cpi, SPLITMV,
-                          mbmi->mb_mode_context[mbmi->ref_frame]);
-  this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
-  br += rate;
-#endif
   other_segment_rd = this_segment_rd;
 
   for (idy = 0; idy < 2; idy += bh) {
     for (idx = 0; idx < 2; idx += bw) {
       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
-      // loop for 4x4/4x8/8x4 block coding
-#if CONFIG_AB4X4
+      // loop for 4x4/4x8/8x4 block coding. To be replaced with a new rd loop.
+#if CONFIG_AB4X4 || 1
       int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
       int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
       B_PREDICTION_MODE mode_selected = ZERO4X4;
@@ -2148,6 +2106,8 @@
         if (cpi->sf.comp_inter_joint_serach) {
           const int b_sz[BLOCK_SIZE_TYPES][2] = {
               {4, 4},
+              {4, 8},
+              {8, 4},
               {8, 8},
               {8, 16},
               {16, 8},
@@ -2617,11 +2577,7 @@
                           &dist_uv, &uv_skip,
                           (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
                                                        bsize);
-#if CONFIG_AB4X4
   if (bsize < BLOCK_SIZE_SB8X8)
-#else
-  if (bsize == BLOCK_SIZE_SB8X8)
-#endif
     err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
                                        &rate4x4_y_tokenonly,
                                        &dist4x4_y, err);
@@ -2633,11 +2589,7 @@
     memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
     xd->mode_info_context->mbmi.mode = mode;
     xd->mode_info_context->mbmi.txfm_size = txfm_size;
-#if CONFIG_AB4X4
   } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
-#else
-  } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) {
-#endif
     *returnrate = rate4x4_y + rate_uv +
         vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
     *returndist = dist4x4_y + (dist_uv >> 2);
@@ -2802,17 +2754,10 @@
       txfm_cache[i] = INT64_MAX;
 
     // Test best rd so far against threshold for trying this mode.
-#if CONFIG_AB4X4
     if (bsize >= BLOCK_SIZE_SB8X8 &&
         (best_rd < cpi->rd_threshes[mode_index] ||
          cpi->rd_threshes[mode_index] == INT_MAX))
       continue;
-#else
-    if (best_rd <= cpi->rd_threshes[mode_index] ||
-        cpi->rd_threshes[mode_index] == INT_MAX) {
-      continue;
-    }
-#endif
 
     x->skip = 0;
     this_mode = vp9_mode_order[mode_index].mode;
@@ -2823,11 +2768,7 @@
       continue;
     }
 
-#if CONFIG_AB4X4
     if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
-#else
-    if (cpi->speed > 0) {
-#endif
       if (!(ref_frame_mask & (1 << ref_frame))) {
         continue;
       }
@@ -2873,7 +2814,6 @@
     mbmi->interp_filter = cm->mcomp_filter_type;
     vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
 
-#if CONFIG_AB4X4
     if (bsize >= BLOCK_SIZE_SB8X8 &&
         (this_mode == I4X4_PRED || this_mode == SPLITMV))
       continue;
@@ -2880,11 +2820,6 @@
     if (bsize < BLOCK_SIZE_SB8X8 &&
         !(this_mode == I4X4_PRED || this_mode == SPLITMV))
       continue;
-#else
-    if (bsize != BLOCK_SIZE_SB8X8 &&
-        (this_mode == I4X4_PRED || this_mode == SPLITMV))
-      continue;
-#endif
 
     if (comp_pred) {
       if (ref_frame == ALTREF_FRAME) {
@@ -2959,11 +2894,9 @@
       distortion2 += dist_uv[TX_4X4];
       distortion_uv = dist_uv[TX_4X4];
       mbmi->uv_mode = mode_uv[TX_4X4];
-#if CONFIG_AB4X4
       txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
       for (i = 0; i < NB_TXFM_MODES; ++i)
         txfm_cache[i] = txfm_cache[ONLY_4X4];
-#endif
     } else if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
       vp9_build_intra_predictors_sby_s(xd, bsize);
@@ -3097,11 +3030,9 @@
       distortion2 += distortion_uv;
       skippable = skippable && uv_skippable;
 
-#if CONFIG_AB4X4
       txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
       for (i = 0; i < NB_TXFM_MODES; ++i)
         txfm_cache[i] = txfm_cache[ONLY_4X4];
-#endif
 
       if (!mode_excluded) {
         if (is_comp_pred)
@@ -3157,11 +3088,7 @@
       // Is Mb level skip allowed (i.e. not coded at segment level).
       mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
 
-#if CONFIG_AB4X4
       if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
-#else
-      if (skippable) {
-#endif
         // Back out the coefficient coding costs
         rate2 -= (rate_y + rate_uv);
         // for best_yrd calculation
@@ -3342,13 +3269,11 @@
     }
   }
 
-#if CONFIG_AB4X4
   if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
     *returnrate = INT_MAX;
     *returndistortion = INT_MAX;
     return best_rd;
   }
-#endif
 
   assert((cm->mcomp_filter_type == SWITCHABLE) ||
          (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
@@ -3381,10 +3306,7 @@
       cpi->is_src_frame_alt_ref &&
       (cpi->oxcf.arnr_max_frames == 0) &&
       (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
-#if CONFIG_AB4X4
-      && bsize >= BLOCK_SIZE_SB8X8
-#endif
-     ) {
+      && bsize >= BLOCK_SIZE_SB8X8) {
     mbmi->mode = ZEROMV;
     mbmi->ref_frame = ALTREF_FRAME;
     mbmi->second_ref_frame = NONE;
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -593,6 +593,37 @@
                             ref_ptr[3], ref_stride, 0x7fffffff);
 }
 
+// Fills sad_array[0..7] with the results of vp9_sad8x4() for the source
+// data at src_ptr against eight reference positions: ref_ptr + 0 through
+// ref_ptr + 7.
+//
+//   src_ptr     source pointer, forwarded unchanged to every call
+//   src_stride  source stride, forwarded unchanged
+//   ref_ptr     reference base pointer; call i uses ref_ptr + i
+//   ref_stride  reference stride, forwarded unchanged
+//   sad_array   output; entries 0..7 are written
+//
+// Returns nothing; results are delivered only through sad_array.
+//
+// NOTE(review): 0x7fffffff is passed as the final vp9_sad8x4() argument;
+// presumably an "effectively no limit" threshold - confirm against the
+// vp9_sad8x4() prototype.
+void vp9_sad8x4x8_c(const uint8_t *src_ptr,
+                     int  src_stride,
+                     const uint8_t *ref_ptr,
+                     int  ref_stride,
+                     uint32_t *sad_array) {
+  int offset;
+
+  // One call per candidate; sad_array[offset] pairs with ref_ptr + offset.
+  for (offset = 0; offset < 8; ++offset) {
+    const uint8_t *const candidate = ref_ptr + offset;
+    sad_array[offset] = vp9_sad8x4(src_ptr, src_stride,
+                                   candidate, ref_stride,
+                                   0x7fffffff);
+  }
+}
+
 void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
                      int  src_stride,
                      const uint8_t* const ref_ptr[],
@@ -606,6 +637,37 @@
                             ref_ptr[2], ref_stride, 0x7fffffff);
   sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
                             ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+// Fills sad_array[0..7] with the results of vp9_sad4x8() for the source
+// data at src_ptr against eight reference positions: ref_ptr + 0 through
+// ref_ptr + 7.
+//
+//   src_ptr     source pointer, forwarded unchanged to every call
+//   src_stride  source stride, forwarded unchanged
+//   ref_ptr     reference base pointer; call i uses ref_ptr + i
+//   ref_stride  reference stride, forwarded unchanged
+//   sad_array   output; entries 0..7 are written
+//
+// Returns nothing; results are delivered only through sad_array.
+//
+// NOTE(review): 0x7fffffff is passed as the final vp9_sad4x8() argument;
+// presumably an "effectively no limit" threshold - confirm against the
+// vp9_sad4x8() prototype.
+void vp9_sad4x8x8_c(const uint8_t *src_ptr,
+                     int  src_stride,
+                     const uint8_t *ref_ptr,
+                     int  ref_stride,
+                     uint32_t *sad_array) {
+  int offset;
+
+  // One call per candidate; sad_array[offset] pairs with ref_ptr + offset.
+  for (offset = 0; offset < 8; ++offset) {
+    const uint8_t *const candidate = ref_ptr + offset;
+    sad_array[offset] = vp9_sad4x8(src_ptr, src_stride,
+                                   candidate, ref_stride,
+                                   0x7fffffff);
+  }
 }
 
 void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -119,12 +119,8 @@
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
   const int eob = xd->plane[plane].eobs[block];
   const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
-#if CONFIG_AB4X4
   const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ?
                                    BLOCK_SIZE_SB8X8 : mbmi->sb_type;
-#else
-  const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
-#endif
   const int bwl = b_width_log2(sb_type);
   const int off = block >> (2 * tx_size);
   const int mod = bwl - tx_size - xd->plane[plane].subsampling_x;