shithub: libvpx

Download patch

ref: 9daf3154db8e5225ebc69fb120389ff9b2a9dd3f
parent: 78ecbc98e4df70e94e8f2029c8d2d65f643e6f74
author: Adrian Grange <agrange@google.com>
date: Fri Apr 6 12:38:34 EDT 2012

Superblock encoding order

This is the first patch to add superblock (32x32) coding
order capabilities. It does not yet do any mode selection
at the SB level; that will follow in a further patch.

This patch encodes rows of SBs rather than rows of
MBs; each SB contains 2x2 MBs.

Two intra prediction modes have been disabled since they
require reconstructed data for the above-right MB which
may not have been encoded yet (e.g. for the bottom right
MB in each SB).

Results on the one test clip I have tried (720p GIPS clip)
suggest that it is somewhere around 0.2dB worse than the
baseline version, so there may be bugs.

It has been tested with no experiments enabled and with
the following 3 experiments enabled together:
  --enable-enhanced_interp
  --enable-high_precision_mv
  --enable-sixteenth_subpel_uv
In each case the decode buffer matches the recon buffer
(using "cmp" to compare the dumped/decoded frames).
Note: Testing these experiments individually produced
errors.

Some problems were found with other experiments but it
is unclear what state these experiments are in:
  --enable-comp_intra_pred
  --enable-newentropy
  --enable-uvintra

This code has not been extensively tested yet, so there
is every likelihood that further bugs remain. I also
intend to do some code cleanup & refactoring in tandem
with the next patch that adds the 32x32 modes.

Change-Id: I1eba7f740a70b3510df58db53464535ef881b4d9

--- a/configure
+++ b/configure
@@ -224,6 +224,7 @@
     sixteenth_subpel_uv
     comp_intra_pred
     newentropy
+    superblocks
 "
 CONFIG_LIST="
     external_build
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -337,6 +337,9 @@
 #if CONFIG_RUNTIME_CPU_DETECT
     struct VP8_COMMON_RTCD  *rtcd;
 #endif
+
+    int mb_index;   // Index of the MB in the SB (0..3)
+
 } MACROBLOCKD;
 
 
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -81,7 +81,7 @@
         else
             cnt[CNT_INTRA] += 2;
     }
-    /* Process above left or the one frome last frame */
+    /* Process above left or the one from last frame */
     if ( aboveleft->mbmi.ref_frame != INTRA_FRAME||
          (lf_here->mbmi.ref_frame==LAST_FRAME && refframe == LAST_FRAME))
     {
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -666,7 +666,6 @@
     const int mis = pbi->common.mode_info_stride;
     MACROBLOCKD *const xd  = & pbi->mb;
 
-    int index = mb_row * pbi->common.mb_cols + mb_col;
     int_mv *const mv = & mbmi->mv;
     int mb_to_left_edge;
     int mb_to_right_edge;
@@ -1017,91 +1016,75 @@
 
 void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 {
-    MODE_INFO *mi = pbi->common.mi;
+    int i;
+    VP8_COMMON *cm = &pbi->common;
+    MODE_INFO *mi = cm->mi;
+    int sb_row, sb_col;
+    int sb_rows = (cm->mb_rows + 1)>>1;
+    int sb_cols = (cm->mb_cols + 1)>>1;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
 
-    MODE_INFO *prev_mi = pbi->common.prev_mi;
+    MODE_INFO *prev_mi = cm->prev_mi;
 
-    int mb_row = -1;
-
-#if 0
-    FILE *statsfile;
-    statsfile = fopen("decsegmap.stt", "a");
-    fprintf(statsfile, "\n" );
-#endif
-
     mb_mode_mv_init(pbi);
 
 #if CONFIG_QIMODE
-    if(pbi->common.frame_type==KEY_FRAME && !pbi->common.kf_ymode_probs_update)
+    if(cm->frame_type==KEY_FRAME && !cm->kf_ymode_probs_update)
     {
-        pbi->common.kf_ymode_probs_index = vp8_read_literal(&pbi->bc, 3);
+        cm->kf_ymode_probs_index = vp8_read_literal(&pbi->bc, 3);
     }
 #endif
 
-    while (++mb_row < pbi->common.mb_rows)
+    for (sb_row=0; sb_row<sb_rows; sb_row++)
     {
-        int mb_col = -1;
-        int mb_to_top_edge;
-        int mb_to_bottom_edge;
+        int mb_col = 0;
+        int mb_row = (sb_row <<1);
 
-        pbi->mb.mb_to_top_edge =
-        mb_to_top_edge = -((mb_row * 16)) << 3;
-        mb_to_top_edge -= LEFT_TOP_MARGIN;
+        for (sb_col=0; sb_col<sb_cols; sb_col++)
+        {
+            for ( i=0; i<4; i++ )
+            {
+                int mb_to_top_edge;
+                int mb_to_bottom_edge;
 
-        pbi->mb.mb_to_bottom_edge =
-        mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
-        mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * cm->mode_info_stride + dx;
 
-#if 0
-        fprintf(statsfile, "\n" );
-#endif
+                if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+                {
+                    /* next macroblock */
+                    mb_row += dy;
+                    mb_col += dx;
+                    mi += offset_extended;
+                    prev_mi += offset_extended;
+                    continue;
+                }
 
-        while (++mb_col < pbi->common.mb_cols)
-        {
-            /*read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
-            if(pbi->common.frame_type == KEY_FRAME)
-            {
-                //printf("<%d %d> \n", mb_row, mb_col);
-                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
-            }
-            else
-            {
-                read_mb_modes_mv(pbi, mi, &mi->mbmi,
-                prev_mi,
-                mb_row, mb_col);
-            }
+                pbi->mb.mb_to_top_edge = mb_to_top_edge = -((mb_row * 16)) << 3;
+                                         mb_to_top_edge -= LEFT_TOP_MARGIN;
 
-            //printf("%3d", mi->mbmi.mode);
+                pbi->mb.mb_to_bottom_edge =
+                mb_to_bottom_edge =
+                        ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+                mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
-            /*
-            if(pbi->common.current_video_frame==7)
-            {
-                FILE *fmode=fopen("kfmode.txt", "a");
-                fprintf(fmode, "%3d:%3d:%d\n",mb_row, mb_col, mi->mbmi.mode);
-                fclose(fmode);
+                if(cm->frame_type == KEY_FRAME)
+                    vp8_kfread_modes(pbi, mi, mb_row, mb_col);
+                else
+                    read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row,
+                                     mb_col);
 
-            }*/
-            /*
-            if(mi->mbmi.mode==I8X8_PRED)
-            {
-                printf("F%3d:%d:%d\n", pbi->common.current_video_frame, mb_row, mb_col);
+                /* next macroblock */
+                mb_row += dy;
+                mb_col += dx;
+                mi += offset_extended;
+                prev_mi += offset_extended;
             }
-            */
-#if 0
-            fprintf(statsfile, "%2d%2d%2d   ",
-                mi->mbmi.segment_id, mi->mbmi.ref_frame, mi->mbmi.mode );
-#endif
-            prev_mi++;
-            mi++;       /* next macroblock */
         }
-       // printf("\n");
-        prev_mi++;
-        mi++;           /* skip left predictor each row */
-    }
 
-#if 0
-    fclose(statsfile);
-#endif
-
-
+        mi += cm->mode_info_stride + (1 - (cm->mb_cols & 0x1));
+        prev_mi += cm->mode_info_stride + (1 - (cm->mb_cols & 0x1));
+    }
 }
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -240,8 +240,6 @@
         }
 #endif
 
-
-
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
@@ -256,10 +254,14 @@
                 RECON_INVOKE(&pbi->common.rtcd.recon,
                     build_intra_predictors_mby)(xd);
             }
+#if 0
+            // Intra-modes requiring recon data from top-right
+            // MB have been temporarily disabled.
             else
             {
                 vp8_intra_prediction_down_copy(xd);
             }
+#endif
         }
     }
     else
@@ -431,7 +433,6 @@
                 (xd->qcoeff+16*16, xd->block[16].dequant,
                 xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                 xd->dst.uv_stride, xd->eobs+16);
-
 }
 
 
@@ -459,102 +460,139 @@
 FILE *vpxlog = 0;
 #endif
 
+/* Decode a row of Superblocks (2x2 region of MBs) */
 static void
-decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
+decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd)
 {
+    int i;
+    int sb_col;
+    int mb_row, mb_col;
     int recon_yoffset, recon_uvoffset;
-    int mb_col;
     int ref_fb_idx = pc->lst_fb_idx;
     int dst_fb_idx = pc->new_fb_idx;
     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+    int sb_cols = (pc->mb_cols + 1)>>1;
+    ENTROPY_CONTEXT_PLANES left_context[2];
 
-    vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
-    recon_yoffset = mb_row * recon_y_stride * 16;
-    recon_uvoffset = mb_row * recon_uv_stride * 8;
-    /* reset above block coeffs */
+    // For a SB there are 2 left contexts, one per MB row within the SB
+    vpx_memset(left_context, 0, sizeof(left_context));
 
-    xd->above_context = pc->above_context;
-    xd->up_available = (mb_row != 0);
+    mb_row = mbrow;
+    mb_col = 0;
 
-    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
-    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+    for (sb_col=0; sb_col<sb_cols; sb_col++)
     {
-        /* Distance of Mb to the various image edges.
-         * These are specified to 8th pel as they are always compared to values
-         * that are in 1/8th pel units
-         */
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+        // Process the 4 MBs within the SB in the order:
+        // top-left, top-right, bottom-left, bottom-right
+        for ( i=0; i<4; i++ )
+        {
+            int dy = row_delta[i];
+            int dx = col_delta[i];
+            int offset_extended = dy * xd->mode_info_stride + dx;
 
-        update_blockd_bmi(xd);
+            if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols))
+            {
+                // MB lies outside frame, skip on to next
+                mb_row += dy;
+                mb_col += dx;
+                xd->mode_info_context += offset_extended;
+                continue;
+            }
 
-        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+#ifdef DEC_DEBUG
+            dec_debug = (pc->current_video_frame==0 && mb_row==0 && mb_col==0);
+#endif
+            // Copy in the appropriate left context for this MB row
+            vpx_memcpy (&pc->left_context,
+                        &left_context[i>>1],
+                        sizeof(ENTROPY_CONTEXT_PLANES));
 
-        xd->left_available = (mb_col != 0);
+            // Set above context pointer
+            xd->above_context = pc->above_context + mb_col;
 
-        /* Select the appropriate reference frame for this MB */
-        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
-            ref_fb_idx = pc->lst_fb_idx;
-        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
-            ref_fb_idx = pc->gld_fb_idx;
-        else
-            ref_fb_idx = pc->alt_fb_idx;
+            /* Distance of Mb to the various image edges.
+             * These are specified to 8th pel as they are always compared to
+             * values that are in 1/8th pel units
+             */
+            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
 
-        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
-        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
-        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+            xd->mb_to_left_edge = -((mb_col * 16) << 3);
+            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
-        if (xd->mode_info_context->mbmi.second_ref_frame)
-        {
-            int second_ref_fb_idx;
+            xd->up_available = (mb_row != 0);
+            xd->left_available = (mb_col != 0);
 
+            update_blockd_bmi(xd);
+
+            recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+            recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+            xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+            xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+            xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
             /* Select the appropriate reference frame for this MB */
-            if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
-                second_ref_fb_idx = pc->lst_fb_idx;
-            else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
-                second_ref_fb_idx = pc->gld_fb_idx;
+            if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+                ref_fb_idx = pc->lst_fb_idx;
+            else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+                ref_fb_idx = pc->gld_fb_idx;
             else
-                second_ref_fb_idx = pc->alt_fb_idx;
+                ref_fb_idx = pc->alt_fb_idx;
 
-            xd->second_pre.y_buffer = pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
-            xd->second_pre.u_buffer = pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
-            xd->second_pre.v_buffer = pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
-        }
+            xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer +recon_yoffset;
+            xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer +recon_uvoffset;
+            xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer +recon_uvoffset;
 
-        if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
-        {
-            /* propagate errors from reference frames */
-            xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
-        }
+            if (xd->mode_info_context->mbmi.second_ref_frame)
+            {
+                int second_ref_fb_idx;
 
-#ifdef DEC_DEBUG
-        dec_debug = (pc->current_video_frame==1 && mb_row==4 && mb_col==0);
-#endif
-        decode_macroblock(pbi, xd, mb_row * pc->mb_cols  + mb_col);
+                /* Select the appropriate reference frame for this MB */
+                if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+                    second_ref_fb_idx = pc->lst_fb_idx;
+                else if (xd->mode_info_context->mbmi.second_ref_frame ==
+                                                                   GOLDEN_FRAME)
+                    second_ref_fb_idx = pc->gld_fb_idx;
+                else
+                    second_ref_fb_idx = pc->alt_fb_idx;
 
-        /* check if the boolean decoder has suffered an error */
-        xd->corrupted |= vp8dx_bool_error(xd->current_bc);
-        recon_yoffset += 16;
-        recon_uvoffset += 8;
+                xd->second_pre.y_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
+                xd->second_pre.u_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
+                xd->second_pre.v_buffer =
+                       pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+            }
 
-        ++xd->mode_info_context;  /* next mb */
+            if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
+            {
+                /* propagate errors from reference frames */
+                xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
+            }
 
-        xd->above_context++;
+            decode_macroblock(pbi, xd, mb_row * pc->mb_cols + mb_col);
 
-    }
+            /* check if the boolean decoder has suffered an error */
+            xd->corrupted |= vp8dx_bool_error(xd->current_bc);
 
-    /* adjust to the next row of mbs */
-    vp8_extend_mb_row(
-        &pc->yv12_fb[dst_fb_idx],
-        xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-    );
+            // Store the modified left context for the MB row locally
+            vpx_memcpy (&left_context[i>>1],
+                        &pc->left_context,
+                        sizeof(ENTROPY_CONTEXT_PLANES));
 
-    ++xd->mode_info_context;      /* skip prediction column */
+            // skip to next MB
+            xd->mode_info_context += offset_extended;
+            mb_row += dy;
+            mb_col += dx;
+        }
+    }
+
+    /* skip prediction column */
+    xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
 }
 
 static unsigned int read_partition_size(const unsigned char *cx_size)
@@ -797,8 +835,6 @@
 
             if (Width != pc->Width  ||  Height != pc->Height)
             {
-                int prev_mb_rows = pc->mb_rows;
-
                 if (pc->Width <= 0)
                 {
                     pc->Width = Width;
@@ -1228,10 +1264,10 @@
     // Resset the macroblock mode info context to the start of the list
     xd->mode_info_context = pc->mi;
 
-    /* Decode a row of macro blocks */
-    for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
+    /* Decode a row of superblocks */
+    for (mb_row = 0; mb_row < pc->mb_rows; mb_row+=2)
     {
-        decode_mb_row(pbi, pc, mb_row, xd);
+        decode_sb_row(pbi, pc, mb_row, xd);
     }
     corrupt_tokens |= xd->corrupted;
 
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -477,7 +477,8 @@
     }
 
 #if CONFIG_DEBUG
-    vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+    if(cm->show_frame)
+        vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
 #endif
 
     vp8_clear_system_state();
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -577,6 +577,7 @@
 
 static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 {
+    int i;
     VP8_COMMON *const pc = & cpi->common;
     vp8_writer *const w = & cpi->bc;
     const MV_CONTEXT *mvc = pc->fc.mvc;
@@ -584,16 +585,12 @@
     const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp;
 #endif
     MACROBLOCKD *xd = &cpi->mb.e_mbd;
+    MODE_INFO *m;
+    MODE_INFO *prev_m;
 
-    int i;
-    int pred_context;
-
-
-    MODE_INFO *m = pc->mi;
-    MODE_INFO *prev_m = pc->prev_mi;
-
     const int mis = pc->mode_info_stride;
-    int mb_row = -1;
+    int mb_row, mb_col;
+    int row, col;
 
 #if CONFIG_NEWENTROPY
     int prob_skip_false[3] = {0, 0, 0};
@@ -605,6 +602,9 @@
     vp8_prob pred_prob;
     unsigned char prediction_flag;
 
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+
     cpi->mb.partition_info = cpi->mb.pi;
 
     // Update the probabilities used to encode reference frame data
@@ -624,7 +624,7 @@
             if ( (cpi->skip_false_count[k] + cpi->skip_true_count[k]) )
             {
                 prob_skip_false[k] = cpi->skip_false_count[k] * 256 /
-                                  (cpi->skip_false_count[k] + cpi->skip_true_count[k]);
+                (cpi->skip_false_count[k] + cpi->skip_true_count[k]);
 
                 if (prob_skip_false[k] <= 1)
                     prob_skip_false[k] = 1;
@@ -701,265 +701,310 @@
 #endif
     vp8_write_mvprobs(cpi);
 
-    while (++mb_row < pc->mb_rows)
+    mb_row = 0;
+    for (row=0; row < pc->mb_rows; row += 2)
     {
-        int mb_col = -1;
+        m = pc->mi + row * mis;
+        prev_m = pc->prev_mi + row * mis;
 
-        while (++mb_col < pc->mb_cols)
+        mb_col = 0;
+        for (col=0; col < pc->mb_cols; col += 2)
         {
-            const MB_MODE_INFO *const mi = & m->mbmi;
-            const MV_REFERENCE_FRAME rf = mi->ref_frame;
-            const MB_PREDICTION_MODE mode = mi->mode;
-            const int segment_id = mi->segment_id;
+            int i;
 
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
-            xd->mb_to_left_edge = -((mb_col * 16) << 3);
-            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+            // Process the 4 MBs in the order:
+            // top-left, top-right, bottom-left, bottom-right
+            for (i=0; i<4; i++)
+            {
+                const MB_MODE_INFO *const mi = & m->mbmi;
+                const MV_REFERENCE_FRAME rf = mi->ref_frame;
+                const MB_PREDICTION_MODE mode = mi->mode;
+                const int segment_id = mi->segment_id;
 
-            // Make sure the MacroBlockD mode info pointer is set correctly
-            xd->mode_info_context = m;
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * mis + dx;
 
-            xd->prev_mode_info_context = prev_m;
+                if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols))
+                {
+                    // MB lies outside frame, move on
+                    mb_row += dy;
+                    mb_col += dx;
+                    m += offset_extended;
+                    prev_m += offset_extended;
+                    cpi->mb.partition_info += offset_extended;
+                    continue;
+                }
 
+                // Distance of Mb to the various image edges.
+                // These specified to 8th pel as they are always compared to MV
+                // values that are in 1/8th pel units
+                xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+                // Make sure the MacroBlockD mode info pointer is set correctly
+                xd->mode_info_context = m;
+
+                xd->prev_mode_info_context = prev_m;
+
 #ifdef ENTROPY_STATS
-            active_section = 9;
+                active_section = 9;
 #endif
 
-            if (cpi->mb.e_mbd.update_mb_segmentation_map)
-            {
-                // Is temporal coding of the segment map enabled
-                if (pc->temporal_update)
+                if (cpi->mb.e_mbd.update_mb_segmentation_map)
                 {
-                    prediction_flag =
-                        get_pred_flag( xd, PRED_SEG_ID );
-                    pred_prob =
-                        get_pred_prob( pc, xd, PRED_SEG_ID);
+                    // Is temporal coding of the segment map enabled
+                    if (pc->temporal_update)
+                    {
+                        prediction_flag = get_pred_flag( xd, PRED_SEG_ID );
+                        pred_prob = get_pred_prob( pc, xd, PRED_SEG_ID);
 
-                    // Code the segment id prediction flag for this mb
-                    vp8_write( w, prediction_flag, pred_prob );
+                        // Code the segment id prediction flag for this mb
+                        vp8_write( w, prediction_flag, pred_prob );
 
-                    // If the mbs segment id was not predicted code explicitly
-                    if (!prediction_flag)
+                        // If the mb segment id wasn't predicted code explicitly
+                        if (!prediction_flag)
+                            write_mb_segid(w, mi, &cpi->mb.e_mbd);
+                    }
+                    else
+                    {
+                        // Normal unpredicted coding
                         write_mb_segid(w, mi, &cpi->mb.e_mbd);
+                    }
                 }
-                else
-                {
-                    // Normal undpredicted coding
-                    write_mb_segid(w, mi, &cpi->mb.e_mbd);
-                }
-            }
 
-            if ( pc->mb_no_coeff_skip &&
-                 ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
-                   ( get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0 ) ) )
-            {
+                if ( pc->mb_no_coeff_skip &&
+                     ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
+                       ( get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0 ) ) )
+                {
 #if CONFIG_NEWENTROPY
-                vp8_encode_bool(w, mi->mb_skip_coeff,
-                                get_pred_prob(pc, xd, PRED_MBSKIP));
+                    vp8_encode_bool(w, mi->mb_skip_coeff,
+                                    get_pred_prob(pc, xd, PRED_MBSKIP));
 #else
-                vp8_encode_bool(w, mi->mb_skip_coeff, prob_skip_false);
+                    vp8_encode_bool(w, mi->mb_skip_coeff, prob_skip_false);
 #endif
-            }
+                }
 
-            // Encode the reference frame.
-            encode_ref_frame( w, pc, xd,
-                              segment_id, rf );
+                // Encode the reference frame.
+                encode_ref_frame( w, pc, xd, segment_id, rf );
 
-            if (rf == INTRA_FRAME)
-            {
+                if (rf == INTRA_FRAME)
+                {
 #ifdef ENTROPY_STATS
-                active_section = 6;
+                    active_section = 6;
 #endif
 
-                if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
-                    write_ymode(w, mode, pc->fc.ymode_prob);
+                    if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
+                        write_ymode(w, mode, pc->fc.ymode_prob);
 
-                if (mode == B_PRED)
-                {
-                    int j = 0;
+                    if (mode == B_PRED)
+                    {
+                        int j = 0;
 #if CONFIG_COMP_INTRA_PRED
-                    int uses_second = m->bmi[0].as_mode.second != (B_PREDICTION_MODE) (B_DC_PRED - 1);
-                    vp8_write(w, uses_second, 128);
+                        int uses_second =
+                                m->bmi[0].as_mode.second !=
+                                        (B_PREDICTION_MODE) (B_DC_PRED - 1);
+                        vp8_write(w, uses_second, 128);
 #endif
-                    do {
+                        do {
 #if CONFIG_COMP_INTRA_PRED
-                        B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
+                            B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
 #endif
-                        write_bmode(w, m->bmi[j].as_mode.first, pc->fc.bmode_prob);
+                            write_bmode(w, m->bmi[j].as_mode.first,
+                                        pc->fc.bmode_prob);
 #if CONFIG_COMP_INTRA_PRED
-                        if (uses_second)
-                        {
-                            write_bmode(w, mode2, pc->fc.bmode_prob);
-                        }
+                            if (uses_second)
+                            {
+                                write_bmode(w, mode2, pc->fc.bmode_prob);
+                            }
 #endif
-                    } while (++j < 16);
-                }
-                if(mode == I8X8_PRED)
-                {
-                    write_i8x8_mode(w, m->bmi[0].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[2].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[8].as_mode.first, pc->i8x8_mode_prob);
-                    write_i8x8_mode(w, m->bmi[10].as_mode.first, pc->i8x8_mode_prob);
-                }
-                else
-                {
+                        } while (++j < 16);
+                    }
+                    if(mode == I8X8_PRED)
+                    {
+                        write_i8x8_mode(w, m->bmi[0].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[2].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[8].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                        write_i8x8_mode(w, m->bmi[10].as_mode.first,
+                                        pc->i8x8_mode_prob);
+                    }
+                    else
+                    {
 #if CONFIG_UVINTRA
-                    write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob[mode]);
+                        write_uv_mode(w, mi->uv_mode,
+                                      pc->fc.uv_mode_prob[mode]);
 #ifdef MODE_STATS
-                    if(mode!=B_PRED)
-                        ++cpi->y_uv_mode_count[mode][mi->uv_mode];
+                        if(mode!=B_PRED)
+                            ++cpi->y_uv_mode_count[mode][mi->uv_mode];
 #endif
 
 #else
-                    write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob);
+                        write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob);
 #endif /*CONFIG_UVINTRA*/
 
+                    }
                 }
-            }
-            else
-            {
-                int_mv best_mv;
-                int ct[4];
+                else
+                {
+                    int_mv best_mv;
+                    int ct[4];
 
-                vp8_prob mv_ref_p [VP8_MVREFS-1];
+                    vp8_prob mv_ref_p [VP8_MVREFS-1];
 
-                {
-                    int_mv n1, n2;
+                    {
+                        int_mv n1, n2;
 
-                    vp8_find_near_mvs(xd, m,
-                        prev_m,
-                        &n1, &n2, &best_mv, ct, rf, cpi->common.ref_frame_sign_bias);
-                    vp8_mv_ref_probs(&cpi->common, mv_ref_p, ct);
+                        vp8_find_near_mvs(xd, m, prev_m, &n1, &n2, &best_mv, ct,
+                                          rf, cpi->common.ref_frame_sign_bias);
+                        vp8_mv_ref_probs(&cpi->common, mv_ref_p, ct);
 
 
 #ifdef ENTROPY_STATS
-                    accum_mv_refs(mode, ct);
+                        accum_mv_refs(mode, ct);
 #endif
-                }
+                    }
 
 #ifdef ENTROPY_STATS
-                active_section = 3;
+                    active_section = 3;
 #endif
 
-                // Is the segment coding of mode enabled
-                if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
-                {
-                    write_mv_ref(w, mode, mv_ref_p);
-                    vp8_accum_mv_refs(&cpi->common, mode, ct);
-                }
+                    // Is the segment coding of mode enabled
+                    if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) )
+                    {
+                        write_mv_ref(w, mode, mv_ref_p);
+                        vp8_accum_mv_refs(&cpi->common, mode, ct);
+                    }
 
-                {
-                    switch (mode)   /* new, split require MVs */
                     {
-                    case NEWMV:
+                        switch (mode)   /* new, split require MVs */
+                        {
+                        case NEWMV:
 #ifdef ENTROPY_STATS
-                        active_section = 5;
+                            active_section = 5;
 #endif
 
 #if CONFIG_HIGH_PRECISION_MV
-                        if (xd->allow_high_precision_mv)
-                            write_mv_hp(w, &mi->mv.as_mv, &best_mv, mvc_hp);
-                        else
+                            if (xd->allow_high_precision_mv)
+                                write_mv_hp(w, &mi->mv.as_mv, &best_mv, mvc_hp);
+                            else
 #endif
-                        write_mv(w, &mi->mv.as_mv, &best_mv, mvc);
+                            write_mv(w, &mi->mv.as_mv, &best_mv, mvc);
 
-                        if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
-                        {
-                            vp8_write(w, mi->second_ref_frame != INTRA_FRAME,
-                                      get_pred_prob( pc, xd, PRED_COMP ) );
-                        }
-                        if (mi->second_ref_frame)
-                        {
-                            const int second_rf = mi->second_ref_frame;
-                            int_mv n1, n2;
-                            int ct[4];
-                            vp8_find_near_mvs(xd, m,
+                            if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+                            {
+                                vp8_write(w,
+                                          mi->second_ref_frame != INTRA_FRAME,
+                                          get_pred_prob( pc, xd, PRED_COMP ) );
+                            }
+                            if (mi->second_ref_frame)
+                            {
+                                const int second_rf = mi->second_ref_frame;
+                                int_mv n1, n2;
+                                int ct[4];
+                                vp8_find_near_mvs(xd, m,
                                               prev_m,
                                               &n1, &n2, &best_mv,
                                               ct, second_rf,
                                               cpi->common.ref_frame_sign_bias);
 #if CONFIG_HIGH_PRECISION_MV
-                            if (xd->allow_high_precision_mv)
-                                write_mv_hp(w, &mi->second_mv.as_mv, &best_mv, mvc_hp);
-                            else
+                                if (xd->allow_high_precision_mv)
+                                    write_mv_hp(w, &mi->second_mv.as_mv,
+                                                &best_mv, mvc_hp);
+                                else
 #endif
-                            write_mv(w, &mi->second_mv.as_mv, &best_mv, mvc);
-                        }
-                        break;
-                    case SPLITMV:
-                    {
-                        int j = 0;
+                                write_mv(w, &mi->second_mv.as_mv, &best_mv,
+                                         mvc);
+                            }
+                            break;
+                        case SPLITMV:
+                        {
+                            int j = 0;
 
 #ifdef MODE_STATS
-                        ++count_mb_seg [mi->partitioning];
+                            ++count_mb_seg [mi->partitioning];
 #endif
 
-                        write_split(w, mi->partitioning);
+                            write_split(w, mi->partitioning);
 
-                        do
-                        {
-                            B_PREDICTION_MODE blockmode;
-                            int_mv blockmv;
-                            const int *const  L = vp8_mbsplits [mi->partitioning];
-                            int k = -1;  /* first block in subset j */
-                            int mv_contz;
-                            int_mv leftmv, abovemv;
+                            do
+                            {
+                                B_PREDICTION_MODE blockmode;
+                                int_mv blockmv;
+                                const int *const  L =
+                                        vp8_mbsplits [mi->partitioning];
+                                int k = -1;  /* first block in subset j */
+                                int mv_contz;
+                                int_mv leftmv, abovemv;
 
-                            blockmode =  cpi->mb.partition_info->bmi[j].mode;
-                            blockmv =  cpi->mb.partition_info->bmi[j].mv;
+                                blockmode = cpi->mb.partition_info->bmi[j].mode;
+                                blockmv = cpi->mb.partition_info->bmi[j].mv;
 #if CONFIG_DEBUG
-                            while (j != L[++k])
-                                if (k >= 16)
-                                    assert(0);
+                                while (j != L[++k])
+                                    if (k >= 16)
+                                        assert(0);
 #else
-                            while (j != L[++k]);
+                                while (j != L[++k]);
 #endif
-                            leftmv.as_int = left_block_mv(m, k);
-                            abovemv.as_int = above_block_mv(m, k, mis);
-                            mv_contz = vp8_mv_cont(&leftmv, &abovemv);
+                                leftmv.as_int = left_block_mv(m, k);
+                                abovemv.as_int = above_block_mv(m, k, mis);
+                                mv_contz = vp8_mv_cont(&leftmv, &abovemv);
 
-                            write_sub_mv_ref(w, blockmode, vp8_sub_mv_ref_prob2 [mv_contz]);
+                                write_sub_mv_ref(w, blockmode,
+                                               vp8_sub_mv_ref_prob2 [mv_contz]);
 
-                            if (blockmode == NEW4X4)
-                            {
+                                if (blockmode == NEW4X4)
+                                {
 #ifdef ENTROPY_STATS
-                                active_section = 11;
+                                    active_section = 11;
 #endif
 #if CONFIG_HIGH_PRECISION_MV
-                                if (xd->allow_high_precision_mv)
-                                    write_mv_hp(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT_HP *) mvc_hp);
-                                else
+                                    if (xd->allow_high_precision_mv)
+                                        write_mv_hp(w, &blockmv.as_mv, &best_mv,
+                                                (const MV_CONTEXT_HP *) mvc_hp);
+                                    else
 #endif
-                                write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
+                                    write_mv(w, &blockmv.as_mv, &best_mv,
+                                             (const MV_CONTEXT *) mvc);
+                                }
                             }
+                            while (++j < cpi->mb.partition_info->count);
                         }
-                        while (++j < cpi->mb.partition_info->count);
-                    }
-                    break;
-                    default:
-                        if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
-                        {
-                            vp8_write(w, mi->second_ref_frame != INTRA_FRAME,
-                                      get_pred_prob( pc, xd, PRED_COMP ) );
-                        }
                         break;
+                        default:
+                            if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
+                            {
+                                vp8_write(w,
+                                          mi->second_ref_frame != INTRA_FRAME,
+                                          get_pred_prob( pc, xd, PRED_COMP ) );
+                            }
+                            break;
+                        }
                     }
                 }
-            }
 
-            ++m;
-            ++prev_m;
-            assert((prev_m-cpi->common.prev_mip)==(m-cpi->common.mip));
-            assert((prev_m-cpi->common.prev_mi)==(m-cpi->common.mi));
-            cpi->mb.partition_info++;
+                // Next MB
+                mb_row += dy;
+                mb_col += dx;
+                m += offset_extended;
+                prev_m += offset_extended;
+                cpi->mb.partition_info += offset_extended;
+#if CONFIG_DEBUG
+                assert((prev_m-cpi->common.prev_mip)==(m-cpi->common.mip));
+                assert((prev_m-cpi->common.prev_mi)==(m-cpi->common.mi));
+#endif
+            }
         }
 
-        ++m;  /* skip L prediction border */
-        ++prev_m;
-        cpi->mb.partition_info++;
+        // Next SB
+        mb_row += 2;
+        m += mis + (1 - (pc->mb_cols & 0x1));
+        prev_m += mis + (1 - (pc->mb_cols & 0x1));
+        cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1));
     }
 }
 
@@ -968,14 +1013,17 @@
     vp8_writer *const bc = & cpi->bc;
     VP8_COMMON *const c = & cpi->common;
     const int mis = c->mode_info_stride;
-    /* const */
-    MODE_INFO *m = c->mi;
-    int mb_row = -1;
+    MODE_INFO *m;
+    int i;
+    int row, col;
+    int mb_row, mb_col;
 #if CONFIG_NEWENTROPY
     int prob_skip_false[3] = {0, 0, 0};
 #else
     int prob_skip_false = 0;
 #endif
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
 
     MACROBLOCKD *xd = &cpi->mb.e_mbd;
 
@@ -1030,87 +1078,115 @@
     }
 #endif
 
-    while (++mb_row < c->mb_rows)
+    mb_row = 0;
+    for (row=0; row < c->mb_rows; row += 2)
     {
-        int mb_col = -1;
+        m = c->mi + row * mis;
 
-        while (++mb_col < c->mb_cols)
+        mb_col = 0;
+        for (col=0; col < c->mb_cols; col += 2)
         {
-            const int ym = m->mbmi.mode;
-            int segment_id = m->mbmi.segment_id;
-
-            xd->mode_info_context = m;
-
-            if (cpi->mb.e_mbd.update_mb_segmentation_map)
+            // Process the 4 MBs in the order:
+            // top-left, top-right, bottom-left, bottom-right
+            for (i=0; i<4; i++)
             {
-                write_mb_segid(bc, &m->mbmi, &cpi->mb.e_mbd);
-            }
+                int ym;
+                int segment_id;
+                int dy = row_delta[i];
+                int dx = col_delta[i];
+                int offset_extended = dy * mis + dx;
 
-            if ( c->mb_no_coeff_skip &&
-                 ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
-                   (get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0) ) )
-            {
-#if CONFIG_NEWENTROPY
-                vp8_encode_bool(bc, m->mbmi.mb_skip_coeff,
-                                get_pred_prob(c, xd, PRED_MBSKIP));
-#else
-                vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
-#endif
-            }
-#if CONFIG_QIMODE
-            kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
-#else
-            kfwrite_ymode(bc, ym, c->kf_ymode_prob);
-#endif
-            if (ym == B_PRED)
-            {
-                const int mis = c->mode_info_stride;
-                int i = 0;
-#if CONFIG_COMP_INTRA_PRED
-                int uses_second = m->bmi[0].as_mode.second != (B_PREDICTION_MODE) (B_DC_PRED - 1);
-                vp8_write(bc, uses_second, 128);
-#endif
-                do
+                if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols))
                 {
-                    const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
-                    const B_PREDICTION_MODE L = left_block_mode(m, i);
-                    const int bm = m->bmi[i].as_mode.first;
-#if CONFIG_COMP_INTRA_PRED
-                    const int bm2 = m->bmi[i].as_mode.second;
-#endif
+                    // MB lies outside frame, move on
+                    mb_row += dy;
+                    mb_col += dx;
+                    m += offset_extended;
+                    continue;
+                }
 
-#ifdef ENTROPY_STATS
-                    ++intra_mode_stats [A] [L] [bm];
-#endif
+                ym = m->mbmi.mode;
+                segment_id = m->mbmi.segment_id;
 
-                    write_bmode(bc, bm, c->kf_bmode_prob [A] [L]);
-#if CONFIG_COMP_INTRA_PRED
-                    if (uses_second)
+                if (cpi->mb.e_mbd.update_mb_segmentation_map)
+                {
+                    write_mb_segid(bc, &m->mbmi, &cpi->mb.e_mbd);
+                }
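+                // NOTE(review): the old loop set xd->mode_info_context = m for every MB; confirm get_pred_prob(c, xd, ...) below still sees the current MB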
+                if ( c->mb_no_coeff_skip &&
+                     ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) ||
+                       (get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0) ) )
+                {
+    #if CONFIG_NEWENTROPY
+                    vp8_encode_bool(bc, m->mbmi.mb_skip_coeff,
+                                    get_pred_prob(c, xd, PRED_MBSKIP));
+    #else
+                    vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
+    #endif
+                }
+    #if CONFIG_QIMODE
+                kfwrite_ymode(bc, ym,
+                              c->kf_ymode_prob[c->kf_ymode_probs_index]);
+    #else
+                kfwrite_ymode(bc, ym, c->kf_ymode_prob);
+    #endif
+                if (ym == B_PRED)
+                {
+                    const int mis = c->mode_info_stride;
+                    int i = 0;
+    #if CONFIG_COMP_INTRA_PRED
+                    int uses_second =
+                            m->bmi[0].as_mode.second !=
+                                    (B_PREDICTION_MODE) (B_DC_PRED - 1);
+                    vp8_write(bc, uses_second, 128);
+    #endif
+                    do
                     {
-                        write_bmode(bc, bm2, c->kf_bmode_prob [A] [L]);
+                        const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
+                        const B_PREDICTION_MODE L = left_block_mode(m, i);
+                        const int bm = m->bmi[i].as_mode.first;
+    #if CONFIG_COMP_INTRA_PRED
+                        const int bm2 = m->bmi[i].as_mode.second;
+    #endif
+
+    #ifdef ENTROPY_STATS
+                        ++intra_mode_stats [A] [L] [bm];
+    #endif
+
+                        write_bmode(bc, bm, c->kf_bmode_prob [A] [L]);
+    #if CONFIG_COMP_INTRA_PRED
+                        if (uses_second)
+                        {
+                            write_bmode(bc, bm2, c->kf_bmode_prob [A] [L]);
+                        }
+    #endif
                     }
-#endif
+                    while (++i < 16);
                 }
-                while (++i < 16);
-            }
-            if(ym == I8X8_PRED)
-            {
-                write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[2].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[8].as_mode.first, c->i8x8_mode_prob);
-                write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->i8x8_mode_prob);
-            }
-            else
+                if(ym == I8X8_PRED)
+                {
+                    write_i8x8_mode(bc, m->bmi[0].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[2].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[8].as_mode.first,
+                                    c->i8x8_mode_prob);
+                    write_i8x8_mode(bc, m->bmi[10].as_mode.first,
+                                    c->i8x8_mode_prob);
+                }
+                else
 #if CONFIG_UVINTRA
-                write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
+                    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
 #else
-                write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob);
+                    write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob);
 #endif
-
-            m++;
+                // Next MB
+                mb_row += dy;
+                mb_col += dx;
+                m += offset_extended;
+            }
         }
-        //printf("\n");
-        m++;    // skip L prediction border
+        mb_row += 2;
     }
 }
 
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -61,14 +61,31 @@
     } bmi[16];
 } PARTITION_INFO;
 
+// Snapshot of one MB's coding decision taken during the mode picking process;
+// read back by update_state() in encodeframe.c. TODO Do we need all of these?
 typedef struct
 {
-    DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+    MODE_INFO mic;                  // copied back into xd->mode_info_context
+    PARTITION_INFO partition_info;  // copied back to x->partition_info (SPLITMV)
+    int_mv best_ref_mv;             // passed to rd_update_mvcount()
+    int rate;
+    int distortion;                 // added to cpi->prediction_error
+    int intra_error;                // added to cpi->intra_error
+    int best_mode_index;            // index into cpi->mode_chosen_counts
+    int rddiv;
+    int rdmult;
+
+} PICK_MODE_CONTEXT;
+
+typedef struct
+{
+    DECLARE_ALIGNED(16, short, src_diff[400]);  // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
     DECLARE_ALIGNED(16, short, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
-    DECLARE_ALIGNED(16, unsigned char, thismb[256]);
+    DECLARE_ALIGNED(16, unsigned char, thismb[256]);    // 16x16 Y
 
     unsigned char *thismb_ptr;
-    // 16 Y blocks, 4 U blocks, 4 V blocks, 1 DC 2nd order block each with 16 entries
+    // 16 Y blocks, 4 U blocks, 4 V blocks,
+    // 1 DC 2nd order block each with 16 entries
     BLOCK block[25];
 
     YV12_BUFFER_CONFIG src;
@@ -113,7 +130,6 @@
     int mv_row_min;
     int mv_row_max;
 
-    int vector_range;    // Used to monitor limiting range of recent vectors to guide search.
     int skip;
 
     int encode_breakout;
@@ -134,6 +150,16 @@
 
     int optimize;
     int q_index;
+
+    int encode_as_sb;
+
+    // Structure to hold context for each of the 4 MBs within a SB:
+    // when encoded as 4 independent MBs:
+    PICK_MODE_CONTEXT mb_context[4];
+#if CONFIG_SUPERBLOCKS
+    // when 4 MBs share coding parameters:
+    PICK_MODE_CONTEXT sb_context[4];
+#endif
 
     void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -56,10 +56,16 @@
                                       MB_ROW_COMP *mbr_ei,
                                       int mb_row,
                                       int count);
+extern int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+                                            int recon_yoffset,
+                                            int recon_uvoffset);
 void vp8_build_block_offsets(MACROBLOCK *x);
 void vp8_setup_block_ptrs(MACROBLOCK *x);
-int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
-int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
+void vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
+                                   int recon_yoffset, int recon_uvoffset,
+                                   int output_enabled);
+void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x,
+                                    TOKENEXTRA **t, int output_enabled);
 static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );
 
 
@@ -315,7 +321,9 @@
             recon_yoffset += 16;
 #endif
             //Copy current mb to a buffer
-            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
+            RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                      x->src.y_stride,
+                                                      x->thismb, 16);
 
             // measure activity
             mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
@@ -380,81 +388,407 @@
     adjust_act_zbin(cpi, x);
 }
 
-static
-void encode_mb_row(VP8_COMP *cpi,
-                   VP8_COMMON *cm,
-                   int mb_row,
-                   MACROBLOCK  *x,
-                   MACROBLOCKD *xd,
-                   TOKENEXTRA **tp,
-                   int *totalrate)
+static void update_state (VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx)
 {
+    int i;
+    MACROBLOCKD *xd = &x->e_mbd;
+    MODE_INFO *mi = &ctx->mic;
+    int mb_mode = mi->mbmi.mode;
+    int mb_mode_index = ctx->best_mode_index;
+    // Write the mode decision saved in ctx back into the current MB's state
+#if CONFIG_DEBUG
+    assert (mb_mode < MB_MODE_COUNT);
+    assert (mb_mode_index < MAX_MODES);
+    assert (mi->mbmi.ref_frame < MAX_REF_FRAMES);
+#endif
+
+    // Restore the coding context of the MB to that which was in place
+    // when the mode was picked for it
+    vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO));
+
+    if (mb_mode == B_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            xd->block[i].bmi.as_mode = xd->mode_info_context->bmi[i].as_mode;
+            assert (xd->block[i].bmi.as_mode.first < MB_MODE_COUNT);
+        }
+    }
+    else if (mb_mode == I8X8_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            xd->block[i].bmi = xd->mode_info_context->bmi[i];
+        }
+    }
+    else if (mb_mode == SPLITMV)
+    {
+        vpx_memcpy(x->partition_info, &ctx->partition_info,
+                   sizeof(PARTITION_INFO));
+        // The MB-level mv is taken from the last block (index 15) of the partition
+        xd->mode_info_context->mbmi.mv.as_int =
+                                      x->partition_info->bmi[15].mv.as_int;
+    }
+
+    if (cpi->common.frame_type == KEY_FRAME)
+    {
+        // Key frames: nothing more to do here (the mode restore below is disabled)
+        //if (mb_mode == B_PRED)
+        //    for (i = 0; i < 16; i++)
+        //    {
+        //        xd->block[i].bmi.as_mode =
+        //                          xd->mode_info_context->bmi[i].as_mode;
+        //        assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT);
+        //    }
+    }
+    else
+    {
+/* Disabled:
+        // Reduce the activation RD thresholds for the best choice mode
+        if ((cpi->rd_baseline_thresh[mb_mode_index] > 0) &&
+            (cpi->rd_baseline_thresh[mb_mode_index] < (INT_MAX >> 2)))
+        {
+            int best_adjustment = (cpi->rd_thresh_mult[mb_mode_index] >> 2);
+
+            cpi->rd_thresh_mult[mb_mode_index] =
+                    (cpi->rd_thresh_mult[mb_mode_index]
+                     >= (MIN_THRESHMULT + best_adjustment)) ?
+                            cpi->rd_thresh_mult[mb_mode_index] - best_adjustment :
+                            MIN_THRESHMULT;
+            cpi->rd_threshes[mb_mode_index] =
+                    (cpi->rd_baseline_thresh[mb_mode_index] >> 7)
+                    * cpi->rd_thresh_mult[mb_mode_index];
+
+        }
+*/
+        // Note how often each mode is chosen as best
+        cpi->mode_chosen_counts[mb_mode_index]++;
+
+        rd_update_mvcount(cpi, x, &ctx->best_ref_mv);
+        // Accumulate the frame-level error totals from the saved context
+        cpi->prediction_error += ctx->distortion;
+        cpi->intra_error += ctx->intra_error;
+    }
+}
+// Pick (and dummy-encode) a mode for each in-frame MB of this SB; rate is added to *totalrate
+static void pick_mb_modes (VP8_COMP *cpi,
+                           VP8_COMMON *cm,
+                           int mb_row,
+                           int mb_col,
+                           MACROBLOCK  *x,
+                           MACROBLOCKD *xd,
+                           TOKENEXTRA **tp,
+                           int *totalrate)
+{
+    int i;
+    int map_index;
     int recon_yoffset, recon_uvoffset;
-    int mb_col;
     int ref_fb_idx = cm->lst_fb_idx;
     int dst_fb_idx = cm->new_fb_idx;
     int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
     int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
-    int map_index = (mb_row * cpi->common.mb_cols);
+    ENTROPY_CONTEXT_PLANES left_context[2];
+    ENTROPY_CONTEXT_PLANES above_context[2];
+    ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
+                                                        + mb_col;
 
-    // Reset the left context
-    vp8_zero(cm->left_context)
+    // Per-step (row, col) deltas: TL -> TR -> BL -> BR, then on to (row, col+2)
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
 
-    // reset above block coeffs
-    xd->above_context = cm->above_context;
+    /* Must not modify L & A contexts: save 2 entries of each, restore on exit */
+    vpx_memcpy (left_context,
+                cpi->left_context,
+                sizeof(left_context));
+    vpx_memcpy (above_context,
+                initial_above_context_ptr,
+                sizeof(above_context));
 
-    xd->up_available = (mb_row != 0);
-    recon_yoffset = (mb_row * recon_y_stride * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * 8);
+    /* Visit the 4 MBs in raster order within the SB */
+    for ( i=0; i<4; i++ )
+    {
+        int dy = row_delta[i];
+        int dx = col_delta[i];
+        int offset_unextended = dy * cm->mb_cols + dx;
+        int offset_extended   = dy * xd->mode_info_stride + dx;
 
-    cpi->tplist[mb_row].start = *tp;
-    //printf("Main mb_row = %d\n", mb_row);
+        // TODO Many of the index items here can be computed more efficiently!
 
-    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
-    // units as they are always compared to values that are in 1/8th pel units
-    xd->mb_to_top_edge = -((mb_row * 16) << 3);
-    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+        {
+            // MB lies outside frame: step position and pointers, then move on
+            mb_row += dy;
+            mb_col += dx;
 
-    // Set up limit values for vertical motion vector components
-    // to prevent them extending beyond the UMV borders
-    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
-                        + (VP8BORDERINPIXELS - 16);
+            // Update pointers
+            x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+            x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+            x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
 
-    // Set the mb activity pointer to the start of the row.
-    x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+            x->gf_active_ptr += offset_unextended;
+            x->partition_info += offset_extended;
+            xd->mode_info_context += offset_extended;
+            xd->prev_mode_info_context += offset_extended;
+#if CONFIG_DEBUG
+            assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+                   (xd->mode_info_context - cpi->common.mip));
+#endif
+            continue;
+        }
 
-    // for each macroblock col in image
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        // Index of the MB in the SB 0..3
+        xd->mb_index = i;
+
+        map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+        x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+        // Set above context pointer
+        xd->above_context = cm->above_context + mb_col;
+
+        // Restore the appropriate left context depending on which
+        // row of the SB the MB is situated in (i>>1: 0 = top, 1 = bottom)
+        vpx_memcpy (&cm->left_context,
+                    &cpi->left_context[i>>1],
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        // Set up distance of MB to edge of frame in 1/8th pel units
+        xd->mb_to_top_edge    = -((mb_row * 16) << 3);
+        xd->mb_to_left_edge   = -((mb_col * 16) << 3);
+        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        xd->mb_to_right_edge  = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 16x16 block size
+        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 16);
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+                            + (VP8BORDERINPIXELS - 16);
+
+        xd->up_available   = (mb_row != 0);
+        xd->left_available = (mb_col != 0);
+
+        recon_yoffset  = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+        recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col *  8);
+
+        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+        // Copy current MB to a work buffer
+        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                  x->src.y_stride,
+                                                  x->thismb, 16);
+
+        x->rddiv = cpi->RDDIV;
+        x->rdmult = cpi->RDMULT;
+
+        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+            vp8_activity_masking(cpi, x);
+
+        // Is segmentation enabled
+        if (xd->segmentation_enabled)
+        {
+            // Set mbmi.segment_id from the segmentation map (values above 3 become 0)
+            if (cpi->segmentation_map[map_index] <= 3)
+                xd->mode_info_context->mbmi.segment_id =
+                              cpi->segmentation_map[map_index];
+            else
+                xd->mode_info_context->mbmi.segment_id = 0;
+
+            vp8cx_mb_init_quantizer(cpi, x);
+        }
+        else
+            // Set to Segment 0 by default
+            xd->mode_info_context->mbmi.segment_id = 0;
+
+        x->active_ptr = cpi->active_map + map_index;
+
+        /* force 4x4 transform for mode selection */
+        xd->mode_info_context->mbmi.txfm_size = TX_4X4; // TODO Is this right??
+
+        cpi->update_context = 0;    // TODO Do we need this now??
+
+        // Find best coding mode & reconstruct the MB so it is available
+        // as a predictor for MBs that follow in the SB
+        if (cm->frame_type == KEY_FRAME)
+        {
+            *totalrate += vp8_rd_pick_intra_mode(cpi, x);
+
+            // Save the coding context
+            vpx_memcpy (&x->mb_context[i].mic, xd->mode_info_context,
+                        sizeof(MODE_INFO));
+
+            // Dummy encode, do not do the tokenization
+            vp8cx_encode_intra_macro_block(cpi, x, tp, 0);
+            // Note the encoder may have changed the segment_id
+        }
+        else
+        {
+            *totalrate += vp8cx_pick_mode_inter_macroblock(cpi, x,
+                                                           recon_yoffset,
+                                                           recon_uvoffset);
+
+            // Dummy encode, do not do the tokenization
+            vp8cx_encode_inter_macroblock(cpi, x, tp,
+                                         recon_yoffset, recon_uvoffset, 0);
+        }
+
+        // Keep a copy of the updated left context for this row of the SB
+        vpx_memcpy (&cpi->left_context[i>>1],
+                    &cm->left_context,
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
+
+        x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+        x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+        x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
+
+        x->gf_active_ptr += offset_unextended;
+        x->partition_info += offset_extended;
+        xd->mode_info_context += offset_extended;
+        xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+        assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+               (xd->mode_info_context - cpi->common.mip));
+#endif
+    }
+
+    /* Restore L & A coding context to those in place on entry */
+    vpx_memcpy (cpi->left_context,
+                left_context,
+                sizeof(left_context));
+    vpx_memcpy (initial_above_context_ptr,
+                above_context,
+                sizeof(above_context));
+}
+
+static void encode_sb ( VP8_COMP *cpi,
+                        VP8_COMMON *cm,
+                        int mbrow,
+                        int mbcol,
+                        MACROBLOCK  *x,
+                        MACROBLOCKD *xd,
+                        TOKENEXTRA **tp )
+{
+    int i, j;
+    int map_index;
+    int mb_row, mb_col;
+    int recon_yoffset, recon_uvoffset;
+    int ref_fb_idx = cm->lst_fb_idx;
+    int dst_fb_idx = cm->new_fb_idx;
+    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+    int row_delta[4] = { 0, +1,  0, -1};
+    int col_delta[4] = {+1, -1, +1, +1};
+
+    mb_row = mbrow;
+    mb_col = mbcol;
+
+    /* Encode MBs in raster order within the SB */
+    for ( i=0; i<4; i++ )
     {
+        int dy = row_delta[i];
+        int dx = col_delta[i];
+        int offset_extended   = dy * xd->mode_info_stride + dx;
+        int offset_unextended = dy * cm->mb_cols + dx;
+
+        if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols))
+        {
+            // MB lies outside frame, move on
+            mb_row += dy;
+            mb_col += dx;
+
+            x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+            x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+            x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
+
+            x->gf_active_ptr      += offset_unextended;
+            x->partition_info     += offset_extended;
+            xd->mode_info_context += offset_extended;
+            xd->prev_mode_info_context += offset_extended;
+
+#if CONFIG_DEBUG
+            assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+                   (xd->mode_info_context - cpi->common.mip));
+#endif
+            continue;
+        }
+
+        xd->mb_index = i;
+
 #ifdef ENC_DEBUG
-        enc_debug = (cpi->common.current_video_frame ==1 && mb_row==4 && mb_col==0);
+        enc_debug = (cpi->common.current_video_frame == 0 &&
+                     mb_row==0 && mb_col==0);
         mb_col_debug=mb_col;
         mb_row_debug=mb_row;
 #endif
-        // Distance of Mb to the left & right edges, specified in
-        // 1/8th pel units as they are always compared to values
-        // that are in 1/8th pel units
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
 
-        // Set up limit values for horizontal motion vector components
-        // to prevent them extending beyond the UMV borders
+        // Restore MB state to that when it was picked
+#if CONFIG_SUPERBLOCKS
+        if (x->encode_as_sb)
+            update_state (cpi, x, &x->sb_context[i]);
+        else
+#endif
+            update_state (cpi, x, &x->mb_context[i]);
+
+        // Copy in the appropriate left context
+        vpx_memcpy (&cm->left_context,
+                    &cpi->left_context[i>>1],
+                    sizeof(ENTROPY_CONTEXT_PLANES));
+
+        map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+        x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+        // reset above block coeffs
+        xd->above_context = cm->above_context + mb_col;
+
+        // Set up distance of MB to edge of the frame in 1/8th pel units
+        xd->mb_to_top_edge    = -((mb_row * 16) << 3);
+        xd->mb_to_left_edge   = -((mb_col * 16) << 3);
+        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+        xd->mb_to_right_edge  = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 16x16 block size
+        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 16);
         x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                             + (VP8BORDERINPIXELS - 16);
 
+#if CONFIG_SUPERBLOCKS
+        // Set up limit values for MV components to prevent them from
+        // extending beyond the UMV borders assuming 32x32 block size
+        x->mv_row_min_sb = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_row_max_sb = ((cm->mb_rows - 1 - mb_row) * 16)
+                            + (VP8BORDERINPIXELS - 32);
+        x->mv_col_min_sb = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
+        x->mv_col_max_sb = ((cm->mb_cols - 1 - mb_col) * 16)
+                            + (VP8BORDERINPIXELS - 32);
+#endif
+
+        xd->up_available = (mb_row != 0);
+        xd->left_available = (mb_col != 0);
+
+        recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+        recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-        xd->left_available = (mb_col != 0);
 
-        x->rddiv = cpi->RDDIV;
-        x->rdmult = cpi->RDMULT;
+        // Copy current MB to a work buffer
+        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+                                                  x->src.y_stride,
+                                                  x->thismb, 16);
 
-        //Copy current mb to a buffer
-        RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-
         if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
             vp8_activity_masking(cpi, x);
 
@@ -462,8 +796,9 @@
         if (xd->segmentation_enabled)
         {
             // Code to set segment id in xd->mbmi.segment_id
-            if (cpi->segmentation_map[map_index+mb_col] <= 3)
-                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col];
+            if (cpi->segmentation_map[map_index] <= 3)
+                xd->mode_info_context->mbmi.segment_id =
+                              cpi->segmentation_map[map_index];
             else
                 xd->mode_info_context->mbmi.segment_id = 0;
 
@@ -473,14 +808,13 @@
             // Set to Segment 0 by default
             xd->mode_info_context->mbmi.segment_id = 0;
 
-        x->active_ptr = cpi->active_map + map_index + mb_col;
+        x->active_ptr = cpi->active_map + map_index;
 
-        /* force 4x4 transform for mode selection */
-        xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->update_context = 0;
 
         if (cm->frame_type == KEY_FRAME)
         {
-            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
+            vp8cx_encode_intra_macro_block(cpi, x, tp, 1);
             //Note the encoder may have changed the segment_id
 
 #ifdef MODE_STATS
@@ -489,7 +823,8 @@
         }
         else
         {
-            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
+            vp8cx_encode_inter_macroblock(cpi, x, tp,
+                                         recon_yoffset, recon_uvoffset, 1);
             //Note the encoder may have changed the segment_id
 
 #ifdef MODE_STATS
@@ -502,63 +837,148 @@
                 for (b = 0; b < x->partition_info->count; b++)
                 {
                     inter_b_modes[x->partition_info->bmi[b].mode] ++;
-                }
+               }
             }
 
 #endif
 
             // Count of last ref frame 0,0 usage
-            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) &&
+                (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 cpi->inter_zz_count ++;
         }
 
+        // TODO Partitioning is broken!
         cpi->tplist[mb_row].stop = *tp;
 
-        // Increment pointer into gf usage flags structure.
-        x->gf_active_ptr++;
+        // Copy back updated left context
+        vpx_memcpy (&cpi->left_context[i>>1],
+                    &cm->left_context,
+                    sizeof(ENTROPY_CONTEXT_PLANES));
 
-        // Increment the activity mask pointers.
-        x->mb_activity_ptr++;
+        // Next MB
+        mb_row += dy;
+        mb_col += dx;
 
-        // adjust to the next column of macroblocks
-        x->src.y_buffer += 16;
-        x->src.u_buffer += 8;
-        x->src.v_buffer += 8;
+        x->src.y_buffer += 16 * (dx + dy*x->src.y_stride);
+        x->src.u_buffer += 8  * (dx + dy*x->src.uv_stride);
+        x->src.v_buffer += 8  * (dx + dy*x->src.uv_stride);
 
-        recon_yoffset += 16;
-        recon_uvoffset += 8;
+        x->gf_active_ptr      += offset_unextended;
+        x->partition_info     += offset_extended;
+        xd->mode_info_context += offset_extended;
+        xd->prev_mode_info_context += offset_extended;
 
-        // skip to next mb
-        xd->mode_info_context++;
-
-        xd->prev_mode_info_context++;
-        assert((xd->prev_mode_info_context - cpi->common.prev_mip)
-            ==(xd->mode_info_context - cpi->common.mip));
-        x->partition_info++;
-
-        xd->above_context++;
+#if CONFIG_DEBUG
+        assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+               (xd->mode_info_context - cpi->common.mip));
+#endif
     }
 
-    //extend the recon for intra prediction
-    vp8_extend_mb_row(
-        &cm->yv12_fb[dst_fb_idx],
-        xd->dst.y_buffer + 16,
-        xd->dst.u_buffer + 8,
-        xd->dst.v_buffer + 8);
+    // debug output
+#if DBG_PRNT_SEGMAP
+        {
+            FILE *statsfile;
+            statsfile = fopen("segmap2.stt", "a");
+            fprintf(statsfile, "\n" );
+            fclose(statsfile);
+        }
+    #endif
+}
 
-    // this is to account for the border
-    xd->prev_mode_info_context++;
-    xd->mode_info_context++;
-    x->partition_info++;
+static
+void encode_sb_row ( VP8_COMP *cpi,
+                     VP8_COMMON *cm,
+                     int mb_row,          // first MB row of this SB row
+                     MACROBLOCK  *x,
+                     MACROBLOCKD *xd,
+                     TOKENEXTRA **tp,
+                     int *totalrate )     // in/out: chosen rate is added
+{
+    int mb_col;
+    int mb_cols = cm->mb_cols;
 
-// debug output
-#if DBG_PRNT_SEGMAP
+    // Initialize the left context for the new SB row
+    vpx_memset (cpi->left_context, 0, sizeof(cpi->left_context));
+    vpx_memset (&cm->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    // Code each SB (2 MB columns wide) in the row: pick modes, then encode
+    for (mb_col=0; mb_col<mb_cols; mb_col+=2)
     {
-        FILE *statsfile;
-        statsfile = fopen("segmap2.stt", "a");
-        fprintf(statsfile, "\n" );
-        fclose(statsfile);
+        int mb_rate = 0;          // rate when coded as 4 separate MBs
+#if CONFIG_SUPERBLOCKS
+        int sb_rate = INT_MAX;    // rate when coded as a single SB
+#endif
+
+#if CONFIG_DEBUG
+        MODE_INFO *mic = xd->mode_info_context;  // entry values, asserted below
+        PARTITION_INFO *pi = x->partition_info;
+        signed char  *gfa = x->gf_active_ptr;
+        unsigned char *yb = x->src.y_buffer;
+        unsigned char *ub = x->src.u_buffer;
+        unsigned char *vb = x->src.v_buffer;
+#endif
+
+        // Pick modes assuming the SB is coded as 4 independent MBs
+        pick_mb_modes (cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate);
+
+        x->src.y_buffer -= 32;  // rewind to the SB start (asserted below)
+        x->src.u_buffer -= 16;
+        x->src.v_buffer -= 16;
+
+        x->gf_active_ptr -= 2;
+        x->partition_info -= 2;
+        xd->mode_info_context -= 2;
+        xd->prev_mode_info_context -= 2;
+
+#if CONFIG_DEBUG
+        assert (x->gf_active_ptr == gfa);
+        assert (x->partition_info == pi);
+        assert (xd->mode_info_context == mic);
+        assert (x->src.y_buffer == yb);
+        assert (x->src.u_buffer == ub);
+        assert (x->src.v_buffer == vb);
+#endif
+
+#if CONFIG_SUPERBLOCKS
+        // Pick a mode assuming that it applies to all 4 of the MBs in the SB
+        pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, &sb_rate);
+
+        // Decide whether to encode as a SB or 4xMBs
+        if(sb_rate < mb_rate)
+        {
+            x->encode_as_sb = 1;
+            *totalrate += sb_rate;
+        }
+        else
+#endif
+        {   // runs unconditionally when CONFIG_SUPERBLOCKS is off
+            x->encode_as_sb = 0;
+            *totalrate += mb_rate;
+        }
+
+        // Encode SB using best computed mode(s)
+        encode_sb (cpi, cm, mb_row, mb_col, x, xd, tp);
+
+#if CONFIG_DEBUG
+        assert (x->gf_active_ptr == gfa+2);
+        assert (x->partition_info == pi+2);
+        assert (xd->mode_info_context == mic+2);
+        assert (x->src.y_buffer == yb+32);
+        assert (x->src.u_buffer == ub+16);
+        assert (x->src.v_buffer == vb+16);
+#endif
     }
+
+    // this is to account for the border
+    x->gf_active_ptr += mb_cols - (mb_cols & 0x1);  // net 2*mb_cols per SB row
+    x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+    xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+    xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
+
+#if CONFIG_DEBUG
+    assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
+           (xd->mode_info_context - cpi->common.mip));
 #endif
 }
 
@@ -574,8 +994,6 @@
     // Activity map pointer
     x->mb_activity_ptr = cpi->mb_activity_map;
 
-    x->vector_range = 32;
-
     x->act_zbin_adj = 0;
 
     x->partition_info = x->pi;
@@ -593,7 +1011,7 @@
     if (cm->frame_type == KEY_FRAME)
         vp8_init_mbmode_probs(cm);
 
-    // Copy data over into macro block data sturctures.
+    // Copy data over into macro block data structures.
     x->src = * cpi->Source;
     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
     xd->dst = cm->yv12_fb[cm->new_fb_idx];
@@ -640,8 +1058,8 @@
     int totalrate;
 
     // Compute a modified set of reference frame probabilities to use when
-    // prediction fails. These are based on the current genreal estimates for
-    // this frame which may be updated with each itteration of the recode loop.
+    // prediction fails. These are based on the current general estimates for
+    // this frame which may be updated with each iteration of the recode loop.
     compute_mod_refprobs( cm );
 
 // debug output
@@ -740,7 +1158,6 @@
 #endif
 
     xd->mode_info_context = cm->mi;
-
     xd->prev_mode_info_context = cm->prev_mi;
 
     vp8_zero(cpi->MVcount);
@@ -775,19 +1192,20 @@
         vpx_usec_timer_start(&emr_timer);
 
         {
-            // for each macroblock row in the image
-            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+            // For each row of SBs in the frame
+            for (mb_row = 0; mb_row < cm->mb_rows; mb_row+=2)
             {
-                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
+                int offset = (cm->mb_cols+1) & ~0x1;
 
-                // adjust to the next row of MBs
-                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
-                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
-                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
+                encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
+
+                // adjust to the next row of SBs
+                x->src.y_buffer += 32 * x->src.y_stride - 16 * offset;
+                x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset;
+                x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset;
             }
 
             cpi->tok_count = tp - cpi->tok;
-
         }
 
         vpx_usec_timer_mark(&emr_timer);
@@ -795,8 +1213,9 @@
 
     }
 
-    // 256 rate units to the bit
-    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
+    // 256 rate units to the bit,
+    // projected_frame_size in units of BYTES
+    cpi->projected_frame_size = totalrate >> 8;
 
     // Make a note of the percentage MBs coded Intra.
     if (cm->frame_type == KEY_FRAME)
@@ -813,7 +1232,8 @@
                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 
         if (tot_modes)
-            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
+            cpi->this_frame_percent_intra =
+                   cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 
     }
 
@@ -1114,18 +1534,12 @@
 #endif
 }
 
-int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
+void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
+                                   MACROBLOCK *x,
+                                   TOKENEXTRA **t,
+                                   int output_enabled)
 {
-    int rate, i;
-    int mb_skip_context;
-
-    // Non rd path deprecated in test code base
-    //if (cpi->sf.RD && cpi->compressor_speed != 2)
-    vp8_rd_pick_intra_mode(cpi, x, &rate);
-    //else
-    //   vp8_pick_intra_mode(cpi, x, &rate);
-
-    if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    if((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled)
     {
         adjust_act_zbin( cpi, x );
         vp8_update_zbin_extra(cpi, x);
@@ -1157,9 +1571,13 @@
 
     if(x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED)
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
-    sum_intra_stats(cpi, x);
-    vp8_tokenize_mb(cpi, &x->e_mbd, t);
-    return rate;
+
+    if (output_enabled)
+    {
+        // Tokenize
+        sum_intra_stats(cpi, x);
+        vp8_tokenize_mb(cpi, &x->e_mbd, t);
+    }
 }
 #ifdef SPEEDSTATS
 extern int cnt_pm;
@@ -1167,10 +1585,11 @@
 
 extern void vp8_fix_contexts(MACROBLOCKD *x);
 
-int vp8cx_encode_inter_macroblock
+void vp8cx_encode_inter_macroblock
 (
     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
-    int recon_yoffset, int recon_uvoffset
+    int recon_yoffset, int recon_uvoffset,
+    int output_enabled
 )
 {
     VP8_COMMON *cm = &cpi->common;
@@ -1184,64 +1603,6 @@
 
     x->skip = 0;
 
-    if (xd->segmentation_enabled)
-        x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
-    else
-        x->encode_breakout = cpi->oxcf.encode_breakout;
-
-    //if (cpi->sf.RD)
-    // For now this codebase is limited to a single rd encode path
-    {
-        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
-        int single, compound, hybrid;
-
-        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                               &distortion, &intra_error, &single, &compound, &hybrid);
-
-        cpi->rd_single_diff += single;
-        cpi->rd_comp_diff   += compound;
-        cpi->rd_hybrid_diff += hybrid;
-        if (x->e_mbd.mode_info_context->mbmi.ref_frame &&
-            x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-        {
-            unsigned char pred_context;
-
-            pred_context = get_pred_context( cm, xd, PRED_COMP );
-
-            if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
-                cpi->single_pred_count[pred_context]++;
-            else
-                cpi->comp_pred_count[pred_context]++;
-        }
-
-
-        /* test code: set transform size based on mode selection */
-        if( cpi->common.txfm_mode == ALLOW_8X8
-            && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
-            && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
-            && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-        {
-            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
-            cpi->t8x8_count ++;
-        }
-        else
-        {
-            x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
-            cpi->t4x4_count++;
-        }
-
-        /* restore cpi->zbin_mode_boost_enabled */
-        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
-
-    }
-    //else
-    // The non rd encode path has been deleted from this code base
-    // to simplify development
-    //    vp8_pick_inter_mode
-
-    cpi->prediction_error += distortion;
-    cpi->intra_error += intra_error;
-
     if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
     {
         // Adjust the zbin based on this MB rate.
@@ -1250,7 +1611,7 @@
 
     {
         // Experimental code. Special case for gf and arf zeromv modes.
-        // Increase zbin size to supress noise
+        // Increase zbin size to suppress noise
         cpi->zbin_mode_boost = 0;
         if (cpi->zbin_mode_boost_enabled)
         {
@@ -1282,6 +1643,21 @@
                            get_pred_ref( cm, xd )) );
     set_pred_flag( xd, PRED_REF, ref_pred_flag );
 
+    /* test code: set transform size based on mode selection */
+    if( cpi->common.txfm_mode == ALLOW_8X8
+        && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+        cpi->t8x8_count ++;
+    }
+    else
+    {
+        x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+        cpi->t4x4_count++;
+    }
+
     // If we have just a single reference frame coded for a segment then
     // exclude from the reference frame counts used to work out
     // probabilities. NOTE: At the moment we dont support custom trees
@@ -1323,7 +1699,9 @@
             vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
         }
-        sum_intra_stats(cpi, x);
+
+        if (output_enabled)
+            sum_intra_stats(cpi, x);
     }
     else
     {
@@ -1394,7 +1772,8 @@
             fflush(stdout);
         }
 #endif
-        vp8_tokenize_mb(cpi, xd, t);
+        if (output_enabled)
+            vp8_tokenize_mb(cpi, xd, t);
 #ifdef ENC_DEBUG
         if (enc_debug) {
           printf("Tokenized\n");
@@ -1432,5 +1811,4 @@
 #endif
         }
     }
-    return rate;
 }
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -101,7 +101,10 @@
     int i;
 
     MACROBLOCKD *x = &mb->e_mbd;
+#if 0
+    // Intra modes requiring top-right MB reconstructed data have been disabled
     vp8_intra_prediction_down_copy(x);
+#endif
 
     for (i = 0; i < 16; i++)
         vp8_encode_intra4x4block(rtcd, mb, i);
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -911,7 +911,7 @@
 // PGW TODO..
 // This code removes direct dependency on QIndex to determin the range
 // (now uses the actual quantizer) but has not been tuned.
-static double adjust_maxq_qrange(VP8_COMP *cpi)
+static void adjust_maxq_qrange(VP8_COMP *cpi)
 {
     int i;
     double q;
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -109,7 +109,7 @@
     //VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16)
     best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (xd->dst.y_buffer, xd->dst.y_stride,
-                     xd->predictor, 16, &best_err);
+                     xd->predictor, 16, best_err);
 
     /* restore UMV window */
     x->mv_col_min = tmp_col_min;
@@ -158,7 +158,7 @@
     err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (ref->y_buffer + mb_y_offset,
                      ref->y_stride, xd->dst.y_buffer,
-                     xd->dst.y_stride, &err);
+                     xd->dst.y_stride, INT_MAX);
     dst_mv->as_int = 0;
 
     // Test last reference frame using the previous best mv as the
@@ -224,7 +224,7 @@
     err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                     (ref->y_buffer + mb_y_offset,
                      ref->y_stride, xd->dst.y_buffer,
-                     xd->dst.y_stride, &err);
+                     xd->dst.y_stride, INT_MAX);
 
     dst_mv->as_int = 0;
 
@@ -255,7 +255,7 @@
         err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16)
                         (xd->predictor, 16,
                          buf->y_buffer + mb_y_offset,
-                         buf->y_stride, &err);
+                         buf->y_stride, err);
         // find best
         if (err < best_err)
         {
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1257,7 +1257,6 @@
     int k = -1;
     int all_in;
     int best_site = -1;
-    MACROBLOCKD *xd = &x->e_mbd;
 
     int_mv fcenter_mv;
     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -205,8 +205,7 @@
         if ( minqtarget <= vp8_convert_qindex_to_q(i) )
             return i;
     }
-    if ( i == QINDEX_RANGE )
-        return QINDEX_RANGE-1;
+    return QINDEX_RANGE-1;
 }
 void init_minq_luts()
 {
@@ -408,7 +407,6 @@
     int i;
     int start_index = cpi->worst_quality;
     int target_index = cpi->worst_quality;
-    int retval = 0;
 
     // Convert the average q value to an index.
     for ( i = cpi->best_quality; i < cpi->worst_quality; i++ )
@@ -590,7 +588,7 @@
         // All other frames.
         else
         {
-            // No updeates.. leave things as they are.
+            // No updates.. leave things as they are.
             xd->update_mb_segmentation_map = 0;
             xd->update_mb_segmentation_data = 0;
         }
@@ -665,8 +663,6 @@
         cpi->mode_chosen_counts[i] = 0;
     }
 
-    cpi->mbs_tested_so_far = 0;
-
     // best quality defaults
     sf->RD = 1;
     sf->search_method = NSTEP;
@@ -2500,6 +2496,30 @@
     vp8_set_quantizer(cpi, find_fp_qindex());
     vp8_first_pass(cpi);
 }
+
+#if 1
+void write_yuv_frame_to_file(YV12_BUFFER_CONFIG *frame)
+{
+    // Debug aid: append the Y, U and V planes of the frame, row by row
+    // (y_width / uv_width bytes per row), to "encode_recon.yuv". Binary
+    // mode avoids newline translation; nothing is done if the open fails.
+    int i;
+    FILE *fp = fopen("encode_recon.yuv", "ab");
+
+    if (!fp)
+        return;
+
+    for (i = 0; i < frame->y_height; i++)
+        fwrite(frame->y_buffer + i * frame->y_stride, frame->y_width, 1, fp);
+    for (i = 0; i < frame->uv_height; i++)
+        fwrite(frame->u_buffer + i * frame->uv_stride, frame->uv_width, 1, fp);
+    for (i = 0; i < frame->uv_height; i++)
+        fwrite(frame->v_buffer + i * frame->uv_stride, frame->uv_width, 1, fp);
+
+    fclose(fp);
+}
+#endif
+
 //#define WRITE_RECON_BUFFER 1
 #if WRITE_RECON_BUFFER
 void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
@@ -2567,7 +2587,7 @@
     return (double)num_edge_pels/(double)num_pels;
 }
 
-// Function to test for conditions that indeicate we should loop
+// Function to test for conditions that indicate we should loop
 // back and recode a frame.
 static BOOL recode_loop_test( VP8_COMP *cpi,
                               int high_limit, int low_limit,
@@ -3563,9 +3583,12 @@
         loopfilter_frame(cpi, cm);
     }
 
+    if(cm->show_frame)
+        write_yuv_frame_to_file(cm->frame_to_show);
+
     update_reference_frames(cm);
 
-    // Work out the segment probabilites if segmentation is enabled and
+    // Work out the segment probabilities if segmentation is enabled and
     // the map is due to be updated
     if (xd->segmentation_enabled && xd->update_mb_segmentation_map)
     {
@@ -3935,12 +3958,14 @@
             // Low use of gf
             if ((gf_active_pct < 10) || ((gf_active_pct + gf_ref_usage_pct) < 15))
             {
-                // ...but last frame zero zero usage is reasonbable so a new gf might be appropriate
+                // ...but last frame zero zero usage is reasonable
+                // so a new gf might be appropriate
                 if (last_ref_zz_useage >= 25)
                 {
                     cpi->gf_bad_count ++;
 
-                    if (cpi->gf_bad_count >= 8)   // Check that the condition is stable
+                    // Check that the condition is stable
+                    if (cpi->gf_bad_count >= 8)
                     {
                         cpi->gf_update_recommended = 1;
                         cpi->gf_bad_count = 0;
@@ -3947,10 +3972,11 @@
                     }
                 }
                 else
-                    cpi->gf_bad_count = 0;        // Restart count as the background is not stable enough
+                    cpi->gf_bad_count = 0;  // Restart count as the background
+                                            // is not stable enough
             }
             else
-                cpi->gf_bad_count = 0;            // Gf useage has picked up so reset count
+                cpi->gf_bad_count = 0;  // Gf usage has picked up so reset count
         }
     }
     // If the signal is set but has not been read should we cancel it.
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -348,7 +348,6 @@
     unsigned int mode_check_freq[MAX_MODES];
     unsigned int mode_test_hit_counts[MAX_MODES];
     unsigned int mode_chosen_counts[MAX_MODES];
-    unsigned int mbs_tested_so_far;
 
     int rd_thresh_mult[MAX_MODES];
     int rd_baseline_thresh[MAX_MODES];
@@ -642,9 +641,17 @@
     int *lf_ref_frame_sign_bias;
     int *lf_ref_frame;
 
-    int force_next_frame_intra; /* force next frame to intra when kf_auto says so */
+    /* force next frame to intra when kf_auto says so */
+    int force_next_frame_intra;
 
     int droppable;
+
+    // Global store for SB left contexts, one for each MB row in the SB
+    ENTROPY_CONTEXT_PLANES left_context[2];
+
+    // TODO Do we still need this??
+    int update_context;
+
 } VP8_COMP;
 
 void control_data_rate(VP8_COMP *cpi);
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -874,6 +874,12 @@
         int this_rd;
         int ratey;
 
+        // TODO Temporarily ignore modes that need the above-right data. SB
+        // encoding means this data is not available for the bottom right MB
+        // Do we need to do this for mode2 also?
+        if (mode==B_LD_PRED || mode==B_VL_PRED)
+            continue;
+
         rate = bmode_costs[mode];
 
 #if CONFIG_COMP_INTRA_PRED
@@ -936,10 +942,10 @@
 
 static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
                                      int *rate_y, int *Distortion, int best_rd,
-                                     int allow_comp)
+                                     int allow_comp, int update_contexts)
 {
-    MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
+    MACROBLOCKD *const xd = &mb->e_mbd;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int distortion = 0;
     int tot_rate_y = 0;
@@ -949,13 +955,25 @@
     ENTROPY_CONTEXT *tl;
     unsigned int *bmode_costs;
 
-    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    if (update_contexts)
+    {
+        ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
+        tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
+    }
+    else
+    {
+        vpx_memcpy(&t_above, mb->e_mbd.above_context,
+                   sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memcpy(&t_left, mb->e_mbd.left_context,
+                   sizeof(ENTROPY_CONTEXT_PLANES));
 
-    ta = (ENTROPY_CONTEXT *)&t_above;
-    tl = (ENTROPY_CONTEXT *)&t_left;
+        ta = (ENTROPY_CONTEXT *)&t_above;
+        tl = (ENTROPY_CONTEXT *)&t_left;
+    }
 
+#if 0
     vp8_intra_prediction_down_copy(xd);
+#endif
 
     bmode_costs = mb->inter_bmode_costs;
 
@@ -2348,7 +2366,7 @@
     }
 }
 
-static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
+/*static */void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
 {
     if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
     {
@@ -2445,7 +2463,7 @@
     // Get the context probability for the prediction flag
     pred_prob = get_pred_prob( cm, xd, PRED_REF );
 
-    // Get the set of probailities to use if prediction fails
+    // Get the set of probabilities to use if prediction fails
     mod_refprobs = cm->mod_refprobs[pred_ref];
 
     // For each possible selected reference frame work out a cost.
@@ -2459,7 +2477,7 @@
         // Get the prediction for the current mb
         cost = vp8_cost_bit( pred_prob, pred_flag );
 
-        // for incorectly predicted cases
+        // for incorrectly predicted cases
         if ( ! pred_flag )
         {
             if ( mod_refprobs[0] )
@@ -2503,6 +2521,7 @@
     int best_mode_index = 0;
     int mode8x8[2][4];
     unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
+    int mb_index = xd->mb_index;
 
     int i;
     int mode_index;
@@ -2549,6 +2568,7 @@
 
     vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
     vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
+    vpx_memset(&x->mb_context[mb_index], 0, sizeof(PICK_MODE_CONTEXT));
 
     for (i = 0; i < 4; i++)
     {
@@ -2599,7 +2619,6 @@
     }
 
     *returnintra = INT_MAX;
-    cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
 
     x->skip = 0;
 
@@ -2647,8 +2666,8 @@
         distortion2 = 0;
 
         this_mode = vp8_mode_order[mode_index];
-
         x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+
 #if CONFIG_COMP_INTRA_PRED
         x->e_mbd.mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE) (DC_PRED - 1);
         x->e_mbd.mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE) (DC_PRED - 1);
@@ -2736,7 +2755,7 @@
             int tmp_rd;
 
             // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
-            tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, 0);
+            tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, 0, 0);
             rate2 += rate;
             distortion2 += distortion;
 
@@ -3049,7 +3068,7 @@
                 if (sse < threshold)
                 {
                      unsigned int q2dc = xd->block[24].dequant[0];
-                    /* If theres is no codeable 2nd order dc
+                    /* If there is no codeable 2nd order dc
                        or a very small uniform pixel change change */
                     if ((sse - var < q2dc * q2dc >>4) ||
                         (sse /2 > var && sse-var < 64))
@@ -3222,7 +3241,7 @@
 
         // Where skip is allowable add in the default per mb cost for the no skip case.
         // where we then decide to skip we have to delete this and replace it with the
-        // cost of signallying a skip
+        // cost of signaling a skip
         if (cpi->common.mb_no_coeff_skip)
         {
 #if CONFIG_NEWENTROPY
@@ -3329,7 +3348,7 @@
                 best_hybrid_rd = this_rd;
         }
 
-        // Did this mode help.. i.i is it the new best mode
+        // Did this mode help.. i.e. is it the new best mode
         if (this_rd < best_rd || x->skip)
         {
             if (!mode_excluded)
@@ -3454,9 +3473,6 @@
 
     }
 
-    // Note how often each mode chosen as best
-    cpi->mode_chosen_counts[best_mode_index] ++;
-
     // This code force Altref,0,0 and skip for the frame that overlays a
     // an alrtef unless Altref is filtered. However, this is unsafe if
     // segment level coding of ref frame or mode is enabled for this
@@ -3480,7 +3496,6 @@
         return;
     }
 
-
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
@@ -3509,8 +3524,6 @@
                                       x->partition_info->bmi[15].mv.as_int;
     }
 
-    rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
-
     if (best_single_rd == INT_MAX)
         *best_single_rd_diff = INT_MIN;
     else
@@ -3523,9 +3536,22 @@
         *best_hybrid_rd_diff = INT_MIN;
     else
         *best_hybrid_rd_diff = best_rd - best_hybrid_rd;
+
+    // Take a snapshot of the coding context so it can be
+    // restored if we decide to encode this way
+    x->mb_context[mb_index].best_mode_index = best_mode_index;
+    vpx_memcpy(&x->mb_context[mb_index].mic, x->e_mbd.mode_info_context,
+               sizeof(MODE_INFO));
+    vpx_memcpy(&x->mb_context[mb_index].partition_info, &best_partition,
+               sizeof(PARTITION_INFO));
+    vpx_memcpy(&x->mb_context[mb_index].best_ref_mv,
+               &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+               sizeof(int_mv));
+    //x->mb_context[mb_index].rddiv = x->rddiv;
+    //x->mb_context[mb_index].rdmult = x->rdmult;
 }
 
-void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
+int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x)
 {
     MACROBLOCKD *xd = &x->e_mbd;
     int error4x4, error16x16, error4x4d;
@@ -3540,7 +3566,7 @@
     int mode16x16;
     int mode8x8[2][4];
 
-    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
+    xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
     rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
     rate = rateuv;
@@ -3548,28 +3574,28 @@
     error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
                                             &rate16x16, &rate16x16_tokenonly,
                                             &dist16x16);
-    mode16x16 = x->e_mbd.mode_info_context->mbmi.mode;
+    mode16x16 = xd->mode_info_context->mbmi.mode;
 
     error8x8 = rd_pick_intra8x8mby_modes(cpi, x,
                 &rate8x8, &rate8x8_tokenonly,
                 &dist8x8, error16x16);
-    mode8x8[0][0]= x->e_mbd.mode_info_context->bmi[0].as_mode.first;
-    mode8x8[0][1]= x->e_mbd.mode_info_context->bmi[2].as_mode.first;
-    mode8x8[0][2]= x->e_mbd.mode_info_context->bmi[8].as_mode.first;
-    mode8x8[0][3]= x->e_mbd.mode_info_context->bmi[10].as_mode.first;
+    mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first;
+    mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first;
+    mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first;
+    mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first;
 #if CONFIG_COMP_INTRA_PRED
-    mode8x8[1][0]= x->e_mbd.mode_info_context->bmi[0].as_mode.second;
-    mode8x8[1][1]= x->e_mbd.mode_info_context->bmi[2].as_mode.second;
-    mode8x8[1][2]= x->e_mbd.mode_info_context->bmi[8].as_mode.second;
-    mode8x8[1][3]= x->e_mbd.mode_info_context->bmi[10].as_mode.second;
+    mode8x8[1][0]= xd->mode_info_context->bmi[0].as_mode.second;
+    mode8x8[1][1]= xd->mode_info_context->bmi[2].as_mode.second;
+    mode8x8[1][2]= xd->mode_info_context->bmi[8].as_mode.second;
+    mode8x8[1][3]= xd->mode_info_context->bmi[10].as_mode.second;
 #endif
 
     error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4, &rate4x4_tokenonly,
-                                         &dist4x4, error16x16, 0);
+                                         &dist4x4, error16x16, 0, 0);
     error4x4d = rd_pick_intra4x4mby_modes(cpi, x,
                                          &rate4x4d, &rate4x4_tokenonly,
-                                         &dist4x4d, error16x16, 1);
+                                         &dist4x4d, error16x16, 1, 0);
 
     if(error8x8> error16x16)
     {
@@ -3579,12 +3605,13 @@
             if (error4x4d >= error4x4) // FIXME save original modes etc.
                 error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4,
                                                      &rate4x4_tokenonly,
-                                                     &dist4x4, error16x16, 0);
-            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+                                                     &dist4x4, error16x16, 0,
+                                                     cpi->update_context);
+            xd->mode_info_context->mbmi.mode = B_PRED;
         }
         else
         {
-            x->e_mbd.mode_info_context->mbmi.mode = mode16x16;
+            xd->mode_info_context->mbmi.mode = mode16x16;
             rate += rate16x16;
 
         }
@@ -3597,16 +3624,95 @@
             if (error4x4d >= error4x4) // FIXME save original modes etc.
                 error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4,
                                                      &rate4x4_tokenonly,
-                                                     &dist4x4, error16x16, 0);
-            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
+                                                     &dist4x4, error16x16, 0,
+                                                     cpi->update_context);
+            xd->mode_info_context->mbmi.mode = B_PRED;
         }
         else
         {
 
-            x->e_mbd.mode_info_context->mbmi.mode = I8X8_PRED;
+            xd->mode_info_context->mbmi.mode = I8X8_PRED;
             set_i8x8_block_modes(x, mode8x8);
             rate += rate8x8;
         }
     }
-    *rate_ = rate;
+    return rate;
+}
+
+// Run the RD inter mode search (vp8_rd_pick_inter_mode) for the current
+// macroblock, accumulate the resulting statistics on cpi, record distortion
+// and intra error in x->mb_context[xd->mb_index], and return the reported rate.
+int vp8cx_pick_mode_inter_macroblock
+(
+    VP8_COMP *cpi, MACROBLOCK *x,
+    int recon_yoffset, int recon_uvoffset
+)
+{
+    VP8_COMMON *cm = &cpi->common;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    int rate = 0;        // zeroed like intra_error, in case the mode search
+    int distortion = 0;  // returns without writing its outputs
+    int intra_error = 0;
+    unsigned char *segment_id = &xd->mode_info_context->mbmi.segment_id;
+
+    if (xd->segmentation_enabled)
+        x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
+    else
+        x->encode_breakout = cpi->oxcf.encode_breakout;
+
+    //if (cpi->sf.RD)
+    // For now this codebase is limited to a single rd encode path
+    {
+        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
+        int single, compound, hybrid;
+
+        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
+                               &distortion, &intra_error, &single, &compound,
+                               &hybrid);
+
+        // TODO Save these to add in only if MB coding mode is selected?
+        cpi->rd_single_diff += single;
+        cpi->rd_comp_diff   += compound;
+        cpi->rd_hybrid_diff += hybrid;
+        if (xd->mode_info_context->mbmi.ref_frame &&
+            xd->mode_info_context->mbmi.mode != SPLITMV)
+        {
+            unsigned char pred_context;
+
+            pred_context = get_pred_context( cm, xd, PRED_COMP );
+
+            if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
+                cpi->single_pred_count[pred_context]++;
+            else
+                cpi->comp_pred_count[pred_context]++;
+        }
+
+        /* test code: set transform size based on mode selection */
+        if( cm->txfm_mode == ALLOW_8X8
+            && xd->mode_info_context->mbmi.mode != I8X8_PRED
+            && xd->mode_info_context->mbmi.mode != B_PRED
+            && xd->mode_info_context->mbmi.mode != SPLITMV)
+        {
+            xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+            cpi->t8x8_count ++;
+        }
+        else
+        {
+            xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+            cpi->t4x4_count++;
+        }
+
+        /* restore cpi->zbin_mode_boost_enabled to its value from before the search */
+        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
+    }
+    //else
+    // The non rd encode path has been deleted from this code base
+    // to simplify development
+    //    vp8_pick_inter_mode
+
+    // Store metrics so they can be added in to totals if this mode is picked
+    x->mb_context[xd->mb_index].distortion  = distortion;
+    x->mb_context[xd->mb_index].intra_error = intra_error;
+
+    return rate;
 }
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -19,7 +19,7 @@
 extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset,
                                    int *returnrate, int *returndistortion, int *returnintra,
                                    int *best_single_rd_diff, int *best_comp_rd_diff, int *best_hybrid_rd_diff);
-extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
+extern int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x);
 
 extern void vp8_mv_pred
 (
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1460,7 +1460,7 @@
 static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
 {
     int match = 1;
-    int i, j;
+    int i;
 
     match &= (img1->fmt == img2->fmt);
     match &= (img1->w == img2->w);
--