shithub: libvpx

Download patch

ref: a4c887da63131873a04b2af603efeb88b48b885e
parent: 6c61e28e7a8fc6cca6808b8579195aa051fc03eb
parent: 73207a1d8bf1ecf6bc6fea03ae56109fe18e9574
author: John Koleszar <jkoleszar@google.com>
date: Fri Jan 28 03:33:52 EST 2011

Merge remote branch 'origin/master' into experimental

Conflicts:
	vp8/encoder/rdopt.c

Change-Id: Ic17907df70fff45c9e766b5d0cbab0c5f1a1095f

--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -16,42 +16,6 @@
 /* Predict motion vectors using those from already-decoded nearby blocks.
    Note that we only consider one 4x4 subblock from each candidate 16x16
    macroblock.   */
-
-typedef union
-{
-    unsigned int as_int;
-    MV           as_mv;
-} int_mv;        /* facilitates rapid equality tests */
-
-static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
-{
-    MV xmv;
-    xmv = x->mbmi.mv.as_mv;
-
-    if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe])
-    {
-        xmv.row *= -1;
-        xmv.col *= -1;
-    }
-
-    mvp->as_mv = xmv;
-}
-
-
-void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
-{
-    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-}
-
-
 void vp8_find_near_mvs
 (
     MACROBLOCKD *xd,
@@ -82,7 +46,7 @@
         if (above->mbmi.mv.as_int)
         {
             (++mv)->as_int = above->mbmi.mv.as_int;
-            mv_bias(above, refframe, mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
             ++cntx;
         }
 
@@ -97,7 +61,7 @@
             int_mv this_mv;
 
             this_mv.as_int = left->mbmi.mv.as_int;
-            mv_bias(left, refframe, &this_mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
 
             if (this_mv.as_int != mv->as_int)
             {
@@ -119,7 +83,7 @@
             int_mv this_mv;
 
             this_mv.as_int = aboveleft->mbmi.mv.as_int;
-            mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
 
             if (this_mv.as_int != mv->as_int)
             {
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -17,6 +17,41 @@
 #include "modecont.h"
 #include "treecoder.h"
 
+typedef union
+{
+    unsigned int as_int;
+    MV           as_mv;
+} int_mv;        /* as_int overlays as_mv so two vectors can be compared with one integer test */
+
+/* Negate both components of *mvp, in place, when refmb_ref_frame_sign_bias (the sign bias of the
+ * frame the candidate vector refers to) differs from ref_frame_sign_bias[refframe]. */
+static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
+{
+    MV xmv;
+    xmv = mvp->as_mv;
+    if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
+    {
+        xmv.row *= -1;
+        xmv.col *= -1;
+    }
+    mvp->as_mv = xmv;
+}
+
+#define LEFT_TOP_MARGIN (16 << 3)
+#define RIGHT_BOTTOM_MARGIN (16 << 3)
+/* Clamp mv->col and mv->row in place to the xd->mb_to_*_edge offsets widened by the two margins. */
+static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
+{
+    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
+        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
+        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
+        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
+        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+}
+
 void vp8_find_near_mvs
 (
     MACROBLOCKD *xd,
@@ -34,9 +69,5 @@
 const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
 
 const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
-
-#define LEFT_TOP_MARGIN (16 << 3)
-#define RIGHT_BOTTOM_MARGIN (16 << 3)
-
 
 #endif
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -9,6 +9,7 @@
  */
 
 #include "vpx_config.h"
+#include "variance.h"
 
 #if HAVE_ARMV7
 
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -144,51 +144,6 @@
     }
 }
 
-void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
-{
-    int b;
-
-    vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
-
-    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
-
-    vp8_transform_intra_mby(x);
-
-    vp8_quantize_mby(x);
-
-    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
-
-    RECON_INVOKE(&rtcd->common->recon, recon_mby)
-        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
-
-    // make sure block modes are set the way we want them for context updates
-    for (b = 0; b < 16; b++)
-    {
-        BLOCKD *d = &x->e_mbd.block[b];
-
-        switch (x->e_mbd.mode_info_context->mbmi.mode)
-        {
-
-        case DC_PRED:
-            d->bmi.mode = B_DC_PRED;
-            break;
-        case V_PRED:
-            d->bmi.mode = B_VE_PRED;
-            break;
-        case H_PRED:
-            d->bmi.mode = B_HE_PRED;
-            break;
-        case TM_PRED:
-            d->bmi.mode = B_TM_PRED;
-            break;
-        default:
-            d->bmi.mode = B_DC_PRED;
-            break;
-
-        }
-    }
-}
-
 void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 {
     vp8_build_intra_predictors_mbuv(&x->e_mbd);
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -19,7 +19,6 @@
 void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
 void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
 void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
-void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
 
 #endif
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -2423,7 +2423,7 @@
     if (cpi->oxcf.auto_key
         && cpi->frames_to_key > (int)cpi->key_frame_frequency )
     {
-        int current_pos = cpi->stats_in;
+        FIRSTPASS_STATS *current_pos = cpi->stats_in;
         FIRSTPASS_STATS tmp_frame;
 
         cpi->frames_to_key /= 2;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2256,7 +2256,6 @@
 
     cpi->common.error.setjmp = 1;
 
-    CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
     CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
 
     vp8_create_common(&cpi->common);
@@ -2293,9 +2292,9 @@
     cpi->gold_is_alt  = 0 ;
 
     // allocate memory for storing last frame's MVs for MV prediction.
-    CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int_mv)));
-    CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
-    CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
+    CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv)));
+    CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
+    CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
 
     // Create the encoder segmentation map and set all entries to 0
     CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
@@ -2766,7 +2765,6 @@
     vp8_dealloc_compressor_data(cpi);
     vpx_free(cpi->mb.ss);
     vpx_free(cpi->tok);
-    vpx_free(cpi->rdtok);
     vpx_free(cpi->cyclic_refresh_map);
 
     vp8_remove_common(&cpi->common);
@@ -3196,6 +3194,10 @@
     cm->uvdc_delta_q = 0;
     cm->uvac_delta_q = 0;
 
+    if(Q<4)
+    {
+        cm->y2dc_delta_q = 4-Q;
+    }
     // Set Segment specific quatizers
     mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
     mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
@@ -4472,31 +4474,32 @@
     }
 
     // This frame's MVs are saved and will be used in next frame's MV prediction.
+    // The last-frame arrays have one more row (added at the bottom) and one more column (added on the right) than cm->mip; those edge elements are initialized to 0.
     if(cm->show_frame)   //do not save for altref frame
     {
-      int mb_row;
-      int mb_col;
-      MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
-      //static int last_video_frame = 0;
+        int mb_row;
+        int mb_col;
+        MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
 
-      if(cm->frame_type != KEY_FRAME)
-      {
-        for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
+        if(cm->frame_type != KEY_FRAME)
         {
-          for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
-          {
-              if(tmp->mbmi.ref_frame != INTRA_FRAME)
-                cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = tmp->mbmi.mv.as_int;
+            for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
+            {
+                for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
+                {
+                    if(tmp->mbmi.ref_frame != INTRA_FRAME)
+                        cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride+1)].as_int = tmp->mbmi.mv.as_int;
 
-              cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
-              cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = tmp->mbmi.ref_frame;
-              tmp++;
-          }
+                    cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride+1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
+                    cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride+1)] = tmp->mbmi.ref_frame;
+                    tmp++;
+                }
+            }
         }
-      }
     }
 
     // Update the GF useage maps.
+    // Update the GF useage maps.
     // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
     vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
 
@@ -4700,7 +4703,8 @@
     }
 
     // Update the buffer level variable.
-    if (cpi->common.refresh_alt_ref_frame)
+    // Non-viewable frames are a special case and are treated as pure overhead.
+    if ( !cm->show_frame )
         cpi->bits_off_target -= cpi->projected_frame_size;
     else
         cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -28,6 +28,7 @@
 #include "vpx/internal/vpx_codec_internal.h"
 #include "mcomp.h"
 #include "temporal_filter.h"
+#include "findnearmv.h"
 
 //#define SPEEDSTATS 1
 #define MIN_GF_INTERVAL             4
@@ -245,12 +246,6 @@
     BLOCK_MAX_SEGMENTS
 };
 
-typedef union
-{
-    unsigned int as_int;
-    MV           as_mv;
-} int_mv;        /* facilitates rapid equality tests */
-
 typedef struct
 {
 
@@ -309,8 +304,6 @@
 
     YV12_BUFFER_CONFIG last_frame_uf;
 
-    char *Dest;
-
     TOKENEXTRA *tok;
     unsigned int tok_count;
 
@@ -343,11 +336,6 @@
     int RDMULT;
     int RDDIV ;
 
-    TOKENEXTRA *rdtok;
-    vp8_writer rdbc;
-    int intra_mode_costs[10];
-
-
     CODING_CONTEXT coding_context;
 
     // Rate targetting variables
@@ -559,8 +547,6 @@
     int last_kffilt_lvl;
 
     int ref_frame_flags;
-
-    int exp[512];
 
     SPEED_FEATURES sf;
     int error_bins[1024];
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -290,14 +290,13 @@
     }
 
 #if !CONFIG_EXTEND_QRANGE
-    if (cpi->RDMULT < 125)
-        cpi->RDMULT = 125;
 #else
     if (cpi->RDMULT < 7)
         cpi->RDMULT = 7;
 #endif
-
     cpi->mb.errorperbit = (cpi->RDMULT / 100);
+    cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
+
 #if CONFIG_EXTEND_QRANGE
     if(cpi->mb.errorperbit<1)
         cpi->mb.errorperbit=1;
@@ -600,7 +599,52 @@
     return cost;
 }
 
+/* Y rate/distortion for the predictor already in mb->e_mbd.predictor: transforms and quantizes
+ * the residual, then sets *Rate = vp8_rdcost_mby(mb) and *Distortion = (mberr/berr sum) >> 4. */
+static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion,
+                            const vp8_encodemb_rtcd_vtable_t *rtcd)
+{
+    int b;
+    MACROBLOCKD *const x = &mb->e_mbd;
+    BLOCK   *const mb_y2 = mb->block + 24;
+    BLOCKD *const x_y2  = x->block + 24;
+    short *Y2DCPtr = mb_y2->src_diff;
+    BLOCK *beptr;
+    int d;

+    ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer,
+                                   mb->e_mbd.predictor, mb->src.y_stride );
+
+    // Fdct and building the 2nd order block
+    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
+    {
+        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
+        *Y2DCPtr++ = beptr->coeff[0];
+        *Y2DCPtr++ = beptr->coeff[16];
+    }
+
+    // 2nd order fdct
+    mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
+    // Quantization
+    for (b = 0; b < 16; b++)
+    {
+        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
+    }
+    // DC prediction and quantization of 2nd order block
+    mb->quantize_b(mb_y2, x_y2);
+
+    // Distortion; the Y2 term keeps its extra <<2 scaling when CONFIG_EXTEND_QRANGE is on
+    d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
+#if CONFIG_EXTEND_QRANGE
+    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff)<<2;
+#else
+    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
+#endif
+    *Distortion = (d >> 4);
+    // rate
+    *Rate = vp8_rdcost_mby(mb);
+}
+
 static void rd_pick_intra4x4block(
     VP8_COMP *cpi,
     MACROBLOCK *x,
@@ -716,34 +760,36 @@
     return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
 }
 
-int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion)
+int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
+                                   MACROBLOCK *x,
+                                   int *Rate,
+                                   int *rate_y,
+                                   int *Distortion)
 {
-
     MB_PREDICTION_MODE mode;
     MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
     int rate, ratey;
-    unsigned int distortion;
+    int distortion;
     int best_rd = INT_MAX;
+    int this_rd;
+    int i;
 
     //Y Search for 16x16 intra prediction mode
     for (mode = DC_PRED; mode <= TM_PRED; mode++)
     {
-        int this_rd;
-        int dummy;
-        rate = 0;
+        for (i = 0; i < 16; i++)
+        {
+            vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
+        }
 
         x->e_mbd.mode_info_context->mbmi.mode = mode;
 
-        rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
+        vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
 
-        vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
+        macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
+        rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
+                                     [x->e_mbd.mode_info_context->mbmi.mode];
 
-        ratey = vp8_rdcost_mby(x);
-
-        rate += ratey;
-
-        VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride, &distortion, &dummy);
-
         this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 
         if (this_rd < best_rd)
@@ -752,7 +798,7 @@
             best_rd = this_rd;
             *Rate = rate;
             *rate_y = ratey;
-            *Distortion = (int)distortion;
+            *Distortion = distortion;
         }
     }
 
@@ -760,7 +806,6 @@
     return best_rd;
 }
 
-
 static int rd_cost_mbuv(MACROBLOCK *mb)
 {
     int b;
@@ -1001,52 +1046,6 @@
     return distortion;
 }
 
-static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp8_encodemb_rtcd_vtable_t *rtcd)
-{
-    int b;
-    MACROBLOCKD *const x = &mb->e_mbd;
-    BLOCK   *const mb_y2 = mb->block + 24;
-    BLOCKD *const x_y2  = x->block + 24;
-    short *Y2DCPtr = mb_y2->src_diff;
-    BLOCK *beptr;
-    int d;
-
-    ENCODEMB_INVOKE(rtcd, submby)(mb->src_diff, mb->src.y_buffer, mb->e_mbd.predictor, mb->src.y_stride);
-
-    // Fdct and building the 2nd order block
-    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
-    {
-        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
-        *Y2DCPtr++ = beptr->coeff[0];
-        *Y2DCPtr++ = beptr->coeff[16];
-    }
-
-    // 2nd order fdct
-    mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
-
-    // Quantization
-    for (b = 0; b < 16; b++)
-    {
-        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
-    }
-
-    // DC predication and Quantization of 2nd Order block
-    mb->quantize_b(mb_y2, x_y2);
-
-    // Distortion
-    d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
-#if CONFIG_EXTEND_QRANGE
-    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff)<<2;
-#else
-    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
-#endif
-
-    *Distortion = (d >> 4);
-
-    // rate
-    *Rate = vp8_rdcost_mby(mb);
-}
-
 unsigned char vp8_mbsplit_offset2[4][16] = {
     { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
     { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
@@ -1488,48 +1491,6 @@
     return bsi.segment_rd;
 }
 
-
-static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
-{
-    MV xmv;
-    xmv = x->mbmi.mv.as_mv;
-
-    if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe])
-    {
-        xmv.row *= -1;
-        xmv.col *= -1;
-    }
-
-    mvp->as_mv = xmv;
-}
-
-static void lf_mv_bias(const int lf_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
-{
-    MV xmv;
-    xmv = mvp->as_mv;
-
-    if (lf_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
-    {
-        xmv.row *= -1;
-        xmv.col *= -1;
-    }
-
-    mvp->as_mv = xmv;
-}
-
-static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
-{
-    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-}
-
 static void swap(int *x,int *y)
 {
    int tmp;
@@ -1613,7 +1574,7 @@
 }
 
 //The improved MV prediction
-static void vp8_mv_pred
+void vp8_mv_pred
 (
     VP8_COMP *cpi,
     MACROBLOCKD *xd,
@@ -1628,15 +1589,15 @@
     const MODE_INFO *above = here - xd->mode_info_stride;
     const MODE_INFO *left = here - 1;
     const MODE_INFO *aboveleft = above - 1;
-    int_mv           near_mvs[7];
-    int              near_ref[7];
+    int_mv           near_mvs[8];
+    int              near_ref[8];
     int_mv           mv;
     int              vcnt=0;
     int              find=0;
     int              mb_offset;
 
-    int              mvx[7];
-    int              mvy[7];
+    int              mvx[8];
+    int              mvy[8];
     int              i;
 
     mv.as_int = 0;
@@ -1643,14 +1604,14 @@
 
     if(here->mbmi.ref_frame != INTRA_FRAME)
     {
-        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = 0;
-        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = 0;
+        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
+        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
 
         // read in 3 nearby block's MVs from current frame as prediction candidates.
         if (above->mbmi.ref_frame != INTRA_FRAME)
         {
             near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
-            mv_bias(above, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
             near_ref[vcnt] =  above->mbmi.ref_frame;
         }
         vcnt++;
@@ -1657,7 +1618,7 @@
         if (left->mbmi.ref_frame != INTRA_FRAME)
         {
             near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
-            mv_bias(left, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
             near_ref[vcnt] =  left->mbmi.ref_frame;
         }
         vcnt++;
@@ -1664,31 +1625,31 @@
         if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
         {
             near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
-            mv_bias(aboveleft, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
             near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
         }
         vcnt++;
 
-        // read in 4 nearby block's MVs from last frame.
+        // read in 5 nearby block's MVs from last frame.
         if(cpi->common.last_frame_type != KEY_FRAME)
         {
-            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride) + (-xd->mb_to_left_edge/128 +1) ;
+            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
 
             // current in last frame
             if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
             {
                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
-                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
             }
             vcnt++;
 
             // above in last frame
-            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride] != INTRA_FRAME)
+            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
             {
-                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride].as_int;
-                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
-                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride];
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
+                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
             }
             vcnt++;
 
@@ -1696,19 +1657,28 @@
             if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
             {
                 near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
-                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
                 near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
             }
             vcnt++;
 
-            // aboveleft in last frame
-            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1] != INTRA_FRAME)
+            // right in last frame
+            if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
             {
-                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride -1].as_int;
-                lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
-                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1];
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
+                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
             }
             vcnt++;
+
+            // below in last frame
+            if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
+            {
+                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
+                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
+                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
+            }
+            vcnt++;
         }
 
         for(i=0; i< vcnt; i++)
@@ -1719,9 +1689,7 @@
                 {
                     mv.as_int = near_mvs[near_sadidx[i]].as_int;
                     find = 1;
-                    if(vcnt<2)
-                        *sr = 4;
-                    else if (vcnt<4)
+                    if (i < 3)
                         *sr = 3;
                     else
                         *sr = 2;
@@ -1791,8 +1759,8 @@
     int force_no_skip = 0;
 
     MV mvp;
-    int near_sad[7]; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf aboveleft
-    int near_sadidx[7] = {0, 1, 2, 3, 4, 5, 6};
+    int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
+    int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
     int saddone=0;
     int sr=0;    //search range got from mv_pred(). It uses step_param levels. (0-7)
 
@@ -1964,36 +1932,29 @@
 
                 if(cpi->common.last_frame_type != KEY_FRAME)
                 {
-                    //calculate sad for last frame 4 nearby MBs.
+                    //calculate sad for last frame 5 nearby MBs.
                     unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
                     int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
 
-                    if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
-                    {
-                        near_sad[4] = near_sad[5] = near_sad[6] = INT_MAX;
-                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
-                    }else if(xd->mb_to_top_edge==0)
-                    {   //only has left MB for sad calculation.
-                        near_sad[4] = near_sad[6] = INT_MAX;
-                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
-                        near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
-                    }else if(xd->mb_to_left_edge ==0)
-                    {   //only has left MB for sad calculation.
-                        near_sad[5] = near_sad[6] = INT_MAX;
-                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                    if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
+                    if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
+                    if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
+                    if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
+
+                    if(near_sad[4] != INT_MAX)
                         near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
-                    }else
-                    {
-                        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
-                        near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
+                    if(near_sad[5] != INT_MAX)
                         near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
-                        near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16 -16, pre_y_stride, 0x7fffffff);
-                    }
+                    near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
+                    if(near_sad[6] != INT_MAX)
+                        near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
+                    if(near_sad[7] != INT_MAX)
+                        near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
                 }
 
                 if(cpi->common.last_frame_type != KEY_FRAME)
                 {
-                    quicksortsad(near_sad, near_sadidx, 0, 6);
+                    quicksortsad(near_sad, near_sadidx, 0, 7);
                 }else
                 {
                     quicksortsad(near_sad, near_sadidx, 0, 2);
@@ -2578,4 +2539,3 @@
     return best_rd;
 }
 #endif
-
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -60,7 +60,7 @@
 vpx_vp8_enc_asm_offsets.asm: obj_int_extract
 vpx_vp8_enc_asm_offsets.asm: $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
 	./obj_int_extract rvds $< $(ADS2GAS) > $@
-OBJS-yes += $(VP8_PREFIX)encoder/arm/vpx_vp7_enc_asm_offsets.c.o
+OBJS-yes += $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
 CLEAN-OBJS += vpx_vp8_enc_asm_offsets.asm
 $(filter %$(ASM).o,$(OBJS-yes)): vpx_vp8_enc_asm_offsets.asm
 endif