shithub: libvpx

--- a/vp8/common/extend.c

+++ b/vp8/common/extend.c

@@ -38,7 +38,7 @@

     dest_ptr1 = d - el;

     dest_ptr2 = d + w;

-    for (i = 0; i < h - 0 + 1; i++)

+    for (i = 0; i < h; i++)

         vpx_memset(dest_ptr1, src_ptr1[0], el);

         vpx_memcpy(dest_ptr1 + el, src_ptr1, w);

--- a/vp8/common/x86/subpixel_ssse3.asm

+++ b/vp8/common/x86/subpixel_ssse3.asm

@@ -194,10 +194,6 @@

     mov         rdi, arg(2)                     ;output_ptr

-;;

-;;    cmp         esi, DWORD PTR [rax]

-;;    je          vp8_filter_block1d16_h4_ssse3

     mov         rsi, arg(0)                     ;src_ptr

     movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5

@@ -271,61 +267,7 @@

     pop rdi

     pop rsi

     RESTORE_GOT

-    UNSHADOW_ARGS

-    pop         rbp

-    ret

-vp8_filter_block1d16_h4_ssse3:

-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4

-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

-    mov         rsi, arg(0)             ;src_ptr

-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line

-    movsxd      rcx, dword ptr arg(4)   ;output_height

-    movsxd      rdx, dword ptr arg(3)   ;output_pitch

-filter_block1d16_h4_rowloop_ssse3:

-    movdqu      xmm1,   XMMWORD PTR [rsi - 2]

-    movdqa      xmm2, xmm1

-    pshufb      xmm1, [GLOBAL(shuf2b)]

-    pshufb      xmm2, [GLOBAL(shuf3b)]

-    pmaddubsw   xmm1, xmm5

-    movdqu      xmm3,   XMMWORD PTR [rsi + 6]

-    pmaddubsw   xmm2, xmm6

-    movdqa      xmm0, xmm3

-    pshufb      xmm3, [GLOBAL(shuf3b)]

-    pshufb      xmm0, [GLOBAL(shuf2b)]

-    paddsw      xmm1, [GLOBAL(rd)]

-    paddsw      xmm1, xmm2

-    pmaddubsw   xmm0, xmm5

-    pmaddubsw   xmm3, xmm6

-    psraw       xmm1, 7

-    packuswb    xmm1, xmm1

-    lea         rsi,    [rsi + rax]

-    paddsw      xmm3, xmm0

-    paddsw      xmm3, [GLOBAL(rd)]

-    psraw       xmm3, 7

-    packuswb    xmm3, xmm3

-    punpcklqdq  xmm1, xmm3

-    movdqa      XMMWORD Ptr [rdi], xmm1

-    add         rdi, rdx

-    dec         rcx

-    jnz         filter_block1d16_h4_rowloop_ssse3

-    ; begin epilog

-    pop rdi

-    pop rsi

-    RESTORE_GOT

+    RESTORE_XMM

     UNSHADOW_ARGS

     pop         rbp

ret

--- a/vp8/encoder/mcomp.c

+++ b/vp8/encoder/mcomp.c

@@ -194,13 +194,13 @@

 #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.

 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;

 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost

-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best

+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse;}}, v=INT_MAX;)// checks if (r,c) has better score than previous best

 #define MIN(x,y) (((x)<(y))?(x):(y))

 #define MAX(x,y) (((x)>(y))?(x):(y))

 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }

-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)

     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;

     unsigned char *z = (*(b->base_src) + b->src);

@@ -214,6 +214,7 @@

     unsigned int whichdir;

     unsigned int halfiters = 4;

     unsigned int quarteriters = 4;

+    int thismse;

     int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));

     int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));

@@ -226,6 +227,7 @@

     // calculate central point error

     besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

+    *distortion = besterr;

     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)

@@ -314,7 +316,7 @@

 #undef CHECK_BETTER

 #undef MIN

 #undef MAX

-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)

     int bestmse = INT_MAX;

     MV startmv;

@@ -325,6 +327,7 @@

     int left, right, up, down, diag;

     unsigned int sse;

     int whichdir ;

+    int thismse;

     // Trap uncodable vectors

@@ -332,6 +335,7 @@

         bestmv->row <<= 3;

         bestmv->col <<= 3;

+        *distortion = INT_MAX;

         return INT_MAX;

@@ -342,50 +346,55 @@

     // calculate central point error

     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

+    *distortion = bestmse;

     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // go left then right and check error

     this_mv.row = startmv.row;

     this_mv.col = ((startmv.col - 8) | 4);

-    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

+    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (left < bestmse)

         *bestmv = this_mv;

         bestmse = left;

+        *distortion = thismse;

     this_mv.col += 8;

-    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

-    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

+    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

         *bestmv = this_mv;

         bestmse = right;

+        *distortion = thismse;

     // go up then down and check error

     this_mv.col = startmv.col;

     this_mv.row = ((startmv.row - 8) | 4);

-    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse =  vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (up < bestmse)

         *bestmv = this_mv;

         bestmse = up;

+        *distortion = thismse;

     this_mv.row += 8;

-    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

-    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

+    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

         *bestmv = this_mv;

         bestmse = down;

+        *distortion = thismse;

@@ -400,32 +409,33 @@

     case 0:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+        thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 1:

         this_mv.col += 4;

         this_mv.row = (this_mv.row - 8) | 4;

-        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+        thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 2:

         this_mv.col = (this_mv.col - 8) | 4;

         this_mv.row += 4;

-        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

+        thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

         break;

     case 3:

     default:

         this_mv.col += 4;

         this_mv.row += 4;

-        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

+        thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

         break;

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

 //  }

@@ -448,30 +458,32 @@

     if (startmv.col & 7)

         this_mv.col = startmv.col - 2;

-        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     else

         this_mv.col = (startmv.col - 8) | 6;

-        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);

+        thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);

-    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (left < bestmse)

         *bestmv = this_mv;

         bestmse = left;

+        *distortion = thismse;

     this_mv.col += 4;

-    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

         *bestmv = this_mv;

         bestmse = right;

+        *distortion = thismse;

     // go up then down and check error

@@ -480,30 +492,32 @@

     if (startmv.row & 7)

         this_mv.row = startmv.row - 2;

-        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

     else

         this_mv.row = (startmv.row - 8) | 6;

-        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+        thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

-    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (up < bestmse)

         *bestmv = this_mv;

         bestmse = up;

+        *distortion = thismse;

     this_mv.row += 4;

-    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

-    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

         *bestmv = this_mv;

         bestmse = down;

+        *distortion = thismse;

@@ -525,12 +539,12 @@

             if (startmv.col & 7)

                 this_mv.col -= 2;

-                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+                thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

             else

                 this_mv.col = (startmv.col - 8) | 6;

-                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

+                thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

         else

@@ -540,12 +554,12 @@

             if (startmv.col & 7)

                 this_mv.col -= 2;

-                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+                thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

             else

                 this_mv.col = (startmv.col - 8) | 6;

-                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);

+                thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);

@@ -556,12 +570,12 @@

         if (startmv.row & 7)

             this_mv.row -= 2;

-            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+            thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         else

             this_mv.row = (startmv.row - 8) | 6;

-            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

+            thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);

         break;

@@ -571,12 +585,12 @@

         if (startmv.col & 7)

             this_mv.col -= 2;

-            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+            thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         else

             this_mv.col = (startmv.col - 8) | 6;

-            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

+            thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;

         break;

@@ -583,24 +597,23 @@

     case 3:

         this_mv.col += 2;

         this_mv.row += 2;

-        diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

+        thismse = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);

         break;

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

-//  }

     return bestmse;

-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)

     int bestmse = INT_MAX;

     MV startmv;

@@ -610,6 +623,7 @@

     unsigned char *z = (*(b->base_src) + b->src);

     int left, right, up, down, diag;

     unsigned int sse;

+    int thismse;

     // Trap uncodable vectors

     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))

@@ -616,6 +630,7 @@

         bestmv->row <<= 3;

         bestmv->col <<= 3;

+        *distortion = INT_MAX;

         return INT_MAX;

@@ -626,50 +641,55 @@

     // calculate central point error

     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);

+    *distortion = bestmse;

     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

     // go left then right and check error

     this_mv.row = startmv.row;

     this_mv.col = ((startmv.col - 8) | 4);

-    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);

+    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (left < bestmse)

         *bestmv = this_mv;

         bestmse = left;

+        *distortion = thismse;

     this_mv.col += 8;

-    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

-    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);

+    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (right < bestmse)

         *bestmv = this_mv;

         bestmse = right;

+        *distortion = thismse;

     // go up then down and check error

     this_mv.col = startmv.col;

     this_mv.row = ((startmv.row - 8) | 4);

-    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (up < bestmse)

         *bestmv = this_mv;

         bestmse = up;

+        *distortion = thismse;

     this_mv.row += 8;

-    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

-    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);

+    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (down < bestmse)

         *bestmv = this_mv;

         bestmse = down;

+        *distortion = thismse;

     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.

@@ -713,44 +733,48 @@

 #else

     this_mv.col = (this_mv.col - 8) | 4;

     this_mv.row = (this_mv.row - 8) | 4;

-    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

     this_mv.col += 8;

-    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

     this_mv.col = (this_mv.col - 8) | 4;

     this_mv.row = startmv.row + 4;

-    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

     this_mv.col += 8;

-    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

-    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

+    thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);

+    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

     if (diag < bestmse)

         *bestmv = this_mv;

         bestmse = diag;

+        *distortion = thismse;

 #endif

--- a/vp8/encoder/mcomp.h

+++ b/vp8/encoder/mcomp.h

@@ -49,7 +49,7 @@

 typedef int (fractional_mv_step_fp)

     (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,

-     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);

+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion);

 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;

 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;

 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;

--- a/vp8/encoder/pickinter.c

+++ b/vp8/encoder/pickinter.c

@@ -50,7 +50,7 @@

 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);

-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])

+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)

     (void) b;

     (void) d;

@@ -58,6 +58,7 @@

     (void) error_per_bit;

     (void) vfp;

     (void) mvcost;

+    (void) distortion;

     bestmv->row <<= 3;

     bestmv->col <<= 3;

     return 0;

@@ -459,6 +460,8 @@

     int skip_mode[4] = {0, 0, 0, 0};

+    int have_subp_search = cpi->sf.half_pixel_search;  /* In real-time mode, when Speed >= 15, no sub-pixel search. */

     vpx_memset(mode_mv, 0, sizeof(mode_mv));

     vpx_memset(nearest_mv, 0, sizeof(nearest_mv));

     vpx_memset(near_mv, 0, sizeof(near_mv));

@@ -639,10 +642,10 @@

         switch (this_mode)

         case B_PRED:

-            distortion2 = *returndistortion;                    // Best so far passed in as breakout value to vp8_pick_intra4x4mby_modes

-            vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);

-            rate2 += rate;

-            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);

+            // Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout

+            distortion2 = *returndistortion;

+            vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x,

+                                         &rate, &distortion2);

             if (distortion2 == INT_MAX)

@@ -650,6 +653,11 @@

             else

+                rate2 += rate;

+                distortion2 = VARIANCE_INVOKE

+                                (&cpi->rtcd.variance, get16x16prederror)(

+                                    x->src.y_buffer, x->src.y_stride,

+                                    x->e_mbd.predictor, 16, 0x7fffffff);

                 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

                 if (this_rd < best_intra_rd)

@@ -788,7 +796,7 @@

             if (bestsme < INT_MAX)

-                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);

+                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2);

             mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

             mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

@@ -818,7 +826,8 @@

             x->e_mbd.block[0].bmi.mode = this_mode;

             x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;

-            distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));

+            if((this_mode != NEWMV) || !(have_subp_search))

+                distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));

             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -1270,12 +1270,14 @@

                 if (bestsme < INT_MAX)

+                    int distortion;

                     if (!cpi->common.full_pixel)

                         cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],

-                                                     bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost);

+                                                     bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion);

                     else

                         vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],

-                                                    bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost);

+                                                    bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion);

             } /* NEW4X4 */

@@ -2253,8 +2255,10 @@

                 x->mv_row_max = tmp_row_max;

                 if (bestsme < INT_MAX)

-                    // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11

-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);

+                    {

+                        int dis; /* TODO: use dis in distortion calculation later. */

+                        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis);

+                }

                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;

                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;

--- a/vp8/encoder/temporal_filter.c

+++ b/vp8/encoder/temporal_filter.c

@@ -208,10 +208,11 @@

     // Try sub-pixel MC?

     //if (bestsme > error_thresh && bestsme < INT_MAX)

+        int distortion;

         bestsme = cpi->find_fractional_mv_step(x, b, d,

                     &d->bmi.mv.as_mv, &best_ref_mv1,

                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],

-                    mvcost);

+                    mvcost, &distortion);

 #endif

--

⑨