shithub: libvpx

--- a/vp8/common/findnearmv.c

+++ b/vp8/common/findnearmv.c

@@ -168,6 +168,7 @@

 #if CONFIG_NEWBESTREFMV

+#define SP(x) (((x) & 7) << 1)

 unsigned int vp8_sad3x16_c(

   const unsigned char *src_ptr,

   int  src_stride,

@@ -189,7 +190,6 @@

  * above and a number cols of pixels in the left to select the one with best

  * score to use as ref motion vector

*/

 void vp8_find_best_ref_mvs(MACROBLOCKD *xd,

                            unsigned char *ref_y_buffer,

                            int ref_y_stride,

@@ -203,6 +203,7 @@

   unsigned char *above_ref;

   unsigned char *left_ref;

   int sad;

+  int sse;

   int sad_scores[MAX_MV_REFS] = {0};

   int_mv sorted_mvs[MAX_MV_REFS];

   int zero_seen = FALSE;

@@ -211,16 +212,16 @@

   best_mv->as_int = nearest->as_int = near->as_int = 0;

   vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));

-  above_src = xd->dst.y_buffer - xd->dst.y_stride * 3;

-  left_src  = xd->dst.y_buffer - 3;

-  above_ref = ref_y_buffer - ref_y_stride * 3;

-  left_ref  = ref_y_buffer - 3;

+  above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;

+  left_src  = xd->dst.y_buffer - 2;

+  above_ref = ref_y_buffer - ref_y_stride * 2;

+  left_ref  = ref_y_buffer - 2;

   //for(i = 0; i < MAX_MV_REFS; ++i) {

   // Limit search to the predicted best 4

   for(i = 0; i < 4; ++i) {

     int_mv this_mv;

-    int offset=0;

+    int offset = 0;

     int row_offset, col_offset;

     this_mv.as_int = mvlist[i].as_int;

@@ -238,19 +239,23 @@

                  xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16,

                  xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);

-    row_offset = (this_mv.as_mv.row > 0) ?

-      ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3);

-    col_offset = (this_mv.as_mv.col > 0) ?

-      ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3);

+    row_offset = this_mv.as_mv.row >> 3;

+    col_offset = this_mv.as_mv.col >> 3;

     offset = ref_y_stride * row_offset + col_offset;

     sad = 0;

-    if (xd->up_available)

-      sad += vp8_sad16x3(above_src, xd->dst.y_stride,

-                           above_ref + offset, ref_y_stride, INT_MAX);

-    if (xd->left_available)

-      sad += vp8_sad3x16(left_src, xd->dst.y_stride,

-                           left_ref + offset, ref_y_stride, INT_MAX);

+    if (xd->up_available) {

+      vp8_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,

+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),

+                                   above_src, xd->dst.y_stride, &sse);

+      sad += sse;

+    }

+    if (xd->left_available) {

+      vp8_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,

+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),

+                                   left_src, xd->dst.y_stride, &sse);

+      sad += sse;

+    }

     // Add the entry to our list and then resort the list on score.

     sad_scores[i] = sad;

     sorted_mvs[i].as_int = this_mv.as_int;

@@ -280,7 +285,7 @@

   // be more than one 0,0 entry in the sorted list.

   // The best ref mv is always set to the first entry (which gave the best

   // results. The nearest is set to the first non zero vector if available and

-  // near to the second non zero vector if avaialable.

+  // near to the second non zero vector if available.

   // We do not use 0,0 as a nearest or near as 0,0 has its own mode.

   if ( sorted_mvs[0].as_int ) {

     nearest->as_int = sorted_mvs[0].as_int;

--- a/vp8/encoder/variance_c.c

+++ b/vp8/encoder/variance_c.c

@@ -508,3 +508,80 @@

   return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);

+#if CONFIG_NEWBESTREFMV

+unsigned int vp8_variance2x16_c(  // Scores src against ref via variance(); presumably a 2-wide by 16-high block -- TODO confirm variance() argument order

+  const unsigned char *src_ptr,  // first pixel block, forwarded to variance()

+  const int  source_stride,  // forwarded to variance() right after src_ptr

+  const unsigned char *ref_ptr,  // second pixel block, forwarded to variance()

+  const int  recon_stride,  // forwarded to variance() right after ref_ptr

+  unsigned int *sse) {  // out: receives the raw value variance() stores in var

+  unsigned int var;  // filled in by variance(); presumably the sum of squared differences

+  int avg;  // filled in by variance(); NOTE(review): the formula below suggests this is a sum, not a mean -- confirm

+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg);

+  *sse = var;  // hand the uncorrected value back to the caller

+  return (var - ((avg * avg) >> 5));  // >> 5 divides by 32, which equals 2 * 16

+}

+unsigned int vp8_variance16x2_c(  // Scores src against ref via variance(); presumably a 16-wide by 2-high block -- TODO confirm variance() argument order

+  const unsigned char *src_ptr,  // first pixel block, forwarded to variance()

+  const int  source_stride,  // forwarded to variance() right after src_ptr

+  const unsigned char *ref_ptr,  // second pixel block, forwarded to variance()

+  const int  recon_stride,  // forwarded to variance() right after ref_ptr

+  unsigned int *sse) {  // out: receives the raw value variance() stores in var

+  unsigned int var;  // filled in by variance(); presumably the sum of squared differences

+  int avg;  // filled in by variance(); NOTE(review): the formula below suggests this is a sum, not a mean -- confirm

+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg);

+  *sse = var;  // hand the uncorrected value back to the caller

+  return (var - ((avg * avg) >> 5));  // >> 5 divides by 32, which equals 16 * 2

+}

+unsigned int vp8_sub_pixel_variance16x2_c  // Runs src through the two bilinear filter passes, then scores the result against dst with vp8_variance16x2_c

+(

+  const unsigned char  *src_ptr,  // input to the first filter pass

+  const int  src_pixels_per_line,  // forwarded to the first filter pass; presumably the row stride of src_ptr

+  const int  xoffset,  // index into vp8_bilinear_filters selecting HFilter; valid range not visible here

+  const int  yoffset,  // index into vp8_bilinear_filters selecting VFilter; valid range not visible here

+  const unsigned char *dst_ptr,  // block the filtered result is compared against

+  const int dst_pixels_per_line,  // forwarded to vp8_variance16x2_c together with dst_ptr

+  unsigned int *sse  // out: passed straight through to vp8_variance16x2_c

+) {

+  unsigned short FData3[16 * 3];  // Temp data buffer used in filtering (first-pass output)

+  unsigned char  temp2[20 * 16];  // second-pass output; NOTE(review): looks larger than a 16x2 result needs -- confirm

+  const short *HFilter, *VFilter;

+  HFilter = vp8_bilinear_filters[xoffset];

+  VFilter = vp8_bilinear_filters[yoffset];

+  var_filter_block2d_bil_first_pass(src_ptr, FData3,  // presumably the horizontal pass, given HFilter

+                                    src_pixels_per_line, 1, 3, 16, HFilter);  // presumably step 1, 3 rows, 16 columns -- TODO confirm

+  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);  // presumably the vertical pass producing 2 rows of 16 -- TODO confirm

+  return vp8_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);  // temp2 is read with a stride of 16

+}

+unsigned int vp8_sub_pixel_variance2x16_c  // Runs src through the two bilinear filter passes, then scores the result against dst with vp8_variance2x16_c

+(

+  const unsigned char  *src_ptr,  // input to the first filter pass

+  const int  src_pixels_per_line,  // forwarded to the first filter pass; presumably the row stride of src_ptr

+  const int  xoffset,  // index into vp8_bilinear_filters selecting HFilter; valid range not visible here

+  const int  yoffset,  // index into vp8_bilinear_filters selecting VFilter; valid range not visible here

+  const unsigned char *dst_ptr,  // block the filtered result is compared against

+  const int dst_pixels_per_line,  // forwarded to vp8_variance2x16_c together with dst_ptr

+  unsigned int *sse  // out: passed straight through to vp8_variance2x16_c

+) {

+  unsigned short FData3[2 * 17];  // Temp data buffer used in filtering (first-pass output)

+  unsigned char  temp2[2 * 16];  // second-pass output, later read with a stride of 2

+  const short *HFilter, *VFilter;

+  HFilter = vp8_bilinear_filters[xoffset];

+  VFilter = vp8_bilinear_filters[yoffset];

+  var_filter_block2d_bil_first_pass(src_ptr, FData3,  // presumably the horizontal pass, given HFilter

+                                    src_pixels_per_line, 1, 17, 2, HFilter);  // presumably step 1, 17 rows, 2 columns -- TODO confirm

+  var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter);  // presumably the vertical pass producing 16 rows of 2 -- TODO confirm

+  return vp8_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);  // temp2 is read with a stride of 2

+}

+#endif

--

⑨