shithub: libvpx

Download patch

ref: cc9c637d0726fa9cb416f4ef4256e32d8e3c62a3
parent: 23f038a13fbc3f61bd3a4b67f180862bc16e145f
author: Yaowu Xu <yaowu@google.com>
date: Fri Oct 26 05:14:15 EDT 2012

Improves subpixel reference mv evaluation

Previously, in evaluating reference motion vectors, MVs are always
rounded to integer pixel position and SADs are calculated.  This
commit takes into account the subpixel portion of the mvs, and uses
bilinear interpolation to produce reference pixel values in subpixel
positions. In addition, SSE is used in place of SAD. Pixels used are
16x2 above and 2x16 to the left.

This commit intends to test the potential of this line of work in
terms of compression improvement; obviously, the change would increase
decoder complexity significantly.

Test results
std-hd: 1.738%(avg) 1.779%(glb), 1.663%(ssim)
derf: 0.472%(avg) 0.477%(glb), 0.418%(ssim)

Change-Id: I3ae1b098f6289df78891134d9a5e4bb2fde87a0b

--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -168,6 +168,7 @@
 }
 
 #if CONFIG_NEWBESTREFMV
+#define SP(x) (((x) & 7) << 1)  // low 3 bits of x, doubled (even values 0..14); passed as xoffset/yoffset to the sub-pixel variance calls. Presumably x is a 1/8-pel MV component -- confirm
 unsigned int vp8_sad3x16_c(
   const unsigned char *src_ptr,
   int  src_stride,
@@ -189,7 +190,6 @@
  * above and a number cols of pixels in the left to select the one with best
  * score to use as ref motion vector
  */
-
 void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
                            unsigned char *ref_y_buffer,
                            int ref_y_stride,
@@ -203,6 +203,7 @@
   unsigned char *above_ref;
   unsigned char *left_ref;
   int sad;
+  int sse;
   int sad_scores[MAX_MV_REFS] = {0};
   int_mv sorted_mvs[MAX_MV_REFS];
   int zero_seen = FALSE;
@@ -211,16 +212,16 @@
   best_mv->as_int = nearest->as_int = near->as_int = 0;
   vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));
 
-  above_src = xd->dst.y_buffer - xd->dst.y_stride * 3;
-  left_src  = xd->dst.y_buffer - 3;
-  above_ref = ref_y_buffer - ref_y_stride * 3;
-  left_ref  = ref_y_buffer - 3;
+  above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
+  left_src  = xd->dst.y_buffer - 2;
+  above_ref = ref_y_buffer - ref_y_stride * 2;
+  left_ref  = ref_y_buffer - 2;
 
   //for(i = 0; i < MAX_MV_REFS; ++i) {
   // Limit search to the predicted best 4
   for(i = 0; i < 4; ++i) {
     int_mv this_mv;
-    int offset=0;
+    int offset = 0;
     int row_offset, col_offset;
 
     this_mv.as_int = mvlist[i].as_int;
@@ -238,19 +239,23 @@
                  xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16,
                  xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 
-    row_offset = (this_mv.as_mv.row > 0) ?
-      ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3);
-    col_offset = (this_mv.as_mv.col > 0) ?
-      ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3);
+    row_offset = this_mv.as_mv.row >> 3;
+    col_offset = this_mv.as_mv.col >> 3;
     offset = ref_y_stride * row_offset + col_offset;
 
     sad = 0;
-    if (xd->up_available)
-      sad += vp8_sad16x3(above_src, xd->dst.y_stride,
-                           above_ref + offset, ref_y_stride, INT_MAX);
-    if (xd->left_available)
-      sad += vp8_sad3x16(left_src, xd->dst.y_stride,
-                           left_ref + offset, ref_y_stride, INT_MAX);
+    if (xd->up_available) {
+      vp8_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,
+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                                   above_src, xd->dst.y_stride, &sse);
+      sad += sse;
+    }
+    if (xd->left_available) {
+      vp8_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                                   left_src, xd->dst.y_stride, &sse);
+      sad += sse;
+    }
     // Add the entry to our list and then resort the list on score.
     sad_scores[i] = sad;
     sorted_mvs[i].as_int = this_mv.as_int;
@@ -280,7 +285,7 @@
   // be more than one 0,0 entry in the sorted list.
   // The best ref mv is always set to the first entry (which gave the best
   // results. The nearest is set to the first non zero vector if available and
-  // near to the second non zero vector if avaialable.
+  // near to the second non zero vector if available.
   // We do not use 0,0 as a nearest or near as 0,0 has its own mode.
   if ( sorted_mvs[0].as_int ) {
     nearest->as_int = sorted_mvs[0].as_int;
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -508,3 +508,80 @@
 
   return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
+#if CONFIG_NEWBESTREFMV
+unsigned int vp8_variance2x16_c(  // Variance-style score between src and ref for a 2x16 block
+  const unsigned char *src_ptr,
+  const int  source_stride,
+  const unsigned char *ref_ptr,
+  const int  recon_stride,
+  unsigned int *sse) {  // out: receives the first value produced by variance()
+  unsigned int var;
+  int avg;
+  // Presumably var is the sum of squared differences and avg the sum of differences -- confirm against variance().
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg);
+  *sse = var;
+  return (var - ((avg * avg) >> 5));  // >> 5 divides by 32 (2 * 16)
+}
+
+unsigned int vp8_variance16x2_c(  // Variance-style score between src and ref for a 16x2 block
+  const unsigned char *src_ptr,
+  const int  source_stride,
+  const unsigned char *ref_ptr,
+  const int  recon_stride,
+  unsigned int *sse) {  // out: receives the first value produced by variance()
+  unsigned int var;
+  int avg;
+  // Presumably var is the sum of squared differences and avg the sum of differences -- confirm against variance().
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg);
+  *sse = var;
+  return (var - ((avg * avg) >> 5));  // >> 5 divides by 32 (16 * 2)
+}
+
+unsigned int vp8_sub_pixel_variance16x2_c  // Filters src with two bilinear passes, then scores the 16x2 result against dst
+(
+  const unsigned char  *src_ptr,
+  const int  src_pixels_per_line,
+  const int  xoffset,  // index into vp8_bilinear_filters for the first pass; not range-checked here
+  const int  yoffset,  // index into vp8_bilinear_filters for the second pass; not range-checked here
+  const unsigned char *dst_ptr,
+  const int dst_pixels_per_line,
+  unsigned int *sse  // out: forwarded to vp8_variance16x2_c
+) {
+  unsigned short FData3[16 * 3];  // Temp data buffer used in filtering (first-pass output)
+  unsigned char  temp2[20 * 16];  // Second-pass output; NOTE(review): only 16 * 2 bytes appear to be consumed below -- confirm size
+  const short *HFilter, *VFilter;
+  // Select the filter rows for the first and second pass from the shared table.
+  HFilter = vp8_bilinear_filters[xoffset];
+  VFilter = vp8_bilinear_filters[yoffset];
+  // First pass filters src_ptr with HFilter into FData3; second pass filters FData3 with VFilter into temp2.
+  var_filter_block2d_bil_first_pass(src_ptr, FData3,
+                                    src_pixels_per_line, 1, 3, 16, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);
+  // Score the filtered block (stride 16) against dst_ptr.
+  return vp8_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp8_sub_pixel_variance2x16_c  // Filters src with two bilinear passes, then scores the 2x16 result against dst
+(
+  const unsigned char  *src_ptr,
+  const int  src_pixels_per_line,
+  const int  xoffset,  // index into vp8_bilinear_filters for the first pass; not range-checked here
+  const int  yoffset,  // index into vp8_bilinear_filters for the second pass; not range-checked here
+  const unsigned char *dst_ptr,
+  const int dst_pixels_per_line,
+  unsigned int *sse  // out: forwarded to vp8_variance2x16_c
+) {
+  unsigned short FData3[2 * 17];  // Temp data buffer used in filtering (first-pass output)
+  unsigned char  temp2[2 * 16];  // Second-pass output, read below with stride 2
+  const short *HFilter, *VFilter;
+  // Select the filter rows for the first and second pass from the shared table.
+  HFilter = vp8_bilinear_filters[xoffset];
+  VFilter = vp8_bilinear_filters[yoffset];
+  // First pass filters src_ptr with HFilter into FData3; second pass filters FData3 with VFilter into temp2.
+  var_filter_block2d_bil_first_pass(src_ptr, FData3,
+                                    src_pixels_per_line, 1, 17, 2, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter);
+  // Score the filtered block (stride 2) against dst_ptr.
+  return vp8_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif