shithub: libvpx

Download patch

ref: c79ae1713cc18f0a7952bffa3c244ab1d07aa249
parent: 722972454c3e26e953e2ff578fea021c4665158e
author: Ronald S. Bultje <rbultje@google.com>
date: Mon Nov 12 11:18:35 EST 2012

Use full 32-pixel edge for superblock bestrefmv motion vector ordering.

Change-Id: I417e39867c020a17d85370972446a8ce2bbe9a6d

--- a/vp9/common/findnearmv.c
+++ b/vp9/common/findnearmv.c
@@ -42,23 +42,39 @@
 }
 
 #define SP(x) (((x) & 7) << 1)
-unsigned int vp9_sad3x16_c(
-  const unsigned char *src_ptr,
-  int  src_stride,
-  const unsigned char *ref_ptr,
-  int  ref_stride,
-  int max_sad) {
+unsigned int vp9_sad3x16_c(const unsigned char *src_ptr,
+                           int  src_stride,
+                           const unsigned char *ref_ptr,
+                           int  ref_stride,
+                           int max_sad) {
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16);
 }
-unsigned int vp9_sad16x3_c(
-  const unsigned char *src_ptr,
-  int  src_stride,
-  const unsigned char *ref_ptr,
-  int  ref_stride,
-  int max_sad) {
+unsigned int vp9_sad16x3_c(const unsigned char *src_ptr,
+                           int  src_stride,
+                           const unsigned char *ref_ptr,
+                           int  ref_stride,
+                           int max_sad) {
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
 }
 
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_sad3x32_c(const unsigned char *src_ptr,
+                           int  src_stride,
+                           const unsigned char *ref_ptr,
+                           int  ref_stride,
+                           int max_sad) {
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 32);
+}
+
+unsigned int vp9_sad32x3_c(const unsigned char *src_ptr,
+                           int  src_stride,
+                           const unsigned char *ref_ptr,
+                           int  ref_stride,
+                           int max_sad) {
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 3);
+}
+#endif
+
 #if CONFIG_SUBPELREFMV
 unsigned int vp9_variance2x16_c(const unsigned char *src_ptr,
                                 const int  source_stride,
@@ -93,7 +109,7 @@
                                           const unsigned char *dst_ptr,
                                           const int dst_pixels_per_line,
                                           unsigned int *sse) {
-  unsigned short FData3[16 * 3];  // Temp data bufffer used in filtering
+  unsigned short FData3[16 * 3];  // Temp data buffer used in filtering
   unsigned char  temp2[20 * 16];
   const short *HFilter, *VFilter;
 
@@ -114,7 +130,7 @@
                                           const unsigned char *dst_ptr,
                                           const int dst_pixels_per_line,
                                           unsigned int *sse) {
-  unsigned short FData3[2 * 17];  // Temp data bufffer used in filtering
+  unsigned short FData3[2 * 17];  // Temp data buffer used in filtering
   unsigned char  temp2[2 * 16];
   const short *HFilter, *VFilter;
 
@@ -127,7 +143,77 @@
 
   return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);
 }
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance2x32_c(const unsigned char *src_ptr,
+                                const int  source_stride,
+                                const unsigned char *ref_ptr,
+                                const int  recon_stride,
+                                unsigned int *sse) {
+  unsigned int var;  // first output of variance(); stored to *sse below
+  int avg;           // second output of variance(); presumably the diff sum
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 32, &var, &avg);
+  *sse = var;
+  return (var - (((unsigned int)avg * avg) >> 6));  // 2 * 32 = 64 = 1 << 6
+}
+
+unsigned int vp9_variance32x2_c(const unsigned char *src_ptr,
+                                const int  source_stride,
+                                const unsigned char *ref_ptr,
+                                const int  recon_stride,
+                                unsigned int *sse) {
+  unsigned int var;  // first output of variance(); stored to *sse below
+  int avg;           // second output of variance(); presumably the diff sum
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 2, &var, &avg);
+  *sse = var;
+  return (var - (((unsigned int)avg * avg) >> 6));  // 32 * 2 = 64 = 1 << 6
+}
+
+unsigned int vp9_sub_pixel_variance32x2_c(const unsigned char  *src_ptr,
+                                          const int  src_pixels_per_line,
+                                          const int  xoffset,
+                                          const int  yoffset,
+                                          const unsigned char *dst_ptr,
+                                          const int dst_pixels_per_line,
+                                          unsigned int *sse) {
+  unsigned short FData3[32 * 3];  // Temp data buffer used in filtering
+  unsigned char  temp2[20 * 32];  // Second-pass output, read with stride 32
+  const short *HFilter, *VFilter;
+
+  HFilter = vp9_bilinear_filters[xoffset];  // filter selected by xoffset
+  VFilter = vp9_bilinear_filters[yoffset];  // filter selected by yoffset
+
+  var_filter_block2d_bil_first_pass(src_ptr, FData3,
+                                    src_pixels_per_line, 1, 3, 32, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 2, 32, VFilter);
+  // Score the full 32-pixel-wide edge; the 16x2 variant would cover half.
+  return vp9_variance32x2_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance2x32_c(const unsigned char  *src_ptr,
+                                          const int  src_pixels_per_line,
+                                          const int  xoffset,
+                                          const int  yoffset,
+                                          const unsigned char *dst_ptr,
+                                          const int dst_pixels_per_line,
+                                          unsigned int *sse) {
+  unsigned short FData3[2 * 33];  // Temp data buffer used in filtering
+  unsigned char  temp2[2 * 32];   // Second-pass output, read with stride 2
+  const short *HFilter, *VFilter;
+
+  HFilter = vp9_bilinear_filters[xoffset];  // filter selected by xoffset
+  VFilter = vp9_bilinear_filters[yoffset];  // filter selected by yoffset
+
+  var_filter_block2d_bil_first_pass(src_ptr, FData3,
+                                    src_pixels_per_line, 1, 33, 2, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 32, 2, VFilter);
+  // Score the full 32-pixel-tall edge; the 2x16 variant would cover half.
+  return vp9_variance2x32_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);
+}
 #endif
+#endif
 
 /* check a list of motion vectors by sad score using a number rows of pixels
  * above and a number cols of pixels in the left to select the one with best
@@ -194,15 +280,39 @@
     offset = ref_y_stride * row_offset + col_offset;
     score = 0;
     if (xd->up_available) {
-      vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,
-                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                                   above_src, xd->dst.y_stride, &sse);
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb) {
+        vp9_sub_pixel_variance32x2_c(above_ref + offset, ref_y_stride,
+                                     SP(this_mv.as_mv.col),
+                                     SP(this_mv.as_mv.row),
+                                     above_src, xd->dst.y_stride, &sse);
+      } else {
+#endif
+        vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,
+                                     SP(this_mv.as_mv.col),
+                                     SP(this_mv.as_mv.row),
+                                     above_src, xd->dst.y_stride, &sse);
+#if CONFIG_SUPERBLOCKS
+      }
+#endif
       score += sse;
     }
     if (xd->left_available) {
-      vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
-                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
-                                   left_src, xd->dst.y_stride, &sse);
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb) {
+        vp9_sub_pixel_variance2x32_c(left_ref + offset, ref_y_stride,
+                                     SP(this_mv.as_mv.col),
+                                     SP(this_mv.as_mv.row),
+                                     left_src, xd->dst.y_stride, &sse);
+      } else {
+#endif
+        vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+                                     SP(this_mv.as_mv.col),
+                                     SP(this_mv.as_mv.row),
+                                     left_src, xd->dst.y_stride, &sse);
+#if CONFIG_SUPERBLOCKS
+      }
+#endif
       score += sse;
     }
 #else
@@ -213,12 +323,30 @@
     offset = ref_y_stride * row_offset + col_offset;
     score = 0;
     if (xd->up_available) {
-      score += vp9_sad16x3(above_src, xd->dst.y_stride,
-                           above_ref + offset, ref_y_stride, INT_MAX);
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb) {
+        score += vp9_sad32x3(above_src, xd->dst.y_stride,
+                             above_ref + offset, ref_y_stride, INT_MAX);
+      } else {
+#endif
+        score += vp9_sad16x3(above_src, xd->dst.y_stride,
+                             above_ref + offset, ref_y_stride, INT_MAX);
+#if CONFIG_SUPERBLOCKS
+      }
+#endif
     }
     if (xd->left_available) {
-      score += vp9_sad3x16(left_src, xd->dst.y_stride,
-                           left_ref + offset, ref_y_stride, INT_MAX);
+#if CONFIG_SUPERBLOCKS
+      if (xd->mode_info_context->mbmi.encoded_as_sb) {
+        score += vp9_sad3x32(left_src, xd->dst.y_stride,
+                             left_ref + offset, ref_y_stride, INT_MAX);
+      } else {
+#endif
+        score += vp9_sad3x16(left_src, xd->dst.y_stride,
+                             left_ref + offset, ref_y_stride, INT_MAX);
+#if CONFIG_SUPERBLOCKS
+      }
+#endif
     }
 #endif
     // Add the entry to our list and then resort the list on score.
--- a/vp9/common/rtcd_defs.sh
+++ b/vp9/common/rtcd_defs.sh
@@ -233,6 +233,16 @@
 prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
 specialize vp9_sad3x16 sse2
 
+if [ "$CONFIG_SUPERBLOCKS" = "yes" ]; then
+
+prototype unsigned int vp9_sad32x3 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad32x3
+
+prototype unsigned int vp9_sad3x32 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
+specialize vp9_sad3x32
+
+fi
+
 #
 # Encoder functions below this point.
 #