shithub: libvpx

Download patch

ref: 7b8e7f0f3ae13ebf29200324b0c4d7fe64780a58
parent: 4561109a69338336d7ab1875fd2fd62f80392b14
author: Yunqing Wang <yunqingwang@google.com>
date: Wed Mar 9 06:16:30 EST 2011

Add vp8_sub_pixel_variance16x8_ssse3 function

Added SSSE3 function

Change-Id: I8c304c92458618d93fda3a2f62bd09ccb63e75ad

--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -76,8 +76,8 @@
     unsigned int *sse
 )
 {
-    int xsum0, xsum1;
-    unsigned int xxsum0, xxsum1;
+    int xsum0;
+    unsigned int xxsum0;
 
     // note we could avoid these if statements if the calling function
     // just called the appropriate functions inside.
@@ -113,4 +113,53 @@
 
     *sse = xxsum0;
     return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+unsigned int vp8_sub_pixel_variance16x8_ssse3
+(
+    const unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse
+
+)
+{
+    int xsum0;
+    unsigned int xxsum0;
+
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_ssse3(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum0, &xxsum0);
+    }
+
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 7));
 }
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -286,6 +286,7 @@
 #if HAVE_SSSE3
 extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
 extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
+extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
@@ -294,6 +295,9 @@
 
 #undef  vp8_variance_sad16x8x3
 #define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
+
+#undef  vp8_variance_subpixvar16x8
+#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
 
 #undef  vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -334,6 +334,7 @@
         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
 
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;
 
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
--