shithub: libvpx

Download patch

ref: 0bb49c4e30c90d5deb7049ea1c6039a3046831ea
parent: 642ac924abfd24664335910d540742b4b71d58dd
parent: 0c481f4d1824866527a82ab60f28d7c9d304679d
author: Ronald S. Bultje <rbultje@google.com>
date: Wed Apr 17 07:22:32 EDT 2013

Merge "Add SSE2 versions for rectangular sad and sad4d functions." into experimental

--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -376,7 +376,8 @@
 # variance
 [ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
 
-#if CONFIG_SBSEGMENT
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
 prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance32x16
 
@@ -388,8 +389,9 @@
 
 prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance32x64
-#endif
 
+fi
+
 prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance32x32
 
@@ -424,7 +426,8 @@
 prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance64x64 sse2
 
-#if CONFIG_SBSEGMENT
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
 prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance32x64
 
@@ -436,8 +439,9 @@
 
 prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance16x32
-#endif
 
+fi
+
 prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_sub_pixel_variance32x32 sse2
 
@@ -464,20 +468,22 @@
 prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
 specialize vp9_sad64x64 sse2
 
-#if CONFIG_SBSEGMENT
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
 prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x64
+specialize vp9_sad32x64 sse2
 
 prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x32
+specialize vp9_sad64x32 sse2
 
 prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x16
+specialize vp9_sad32x16 sse2
 
 prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x32
-#endif
+specialize vp9_sad16x32 sse2
 
+fi
+
 prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
 specialize vp9_sad32x32 sse2
 
@@ -571,19 +577,21 @@
 prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp9_sad64x64x4d sse2
 
-#if CONFIG_SBSEGMENT
+if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
+
 prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x64x4d
+specialize vp9_sad32x64x4d sse2
 
 prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x32x4d
+specialize vp9_sad64x32x4d sse2
 
 prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x16x4d
+specialize vp9_sad32x16x4d sse2
 
 prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x32x4d
-#endif
+specialize vp9_sad16x32x4d sse2
+
+fi
 
 prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array"
 specialize vp9_sad32x32x4d sse2
--- a/vp9/encoder/x86/vp9_sad4d_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm
@@ -215,7 +215,11 @@
 
 INIT_XMM sse2
 SADNXN4D 64, 64
+SADNXN4D 64, 32
+SADNXN4D 32, 64
 SADNXN4D 32, 32
+SADNXN4D 32, 16
+SADNXN4D 16, 32
 SADNXN4D 16, 16
 SADNXN4D 16,  8
 SADNXN4D  8, 16
--- a/vp9/encoder/x86/vp9_sad_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad_sse2.asm
@@ -14,11 +14,11 @@
 
 ; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride,
 ;                                uint8_t *ref, int ref_stride);
-INIT_XMM sse2
-cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+%macro SAD64XN 1
+cglobal sad64x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
   movsxdifnidn src_strideq, src_strided
   movsxdifnidn ref_strideq, ref_strided
-  mov              n_rowsd, 64
+  mov              n_rowsd, %1
   pxor                  m0, m0
 .loop:
   movu                  m1, [refq]
@@ -42,14 +42,19 @@
   paddd                 m0, m1
   movd                 eax, m0
   RET
+%endmacro
 
+INIT_XMM sse2
+SAD64XN 64 ; sad64x64_sse2
+SAD64XN 32 ; sad64x32_sse2
+
 ; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride,
 ;                                uint8_t *ref, int ref_stride);
-INIT_XMM sse2
-cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+%macro SAD32XN 1
+cglobal sad32x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
   movsxdifnidn src_strideq, src_strided
   movsxdifnidn ref_strideq, ref_strided
-  mov              n_rowsd, 16
+  mov              n_rowsd, %1/2
   pxor                  m0, m0
 
 .loop:
@@ -74,7 +79,13 @@
   paddd                 m0, m1
   movd                 eax, m0
   RET
+%endmacro
 
+INIT_XMM sse2
+SAD32XN 64 ; sad32x64_sse2
+SAD32XN 32 ; sad32x32_sse2
+SAD32XN 16 ; sad32x16_sse2
+
 ; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
 ;                                    uint8_t *ref, int ref_stride);
 %macro SAD16XN 1
@@ -112,6 +123,7 @@
 %endmacro
 
 INIT_XMM sse2
+SAD16XN 32 ; sad16x32_sse2
 SAD16XN 16 ; sad16x16_sse2
 SAD16XN  8 ; sad16x8_sse2