shithub: libvpx

Download patch

ref: 5b307886fb965a23d28509c7740a599c703fbd11
parent: 6eb1254b883cae2d6c63af2263eaf8e18ba8f61a
author: Jim Bankoski <jimbankoski@google.com>
date: Tue Aug 6 04:25:10 EDT 2013

variance x86inc guards

also fixed bug in sad calcs

Change-Id: I6571fcbe37556c16ae32be66dc0fd879852aac1d

--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -494,6 +494,7 @@
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
 
 #if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
 const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
@@ -519,6 +520,7 @@
                         make_tuple(8, 4, sad_8x4x4d_sse2)));
 #endif
 #endif
+#endif
 
 #if HAVE_SSE3
 #if CONFIG_VP8_ENCODER
@@ -537,9 +539,11 @@
 #endif
 
 #if HAVE_SSSE3
+#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
 INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
                         make_tuple(16, 16, sad_16x16_sse3)));
+#endif
 #endif
 
 }  // namespace
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -483,6 +483,7 @@
 #endif
 
 #if HAVE_SSE2
+#if CONFIG_USE_X86INC
 const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
 const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
 const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
@@ -596,8 +597,11 @@
                       make_tuple(6, 5, subpel_avg_variance64x32_sse2),
                       make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
 #endif
+#endif
 
 #if HAVE_SSSE3
+#if CONFIG_USE_X86INC
+
 const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
     vp9_sub_pixel_variance4x4_ssse3;
 const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
@@ -681,6 +685,7 @@
                       make_tuple(5, 6, subpel_avg_variance32x64_ssse3),
                       make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
                       make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
+#endif
 #endif
 #endif  // CONFIG_VP9_ENCODER
 
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -357,34 +357,34 @@
 
 # variance
 prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x16 sse2
+specialize vp9_variance32x16 $sse2_x86inc
 
 prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x32 sse2
+specialize vp9_variance16x32 $sse2_x86inc
 
 prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x32 sse2
+specialize vp9_variance64x32 $sse2_x86inc
 
 prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x64 sse2
+specialize vp9_variance32x64 $sse2_x86inc
 
 prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance32x32 sse2
+specialize vp9_variance32x32 $sse2_x86inc
 
 prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance64x64 sse2
+specialize vp9_variance64x64 $sse2_x86inc
 
 prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x16 mmx sse2
+specialize vp9_variance16x16 mmx $sse2_x86inc
 
 prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance16x8 mmx sse2
+specialize vp9_variance16x8 mmx $sse2_x86inc
 
 prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x16 mmx sse2
+specialize vp9_variance8x16 mmx $sse2_x86inc
 
 prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x8 mmx sse2
+specialize vp9_variance8x8 mmx $sse2_x86inc
 
 prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
 specialize vp9_get_sse_sum_8x8 sse2
@@ -391,93 +391,93 @@
 vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
 
 prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance8x4 sse2
+specialize vp9_variance8x4 $sse2_x86inc
 
 prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x8 sse2
+specialize vp9_variance4x8 $sse2_x86inc
 
 prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance4x4 mmx sse2
+specialize vp9_variance4x4 mmx $sse2_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64 sse2 ssse3
+specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x64 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x64 sse2 ssse3
+specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x64 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x32 sse2 ssse3
+specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance64x32 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x16 sse2 ssse3
+specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x16 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x32 sse2 ssse3
+specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x32 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32 sse2 ssse3
+specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance32x32 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x16 sse2 ssse3
+specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x16 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x16 sse2 ssse3
+specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x16 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance16x8 sse2 ssse3
+specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance16x8 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x8 sse2 ssse3
+specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x8 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc
 
 # TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
 prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance8x4 sse2 ssse3
+specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance8x4 sse2 ssse3
+specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x8 sse ssse3
+specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x8 sse ssse3
+specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_sub_pixel_variance4x4 sse ssse3
+specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc
 #vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
 
 prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
-specialize vp9_sub_pixel_avg_variance4x4 sse ssse3
+specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc
 
 prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad"
 specialize vp9_sad64x64 $sse2_x86inc
@@ -558,16 +558,13 @@
 specialize vp9_sad4x4_avg $sse_x86inc
 
 prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_h sse2
-vp9_variance_halfpixvar16x16_h_sse2=vp9_variance_halfpixvar16x16_h_wmt
+specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc
 
 prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_v sse2
-vp9_variance_halfpixvar16x16_v_sse2=vp9_variance_halfpixvar16x16_v_wmt
+specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc
 
 prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
-specialize vp9_variance_halfpixvar16x16_hv sse2
-vp9_variance_halfpixvar16x16_hv_sse2=vp9_variance_halfpixvar16x16_hv_wmt
+specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc
 
 prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
 specialize vp9_variance_halfpixvar64x64_h
@@ -679,8 +676,7 @@
 #specialize vp9_sub_pixel_mse16x16 sse2 mmx
 
 prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
-specialize vp9_mse16x16 mmx sse2
-vp9_mse16x16_sse2=vp9_mse16x16_wmt
+specialize vp9_mse16x16 mmx $sse2_x86inc
 
 prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse"
 specialize vp9_mse8x16
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -244,7 +244,7 @@
   return (var - (((unsigned int)avg * avg) >> 8));
 }
 
-unsigned int vp9_mse16x16_wmt(
+unsigned int vp9_mse16x16_sse2(
   const unsigned char *src_ptr,
   int  source_stride,
   const unsigned char *ref_ptr,
@@ -500,7 +500,7 @@
 #undef FNS
 #undef FN
 
-unsigned int vp9_variance_halfpixvar16x16_h_wmt(
+unsigned int vp9_variance_halfpixvar16x16_h_sse2(
   const unsigned char *src_ptr,
   int  src_pixels_per_line,
   const unsigned char *dst_ptr,
@@ -519,7 +519,7 @@
 }
 
 
-unsigned int vp9_variance_halfpixvar16x16_v_wmt(
+unsigned int vp9_variance_halfpixvar16x16_v_sse2(
   const unsigned char *src_ptr,
   int  src_pixels_per_line,
   const unsigned char *dst_ptr,
@@ -537,7 +537,7 @@
 }
 
 
-unsigned int vp9_variance_halfpixvar16x16_hv_wmt(
+unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
   const unsigned char *src_ptr,
   int  src_pixels_per_line,
   const unsigned char *dst_ptr,
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -78,10 +78,8 @@
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
@@ -90,6 +88,8 @@
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
 endif
 
 ifeq ($(ARCH_X86_64),yes)