shithub: libvpx

Download patch

ref: 8645a5303971a0e9ac7dc0c589dc493cfd0be69e
parent: 89ffda0ddf6c8abc044b308cf36dcec883b36977
author: Johann <johannkoenig@google.com>
date: Wed Sep 10 06:27:58 EDT 2014

Allow specifying opt dependencies

If an optimization depends on more than one CPU feature, allow all of
them to be specified so that '--disable-X' for any one of them still works.

https://code.google.com/p/webm/issues/detail?id=854

Change-Id: I3108ea37b397371a2be84dd5f2380b304db23f18

--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -49,7 +49,7 @@
 
 my %config = ();
 while (<CONFIG_FILE>) {
-  next if !/^CONFIG_/;
+  next if !/^(?:CONFIG_|HAVE_)/;
   chomp;
   my @pair = split /=/;
   $config{$pair[0]} = $pair[1];
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -645,7 +645,7 @@
     make_tuple(64, 64, &convolve8_ssse3)));
 #endif
 
-#if HAVE_AVX2
+#if HAVE_AVX2 && HAVE_SSSE3
 const ConvolveFunctions convolve8_avx2(
     vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
     vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@@ -665,7 +665,7 @@
     make_tuple(64, 32, &convolve8_avx2),
     make_tuple(32, 64, &convolve8_avx2),
     make_tuple(64, 64, &convolve8_avx2)));
-#endif
+#endif  // HAVE_AVX2 && HAVE_SSSE3
 
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -45,6 +45,13 @@
   $avx_x86_64 = $avx2_x86_64 = '';
 }
 
+# optimizations which depend on multiple features
+if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
+  $avx2_ssse3 = 'avx2';
+} else {
+  $avx2_ssse3 = '';
+}
+
 #
 # RECON
 #
@@ -296,15 +303,15 @@
 $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
 
 add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/;
+specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
 $vp9_convolve8_neon_asm=vp9_convolve8_neon;
 
 add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
 $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
 
 add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
 $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
 
 add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -139,11 +139,11 @@
                            filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
   } \
 }
-#if HAVE_AVX2
+#if HAVE_AVX2 && HAVE_SSSE3
 filter8_1dfunction vp9_filter_block1d16_v8_avx2;
 filter8_1dfunction vp9_filter_block1d16_h8_avx2;
 filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-#if (ARCH_X86_64)
+#if ARCH_X86_64
 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
@@ -150,7 +150,7 @@
 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
-#else
+#else  // ARCH_X86
 filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
 filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
 filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
@@ -157,7 +157,7 @@
 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
-#endif
+#endif  // ARCH_X86_64 / ARCH_X86
 filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
 filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
@@ -190,9 +190,9 @@
 //                          const int16_t *filter_y, int y_step_q4,
 //                          int w, int h);
 FUN_CONV_2D(, avx2);
-#endif
+#endif  // HAVE_AVX2 && HAVE_SSSE3
 #if HAVE_SSSE3
-#if (ARCH_X86_64)
+#if ARCH_X86_64
 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
@@ -204,7 +204,7 @@
 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
-#else
+#else  // ARCH_X86
 filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
 filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
@@ -211,7 +211,7 @@
 filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
 filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
 filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
-#endif
+#endif  // ARCH_X86_64 / ARCH_X86
 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
@@ -270,7 +270,7 @@
 //                              int w, int h);
 FUN_CONV_2D(, ssse3);
 FUN_CONV_2D(avg_ , ssse3);
-#endif
+#endif  // HAVE_SSSE3
 
 #if HAVE_SSE2
 filter8_1dfunction vp9_filter_block1d16_v8_sse2;
@@ -336,4 +336,4 @@
 //                             int w, int h);
 FUN_CONV_2D(, sse2);
 FUN_CONV_2D(avg_ , sse2);
-#endif
+#endif  // HAVE_SSE2