ref: 927de8453c502586c03e25c169ec08f2a93ebc02
parent: ddb2c97242f80e9e468001d174025d6f76d01d10
author: Marcus Asteborg <maastebo@microsoft.com>
date: Fri Mar 13 09:31:29 EDT 2020
cmake - intrinsics fixes: only compile SSE sources with SSE flags on non-Windows when the target uses the runtime check (GH #154). For Windows we only use the /arch flag when the target is presumed to support SSE, to avoid AVX function pollution (GH #132). Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,7 @@
endif()
endif()
+
if(OPUS_CPU_X86 OR OPUS_CPU_X64)
cmake_dependent_option(OPUS_X86_MAY_HAVE_SSE
"Does runtime check for SSE1 support"
@@ -77,6 +78,7 @@
"AVX_SUPPORTED"
OFF)
+ # PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
if(OPUS_CPU_X64) # Assume 64 bit has SSE2 support
cmake_dependent_option(OPUS_X86_PRESUME_SSE
"Assume target CPU has SSE1 support"
@@ -151,14 +153,14 @@
"does runtime check for SSE4_1 support")
add_feature_info(X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX
"does runtime check for AVX support")
- add_feature_info(X86_PRESUME_SSE OPUS_X86_PRESUME_SSE
- "assume target CPU has SSE1 support")
- add_feature_info(X86_PRESUME_SSE2 OPUS_X86_PRESUME_SSE2
- "assume target CPU has SSE2 support")
- add_feature_info(X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1
- "assume target CPU has SSE4_1 support")
- add_feature_info(X86_PRESUME_AVX OPUS_X86_PRESUME_AVX
- "assume target CPU has AVX support")
+ add_feature_info(OPUS_X86_PRESUME_SSE OPUS_X86_PRESUME_SSE
+ "assume target CPU has SSE1 support will override the runtime check")
+ add_feature_info(OPUS_X86_PRESUME_SSE2 OPUS_X86_PRESUME_SSE2
+ "assume target CPU has SSE2 support will override the runtime check")
+ add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1
+ "assume target CPU has SSE4_1 support will override the runtime check")
+ add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX
+ "assume target CPU has AVX support will override the runtime check")
endif()
feature_summary(WHAT ALL)
@@ -242,39 +244,100 @@
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
endif()
-if(OPUS_X86_MAY_HAVE_SSE
- OR OPUS_X86_MAY_HAVE_SSE2
- OR OPUS_X86_MAY_HAVE_SSE4_1
- OR OPUS_X86_MAY_HAVE_AVX)
+#[[Build flags for SSE are set in the following way:
+MSVC: if OPUS_X86_PRESUME_X is set, we set the highest possible /arch:X;
+we do not set any /arch flag for OPUS_X86_MAY_HAVE_X (runtime check only) because of:
+https://randomascii.wordpress.com/2016/12/05/vc-archavx-option-unsafe-at-any-speed/
+Non-MSVC: for OPUS_X86_MAY_HAVE_X we set the compiler flags on a per-file basis;
+for OPUS_X86_PRESUME_X we set them for the whole target.]]
+
+if((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
+ (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
+ (OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
+ (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) # define OPUS_HAVE_RTCD only if some level is MAY_HAVE but not PRESUME
target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
endif()
-if(OPUS_X86_MAY_HAVE_SSE)
- add_sources_group(opus celt ${celt_sources_sse})
- target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE)
+if(SSE1_SUPPORTED) # SSE1 setup for the opus target; skipped entirely when SSE1_SUPPORTED is false
+ if(OPUS_X86_MAY_HAVE_SSE) # runtime-checked SSE1: add the celt SSE sources to opus
+ add_sources_group(opus celt ${celt_sources_sse})
+ target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE)
+ if(NOT MSVC) # non-MSVC: apply -msse to the SSE source files only
+ set_source_files_properties(${celt_sources_sse} PROPERTIES COMPILE_FLAGS -msse)
+ endif()
+ endif()
+ if(OPUS_X86_PRESUME_SSE) # SSE1 presumed on the target CPU
+ target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE)
+ if(NOT MSVC) # non-MSVC: apply -msse to every source of the opus target
+ target_compile_options(opus PRIVATE -msse)
+ endif()
+ endif()
+endif()
-if(OPUS_X86_PRESUME_SSE)
- target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE)
+
+if(SSE2_SUPPORTED) # SSE2 setup for the opus target; skipped entirely when SSE2_SUPPORTED is false
+ if(OPUS_X86_MAY_HAVE_SSE2) # runtime-checked SSE2: add the celt SSE2 sources to opus
+ add_sources_group(opus celt ${celt_sources_sse2})
+ target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
+ if(NOT MSVC) # non-MSVC: apply -msse2 to the SSE2 source files only
+ set_source_files_properties(${celt_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
+ endif()
+ endif()
+ if(OPUS_X86_PRESUME_SSE2) # SSE2 presumed on the target CPU
+ target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE2)
+ if(NOT MSVC) # non-MSVC: apply -msse2 to every source of the opus target
+ target_compile_options(opus PRIVATE -msse2)
+ endif()
+ endif()
+endif()
-if(OPUS_X86_MAY_HAVE_SSE2)
- add_sources_group(opus celt ${celt_sources_sse2})
- target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
+if(SSE4_1_SUPPORTED) # SSE4.1 setup for the opus target; skipped entirely when SSE4_1_SUPPORTED is false
+ if(OPUS_X86_MAY_HAVE_SSE4_1) # runtime-checked SSE4.1: add the celt and silk SSE4.1 sources
+ add_sources_group(opus celt ${celt_sources_sse4_1})
+ add_sources_group(opus silk ${silk_sources_sse4_1})
+ target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
+ if(NOT MSVC) # non-MSVC: apply -msse4.1 to the SSE4.1 source files only
+ set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
+ endif()
+
+ if(OPUS_FIXED_POINT) # fixed-point builds additionally add the fixed-point silk SSE4.1 sources
+ add_sources_group(opus silk ${silk_sources_fixed_sse4_1})
+ if(NOT MSVC) # same per-file flag for the fixed-point sources
+ set_source_files_properties(${silk_sources_fixed_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
+ endif()
+ endif()
+ endif()
+ if(OPUS_X86_PRESUME_SSE4_1) # SSE4.1 presumed on the target CPU
+ target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
+ if(NOT MSVC) # non-MSVC: apply -msse4.1 to every source of the opus target
+ target_compile_options(opus PRIVATE -msse4.1)
+ endif()
+ endif()
+endif()
-if(OPUS_X86_PRESUME_SSE2)
- target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE2)
-endif()
-if(OPUS_X86_MAY_HAVE_SSE)
- add_sources_group(opus celt ${celt_sources_sse4_1})
- add_sources_group(opus silk ${silk_sources_sse4_1})
- if(OPUS_FIXED_POINT)
- add_sources_group(opus silk ${silk_sources_fixed_sse4_1})
+if(AVX_SUPPORTED)
+ # Mostly a placeholder in case AVX intrinsics are added later
+ if(OPUS_X86_MAY_HAVE_AVX)
+ target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
endif()
- target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
+ if(OPUS_X86_PRESUME_AVX)
+ target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
+ if(NOT MSVC)
+ target_compile_options(opus PRIVATE -mavx)
+ endif()
+ endif()
endif()
-if(OPUS_X86_PRESUME_SSE4_1)
- target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
+
+if(MSVC) # /arch:<X> is a compiler switch, so it is passed with target_compile_options
+  if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
+    target_compile_options(opus PRIVATE /arch:AVX) # scoped to opus instead of the whole directory
+  elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
+    if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
+       OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
+      target_compile_options(opus PRIVATE /arch:SSE2) # compiler option, not a preprocessor definition
+    elseif(SSE1_SUPPORTED AND OPUS_X86_PRESUME_SSE)
+      target_compile_options(opus PRIVATE /arch:SSE) # compiler option, not a preprocessor definition
+    endif()
+  endif()
+endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(armv7-a)")
--- a/opus_functions.cmake
+++ b/opus_functions.cmake
@@ -120,13 +120,17 @@
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
check_flag(SSE1 /arch:SSE)
else()
- set(SSE1_SUPPORTED 1 PARENT_SCOPE)
+ set(SSE1_SUPPORTED
+ 1
+ PARENT_SCOPE)
endif()
else()
- check_and_set_flag(SSE1 -msse)
+ check_flag(SSE1 -msse)
endif()
else()
- set(SSE1_SUPPORTED 0 PARENT_SCOPE)
+ set(SSE1_SUPPORTED
+ 0
+ PARENT_SCOPE)
endif()
check_include_file(emmintrin.h HAVE_EMMINTRIN_H) # SSE2
@@ -135,13 +139,17 @@
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
check_flag(SSE2 /arch:SSE2)
else()
- set(SSE2_SUPPORTED 1 PARENT_SCOPE)
+ set(SSE2_SUPPORTED
+ 1
+ PARENT_SCOPE)
endif()
else()
- check_and_set_flag(SSE2 -msse2)
+ check_flag(SSE2 -msse2)
endif()
else()
- set(SSE2_SUPPORTED 0 PARENT_SCOPE)
+ set(SSE2_SUPPORTED
+ 0
+ PARENT_SCOPE)
endif()
check_include_file(smmintrin.h HAVE_SMMINTRIN_H) # SSE4.1
@@ -150,13 +158,17 @@
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
check_flag(SSE4_1 /arch:SSE2) # SSE2 and above
else()
- set(SSE4_1_SUPPORTED 1 PARENT_SCOPE)
+ set(SSE4_1_SUPPORTED
+ 1
+ PARENT_SCOPE)
endif()
else()
- check_and_set_flag(SSE4_1 -msse4.1)
+ check_flag(SSE4_1 -msse4.1)
endif()
else()
- set(SSE4_1_SUPPORTED 0 PARENT_SCOPE)
+ set(SSE4_1_SUPPORTED
+ 0
+ PARENT_SCOPE)
endif()
check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
@@ -164,22 +176,12 @@
if(MSVC)
check_flag(AVX /arch:AVX)
else()
- check_and_set_flag(AVX -mavx)
+ check_flag(AVX -mavx)
endif()
else()
- set(AVX_SUPPORTED 0 PARENT_SCOPE)
- endif()
-
- if(MSVC) # To avoid warning D9025 of overriding compiler options
- if(AVX_SUPPORTED) # on 64 bit and 32 bits
- add_definitions(/arch:AVX)
- elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) # if AVX not supported then set SSE flag
- if(SSE4_1_SUPPORTED OR SSE2_SUPPORTED)
- add_definitions(/arch:SSE2)
- elseif(SSE1_SUPPORTED)
- add_definitions(/arch:SSE)
- endif()
- endif()
+ set(AVX_SUPPORTED
+ 0
+ PARENT_SCOPE)
endif()
if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)