shithub: dav1d

Download patch

ref: 0f25db1ab9fad46e134a037b85342a0d886d2ec7
parent: 276973ecebea1fa7657253af26f6af3ac1e85513
author: Henrik Gramner <gramner@twoorioles.com>
date: Wed Dec 19 19:43:42 EST 2018

Use 16-byte stack alignment on 32-bit x86

We don't support AVX2 on x86-32 so having 32-byte alignment is redundant.

--- a/include/common/attributes.h
+++ b/include/common/attributes.h
@@ -38,15 +38,16 @@
 #define ATTR_ALIAS
 #endif
 
-#if ARCH_X86
+#if ARCH_X86_64
+/* x86-64 needs 32-byte alignment for AVX2. */
 #define ALIGN_32_VAL 32
 #define ALIGN_16_VAL 16
-#elif ARCH_ARM || ARCH_AARCH64
-// ARM doesn't benefit from anything more than 16 byte alignment.
+#elif ARCH_X86_32 || ARCH_ARM || ARCH_AARCH64
+/* x86-32 (no AVX2 support) and ARM don't benefit from more than 16-byte alignment. */
 #define ALIGN_32_VAL 16
 #define ALIGN_16_VAL 16
 #else
-// No need for extra alignment on platforms without assembly.
+/* No need for extra alignment on platforms without assembly. */
 #define ALIGN_32_VAL 8
 #define ALIGN_16_VAL 8
 #endif
--- a/meson.build
+++ b/meson.build
@@ -194,30 +194,35 @@
 stackrealign_flag = []
 
 if host_machine.cpu_family().startswith('x86')
-    if cc.has_argument('-mpreferred-stack-boundary=5')
-        stackalign_flag = ['-mpreferred-stack-boundary=5']
-        stackrealign_flag = ['-mincoming-stack-boundary=4']
-        cdata_asm.set('STACK_ALIGNMENT', 32)
-        cdata.set('STACK_ALIGNMENT', 32)
-    elif cc.has_argument('-mpreferred-stack-boundary=4')
-        stackalign_flag = ['-mpreferred-stack-boundary=4']
-        stackrealign_flag = ['-mincoming-stack-boundary=4']
-        cdata_asm.set('STACK_ALIGNMENT', 16)
-        cdata.set('STACK_ALIGNMENT', 16)
-    elif cc.has_argument('-mstack-alignment=32')
-        stackalign_flag = ['-mstack-alignment=32']
-        stackrealign_flag = ['-mstackrealign']
-        cdata_asm.set('STACK_ALIGNMENT', 32)
-        cdata.set('STACK_ALIGNMENT', 32)
+    if host_machine.cpu_family() == 'x86_64'
+        if cc.has_argument('-mpreferred-stack-boundary=5')
+            stackalign_flag = ['-mpreferred-stack-boundary=5']
+            stackrealign_flag = ['-mincoming-stack-boundary=4']
+            stack_alignment = 32
+        elif cc.has_argument('-mstack-alignment=32')
+            stackalign_flag = ['-mstack-alignment=32']
+            stackrealign_flag = ['-mstackrealign']
+            stack_alignment = 32
+        else
+            stack_alignment = 16
+        endif
     else
-        if host_machine.cpu_family() == 'x86_64'
-            cdata_asm.set('STACK_ALIGNMENT', 16)
-            cdata.set('STACK_ALIGNMENT', 16)
+        if host_machine.system() == 'linux' or host_machine.system() == 'darwin'
+            stack_alignment = 16
+        elif cc.has_argument('-mpreferred-stack-boundary=4')
+            stackalign_flag = ['-mpreferred-stack-boundary=4']
+            stackrealign_flag = ['-mincoming-stack-boundary=2']
+            stack_alignment = 16
+        elif cc.has_argument('-mstack-alignment=16')
+            stackalign_flag = ['-mstack-alignment=16']
+            stackrealign_flag = ['-mstackrealign']
+            stack_alignment = 16
         else
-            cdata_asm.set('STACK_ALIGNMENT', 4)
-            cdata.set('STACK_ALIGNMENT', 4)
+            stack_alignment = 4
         endif
     endif
+    cdata_asm.set('STACK_ALIGNMENT', stack_alignment)
+    cdata.set('STACK_ALIGNMENT', stack_alignment)
 endif
 
 cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64')
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -25,6 +25,8 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "config.h"
+
 #include <stdint.h>
 
 #include "src/x86/cpu.h"
@@ -47,6 +49,8 @@
         if (info[2] & (1 <<  9)) flags |= DAV1D_X86_CPU_FLAG_SSSE3;
         if (info[2] & (1 << 19)) flags |= DAV1D_X86_CPU_FLAG_SSE41;
         if (info[2] & (1 << 20)) flags |= DAV1D_X86_CPU_FLAG_SSE42;
+#if ARCH_X86_64
+        /* We only support >128-bit SIMD on x86-64. */
         if (info[2] & (1 << 27)) /* OSXSAVE */ {
             uint64_t xcr = dav1d_cpu_xgetbv(0);
             if ((xcr & 0x00000006) == 0x00000006) /* XMM/YMM */ {
@@ -61,6 +65,7 @@
                 }
             }
         }
+#endif
     }
 
     return flags;