ref: 3a6a55d8d5883cf094bc3a140e568420014bbf47
parent: fb348f64dc0bcd4c0aaf686afbdd810484a5536f
author: Henrik Gramner <gramner@twoorioles.com>
date: Wed Mar 4 18:05:45 EST 2020
x86: Fix crash in AVX2 cdef_filter with <32-byte stack alignment
--- a/src/x86/cdef.asm
+++ b/src/x86/cdef.asm
@@ -446,7 +446,7 @@
PUSH r11
%if %2 == 4
%assign regs_used 12
- %if WIN64
+ %if STACK_ALIGNMENT < 32
PUSH r%+regs_used
%assign regs_used regs_used+1
%endif
@@ -458,24 +458,24 @@
movu [rsp+0x10], m0
movu [rsp+0x28], m1
movu [rsp+0x40], m2
-%else
+%elif %1 == 4
PUSH r12
- %if %1 == 4
- %assign regs_used 13
- %if WIN64
+ %assign regs_used 13
+ %if STACK_ALIGNMENT < 32
PUSH r%+regs_used
%assign regs_used regs_used+1
- %endif
+ %endif
ALLOC_STACK 8*2+%1*%2*1, 16
pmovzxwd m0, [leftq]
mova [rsp+0x10], m0
- %else
+%else
+ PUSH r12
PUSH r13
- %assign regs_used 14
- %if WIN64
+ %assign regs_used 14
+ %if STACK_ALIGNMENT < 32
PUSH r%+regs_used
- %assign regs_used regs_used+1
- %endif
+ %assign regs_used regs_used+1
+ %endif
ALLOC_STACK 8*2+%1*%2*2+32, 16
lea r11, [strideq*3]
movu xm4, [dstq+strideq*2]
@@ -489,7 +489,6 @@
mova [rsp+0x50], m2
mova [rsp+0x70], m3
mova [rsp+0x90], m4
- %endif
%endif
DEFINE_ARGS dst, stride, left, top, pri, secdmp, zero, pridmp, damping
@@ -1255,7 +1254,7 @@
%else
%assign regs_used 9
%endif
-%if WIN64
+%if STACK_ALIGNMENT < 32
PUSH r%+regs_used
%assign regs_used regs_used+1
%endif