ref: 63ea8705eb0b4609b1c87968817d18421f051641
parent: 8389f1967c5f8b3819cca80705b1b4ba04132b93
author: Makoto Kato <makoto.kt@gmail.com>
date: Fri Jun 11 14:32:28 EDT 2010
Some XMM registers are non-volatile on the Windows x64 ABI. XMM6 to XMM15 are non-volatile on the Windows x64 ABI, so we have to save these registers. Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -17,6 +17,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
+ SAVE_XMM
push rsi
push rdi
; end prolog
@@ -101,6 +102,7 @@
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -26,6 +26,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -212,6 +213,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -231,6 +233,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -652,6 +655,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -671,6 +675,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -1002,6 +1007,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1021,6 +1027,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -1564,6 +1571,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1583,6 +1591,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -1679,6 +1688,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1698,6 +1708,7 @@
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
@@ -1942,6 +1953,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -26,6 +26,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -240,6 +241,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -254,6 +256,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -439,6 +442,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -452,6 +456,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -612,6 +617,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -67,6 +67,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM
push rsi
push rdi
; end prolog
@@ -119,6 +120,7 @@
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -37,6 +37,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -129,6 +130,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -155,6 +157,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -304,6 +307,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -329,6 +333,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -397,6 +402,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -510,6 +516,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -641,6 +648,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -876,6 +884,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -894,6 +903,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -933,6 +943,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -953,6 +964,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -215,6 +215,25 @@
%define UNSHADOW_ARGS mov rsp, rbp
%endif
+; Win64 ABI: XMM6:XMM15 are non-volatile (callee-saved), so a function that
+; writes them must put the caller's values back before it returns.
+; SAVE_XMM / RESTORE_XMM spill and reload XMM6 and XMM7 only (the two that
+; libvpx uses); XMM8:XMM15 are not touched by these macros.
+;
+; Requirements at the point of use:
+; - rsp has to be 16-byte aligned at SAVE_XMM, because movdqa is an aligned
+;   access and the save slots are addressed directly off rsp.
+; - rsp at RESTORE_XMM has to equal the value SAVE_XMM left behind, so anything
+;   pushed or reserved after SAVE_XMM must already have been released.
+; - Each SAVE_XMM needs a matching RESTORE_XMM on the way out (and vice versa);
+;   an unpaired RESTORE_XMM would load XMM6/XMM7 from unrelated stack contents.
+;
+; The macros emit code only when the output format is x64; for every other
+; output format they are defined empty.
+%ifidn __OUTPUT_FORMAT__,x64
+%macro SAVE_XMM 0
+ sub rsp, 32 ; reserve two 16-byte save slots
+ movdqa XMMWORD PTR [rsp], xmm6 ; slot at [rsp] holds the caller's xmm6
+ movdqa XMMWORD PTR [rsp+16], xmm7 ; slot at [rsp+16] holds the caller's xmm7
+%endmacro
+%macro RESTORE_XMM 0
+ movdqa xmm6, XMMWORD PTR [rsp] ; reload the caller's xmm6
+ movdqa xmm7, XMMWORD PTR [rsp+16] ; reload the caller's xmm7
+ add rsp, 32 ; release the two save slots
+%endmacro
+%else
+; not x64: both macros expand to nothing
+%macro SAVE_XMM 0
+%endmacro
+%macro RESTORE_XMM 0
+%endmacro
+%endif
; Name of the rodata section
;