shithub: libvpx

Download patch

ref: 63ea8705eb0b4609b1c87968817d18421f051641
parent: 8389f1967c5f8b3819cca80705b1b4ba04132b93
author: Makoto Kato <makoto.kt@gmail.com>
date: Fri Jun 11 14:32:28 EDT 2010

some XMM registers are non-volatile on windows x64 ABI

XMM6 to XMM15 are non-volatile on Windows x64 ABI.  We have to save
these registers.

Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076

--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -17,6 +17,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 2
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -101,6 +102,7 @@
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -26,6 +26,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -212,6 +213,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -231,6 +233,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -652,6 +655,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -671,6 +675,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1002,6 +1007,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1021,6 +1027,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1564,6 +1571,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1583,6 +1591,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1679,6 +1688,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1698,6 +1708,7 @@
     push        rbp         ; save old base pointer value.
     mov         rbp, rsp    ; set new base pointer value.
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx         ; save callee-saved reg
     push        rsi
     push        rdi
@@ -1942,6 +1953,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -26,6 +26,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -240,6 +241,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -254,6 +256,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -439,6 +442,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -452,6 +456,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -612,6 +617,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -67,6 +67,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -119,6 +120,7 @@
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -37,6 +37,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -129,6 +130,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -155,6 +157,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -304,6 +307,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -329,6 +333,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -397,6 +402,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -510,6 +516,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -641,6 +648,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -876,6 +884,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -894,6 +903,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -933,6 +943,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -953,6 +964,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -215,6 +215,25 @@
   %define UNSHADOW_ARGS mov rsp, rbp
 %endif
 
+; must keep XMM6:XMM15 (libvpx uses XMM6 and XMM7) on Win64 ABI
+; rsp register has to be aligned
+%ifidn __OUTPUT_FORMAT__,x64
+%macro SAVE_XMM 0
+  sub rsp, 32
+  movdqa XMMWORD PTR [rsp], xmm6
+  movdqa XMMWORD PTR [rsp+16], xmm7
+%endmacro
+%macro RESTORE_XMM 0
+  movdqa xmm6, XMMWORD PTR [rsp]
+  movdqa xmm7, XMMWORD PTR [rsp+16]
+  add rsp, 32
+%endmacro
+%else
+%macro SAVE_XMM 0
+%endmacro
+%macro RESTORE_XMM 0
+%endmacro
+%endif
 
 ; Name of the rodata section
 ;