ref: 534ea700bd690e8f3f6e0d9db930b3bf65a08cac
parent: f0c22a3f333ad9a3e1218b8387879766bfea6d42
parent: b843aa4eda473577e7d22cd4045fb59df521898f
author: Yunqing Wang <yunqingwang@google.com>
date: Tue Mar 29 05:04:22 EDT 2011
Merge "Fix a crash while enabling shared (--enable-shared)"
--- a/vp8/encoder/x86/variance_impl_ssse3.asm
+++ b/vp8/encoder/x86/variance_impl_ssse3.asm
@@ -38,7 +38,6 @@
GET_GOT rbx
push rsi
push rdi
- push rbx
; end prolog
pxor xmm6, xmm6
@@ -81,10 +80,12 @@
packuswb xmm0, xmm2
- movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line
- lea rsi, [rsi + rbx]
-%if ABI_IS_32BIT=0
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+ lea rsi, [rsi + r8]
%endif
filter_block2d_bil_var_ssse3_loop:
@@ -132,10 +133,11 @@
paddd xmm7, xmm2
paddd xmm7, xmm3
- lea rsi, [rsi + rbx] ;ref_pixels_per_line
%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
add rdi, dword ptr arg(3) ;src_pixels_per_line
%else
+ lea rsi, [rsi + r8]
lea rdi, [rdi + r9]
%endif
@@ -161,7 +163,10 @@
movdqu xmm1, XMMWORD PTR [rsi]
movdqa xmm0, xmm1
- movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+%if ABI_IS_32BIT=0
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
lea rsi, [rsi + rax]
filter_block2d_bil_sp_only_loop:
@@ -196,8 +201,13 @@
movdqa xmm1, xmm0
lea rsi, [rsi + rax] ;ref_pixels_per_line
- lea rdi, [rdi + rbx] ;src_pixels_per_line
+%if ABI_IS_32BIT
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rdi, [rdi + r9]
+%endif
+
sub rcx, 1
jnz filter_block2d_bil_sp_only_loop
@@ -208,7 +218,7 @@
mov rdi, arg(2) ;src_ptr
movsxd rcx, dword ptr arg(4) ;Height
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
pxor xmm0, xmm0
filter_block2d_bil_full_pixel_loop:
@@ -232,7 +242,7 @@
paddd xmm7, xmm2
lea rsi, [rsi + rax] ;ref_pixels_per_line
- lea rdi, [rdi + rbx] ;src_pixels_per_line
+ lea rdi, [rdi + rdx] ;src_pixels_per_line
sub rcx, 1
jnz filter_block2d_bil_full_pixel_loop
@@ -245,8 +255,11 @@
movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
pxor xmm0, xmm0
- movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
+%if ABI_IS_32BIT=0
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+%endif
+
filter_block2d_bil_fp_only_loop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rsi+1]
@@ -278,7 +291,11 @@
paddd xmm7, xmm3
lea rsi, [rsi + rdx]
- lea rdi, [rdi + rbx] ;src_pixels_per_line
+%if ABI_IS_32BIT
+ add rdi, dword ptr arg(3) ;src_pixels_per_line
+%else
+ lea rdi, [rdi + r9]
+%endif
sub rcx, 1
jnz filter_block2d_bil_fp_only_loop
@@ -322,7 +339,6 @@
movd [rdi], xmm6
; begin epilog
- pop rbx
pop rdi
pop rsi
RESTORE_GOT