ref: 46997bde8819da6d8f55c38bc0bd22622e4c4567
parent: a7ef4564537667ea2bc13df7dc8732baeb6cda57
parent: 3f210f10ebaff0ef15c6a3537d082f1e2d09aaac
author: Ronald S. Bultje <rbultje@google.com>
date: Wed Jul 10 13:08:46 EDT 2013
Merge "Remove unused iwalsh4x4 MMX/SSE2 functions."
--- a/vp9/common/x86/vp9_iwalsh_mmx.asm
+++ /dev/null
@@ -1,173 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_short_inv_walsh4x4_1_mmx(short *input, short *output)
-global sym(vp9_short_inv_walsh4x4_1_mmx) PRIVATE
-sym(vp9_short_inv_walsh4x4_1_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 2
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0)
- mov rax, 3
-
- mov rdi, arg(1)
- add rax, [rsi] ;input[0] + 3
-
- movd mm0, eax
-
- punpcklwd mm0, mm0 ;x x val val
-
- punpckldq mm0, mm0 ;val val val val
-
- psraw mm0, 3 ;(input[0] + 3) >> 3
-
- movq [rdi + 0], mm0
- movq [rdi + 8], mm0
- movq [rdi + 16], mm0
- movq [rdi + 24], mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_short_inv_walsh4x4_mmx(short *input, short *output)
-global sym(vp9_short_inv_walsh4x4_mmx) PRIVATE
-sym(vp9_short_inv_walsh4x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 2
- push rsi
- push rdi
- ; end prolog
-
- mov rax, 3
- mov rsi, arg(0)
- mov rdi, arg(1)
- shl rax, 16
-
- movq mm0, [rsi + 0] ;ip[0]
- movq mm1, [rsi + 8] ;ip[4]
- or rax, 3 ;00030003h
-
- movq mm2, [rsi + 16] ;ip[8]
- movq mm3, [rsi + 24] ;ip[12]
-
- movq mm7, rax
- movq mm4, mm0
-
- punpcklwd mm7, mm7 ;0003000300030003h
- movq mm5, mm1
-
- paddw mm4, mm3 ;ip[0] + ip[12] aka al
- paddw mm5, mm2 ;ip[4] + ip[8] aka bl
-
- movq mm6, mm4 ;temp al
-
- paddw mm4, mm5 ;al + bl
- psubw mm6, mm5 ;al - bl
-
- psubw mm0, mm3 ;ip[0] - ip[12] aka d1
- psubw mm1, mm2 ;ip[4] - ip[8] aka c1
-
- movq mm5, mm0 ;temp dl
-
- paddw mm0, mm1 ;dl + cl
- psubw mm5, mm1 ;dl - cl
-
- ; 03 02 01 00
- ; 13 12 11 10
- ; 23 22 21 20
- ; 33 32 31 30
-
- movq mm3, mm4 ; 03 02 01 00
- punpcklwd mm4, mm0 ; 11 01 10 00
- punpckhwd mm3, mm0 ; 13 03 12 02
-
- movq mm1, mm6 ; 23 22 21 20
- punpcklwd mm6, mm5 ; 31 21 30 20
- punpckhwd mm1, mm5 ; 33 23 32 22
-
- movq mm0, mm4 ; 11 01 10 00
- movq mm2, mm3 ; 13 03 12 02
-
- punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0]
- punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4]
-
- punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8]
- punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12]
-;~~~~~~~~~~~~~~~~~~~~~
- movq mm1, mm0
- movq mm5, mm4
-
- paddw mm1, mm3 ;ip[0] + ip[12] aka al
- paddw mm5, mm2 ;ip[4] + ip[8] aka bl
-
- movq mm6, mm1 ;temp al
-
- paddw mm1, mm5 ;al + bl
- psubw mm6, mm5 ;al - bl
-
- psubw mm0, mm3 ;ip[0] - ip[12] aka d1
- psubw mm4, mm2 ;ip[4] - ip[8] aka c1
-
- movq mm5, mm0 ;temp dl
-
- paddw mm0, mm4 ;dl + cl
- psubw mm5, mm4 ;dl - cl
-;~~~~~~~~~~~~~~~~~~~~~
- movq mm3, mm1 ; 03 02 01 00
- punpcklwd mm1, mm0 ; 11 01 10 00
- punpckhwd mm3, mm0 ; 13 03 12 02
-
- movq mm4, mm6 ; 23 22 21 20
- punpcklwd mm6, mm5 ; 31 21 30 20
- punpckhwd mm4, mm5 ; 33 23 32 22
-
- movq mm0, mm1 ; 11 01 10 00
- movq mm2, mm3 ; 13 03 12 02
-
- punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0]
- punpckhdq mm1, mm6 ; 31 21 11 01 aka ip[4]
-
- punpckldq mm2, mm4 ; 32 22 12 02 aka ip[8]
- punpckhdq mm3, mm4 ; 33 23 13 03 aka ip[12]
-
- paddw mm0, mm7
- paddw mm1, mm7
- paddw mm2, mm7
- paddw mm3, mm7
-
- psraw mm0, 3
- psraw mm1, 3
- psraw mm2, 3
- psraw mm3, 3
-
- movq [rdi + 0], mm0
- movq [rdi + 8], mm1
- movq [rdi + 16], mm2
- movq [rdi + 24], mm3
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
--- a/vp9/common/x86/vp9_iwalsh_sse2.asm
+++ /dev/null
@@ -1,119 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_short_inv_walsh4x4_sse2(short *input, short *output)
-global sym(vp9_short_inv_walsh4x4_sse2) PRIVATE
-sym(vp9_short_inv_walsh4x4_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 2
- SAVE_XMM 6
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0)
- mov rdi, arg(1)
- mov rax, 3
-
- movdqa xmm0, [rsi + 0] ;ip[4] ip[0]
- movdqa xmm1, [rsi + 16] ;ip[12] ip[8]
-
- shl rax, 16
- or rax, 3 ;00030003h
-
- pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
- movdqa xmm3, xmm0 ;ip[4] ip[0]
-
- paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
- psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
- movdqa xmm4, xmm0
- punpcklqdq xmm0, xmm3 ;d1 a1
- punpckhqdq xmm4, xmm3 ;c1 b1
- movd xmm6, eax
-
- movdqa xmm1, xmm4 ;c1 b1
- paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
- psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
-
-;;;temp output
-;; movdqu [rdi + 0], xmm4
-;; movdqu [rdi + 16], xmm3
-
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ; 13 12 11 10 03 02 01 00
- ;
- ; 33 32 31 30 23 22 21 20
- ;
- movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
- punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
- punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
- movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
- punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
- punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
- ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
- movdqa xmm3, xmm4 ;ip[4] ip[0]
-
- pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
-
- paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
- psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
- movdqa xmm5, xmm4
- punpcklqdq xmm4, xmm3 ;d1 a1
- punpckhqdq xmm5, xmm3 ;c1 b1
-
- movdqa xmm1, xmm5 ;c1 b1
- paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0]
- psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ; 13 12 11 10 03 02 01 00
- ;
- ; 33 32 31 30 23 22 21 20
- ;
- movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00
- punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00
- punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10
- movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00
- punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
- punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- paddw xmm5, xmm6
- paddw xmm1, xmm6
-
- psraw xmm5, 3
- psraw xmm1, 3
-
- movdqa [rdi + 0], xmm5
- movdqa [rdi + 16], xmm1
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-x_s1sqr2:
- times 4 dw 0x8A8C
-align 16
-x_c1sqr2less1:
- times 4 dw 0x4E7B
-align 16
-fours:
- times 4 dw 0x0004
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -75,10 +75,8 @@
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
-VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
--
⑨