shithub: libvpx

Download patch

ref: 49f6ff11032df0d6696da8cf544d0292e185d00c
parent: 5dccce558c9e5a77e19a794e0cca1f7c918dbe6c
author: Jingning Han <jingning@google.com>
date: Wed Aug 19 05:14:21 EDT 2015

Rename inv_txfm_sse2.asm to inv_wht_sse2.asm

Change-Id: I43bcc70680503e4c18d8f021097307778cf9ea70

--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -185,7 +185,7 @@
 DSP_SRCS-$(HAVE_SSE2)   += x86/inv_txfm_sse2.h
 DSP_SRCS-$(HAVE_SSE2)   += x86/inv_txfm_sse2.c
 ifeq ($(CONFIG_USE_X86INC),yes)
-DSP_SRCS-$(HAVE_SSE2)   += x86/inv_txfm_sse2.asm
+DSP_SRCS-$(HAVE_SSE2)   += x86/inv_wht_sse2.asm
 ifeq ($(ARCH_X86_64),yes)
 DSP_SRCS-$(HAVE_SSSE3)  += x86/inv_txfm_ssse3_x86_64.asm
 endif  # ARCH_X86_64
--- a/vpx_dsp/x86/inv_txfm_sse2.asm
+++ /dev/null
@@ -1,103 +1,0 @@
-;
-;  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro REORDER_INPUTS 0
-  ; a c d b  to  a b c d
-  SWAP 1, 3, 2
-%endmacro
-
-%macro TRANSFORM_COLS 0
-  ; input:
-  ; m0 a
-  ; m1 b
-  ; m2 c
-  ; m3 d
-  paddw           m0,        m2
-  psubw           m3,        m1
-
-  ; wide subtract
-  punpcklwd       m4,        m0
-  punpcklwd       m5,        m3
-  psrad           m4,        16
-  psrad           m5,        16
-  psubd           m4,        m5
-  psrad           m4,        1
-  packssdw        m4,        m4             ; e
-
-  psubw           m5,        m4,        m1  ; b
-  psubw           m4,        m2             ; c
-  psubw           m0,        m5
-  paddw           m3,        m4
-                                ; m0 a
-  SWAP            1,         5  ; m1 b
-  SWAP            2,         4  ; m2 c
-                                ; m3 d
-%endmacro
-
-%macro TRANSPOSE_4X4 0
-  punpcklwd       m0,        m2
-  punpcklwd       m1,        m3
-  mova            m2,        m0
-  punpcklwd       m0,        m1
-  punpckhwd       m2,        m1
-  pshufd          m1,        m0, 0x0e
-  pshufd          m3,        m2, 0x0e
-%endmacro
-
-; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3
-%macro TRANSPOSE_4X4_WIDE 0
-  mova            m3, m0
-  punpcklwd       m0, m1
-  punpckhwd       m3, m1
-  mova            m2, m0
-  punpcklwd       m0, m3
-  punpckhwd       m2, m3
-  pshufd          m1, m0, 0x0e
-  pshufd          m3, m2, 0x0e
-%endmacro
-
-%macro ADD_STORE_4P_2X 5  ; src1, src2, tmp1, tmp2, zero
-  movd            m%3,       [outputq]
-  movd            m%4,       [outputq + strideq]
-  punpcklbw       m%3,       m%5
-  punpcklbw       m%4,       m%5
-  paddw           m%1,       m%3
-  paddw           m%2,       m%4
-  packuswb        m%1,       m%5
-  packuswb        m%2,       m%5
-  movd            [outputq], m%1
-  movd            [outputq + strideq], m%2
-%endmacro
-
-INIT_XMM sse2
-cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
-  mova            m0,        [inputq +  0]
-  mova            m1,        [inputq + 16]
-
-  psraw           m0,        2
-  psraw           m1,        2
-
-  TRANSPOSE_4X4_WIDE
-  REORDER_INPUTS
-  TRANSFORM_COLS
-  TRANSPOSE_4X4
-  REORDER_INPUTS
-  TRANSFORM_COLS
-
-  pxor            m4, m4
-  ADD_STORE_4P_2X  0, 1, 5, 6, 4
-  lea             outputq, [outputq + 2 * strideq]
-  ADD_STORE_4P_2X  2, 3, 5, 6, 4
-
-  RET
--- /dev/null
+++ b/vpx_dsp/x86/inv_wht_sse2.asm
@@ -1,0 +1,103 @@
+;
+;  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro REORDER_INPUTS 0  ; takes no arguments; only m1-m3 change roles, m0 is left alone
+  ; a c d b  to  a b c d   (values held by m0, m1, m2, m3 before and after)
+  SWAP 1, 3, 2  ; x86inc register renaming -- presumably resolved at assembly time, no data moves
+%endmacro
+
+%macro TRANSFORM_COLS 0  ; one 1-D transform pass over m0-m3; m4 and m5 are used as scratch
+  ; input:
+  ; m0 a
+  ; m1 b
+  ; m2 c
+  ; m3 d
+  paddw           m0,        m2             ; a += c
+  psubw           m3,        m1             ; d -= b
+  ; e = (a - d) >> 1, evaluated in 32 bits; only the low four words of a and d take part
+  ; wide subtract
+  punpcklwd       m4,        m0             ; a goes to the upper 16 bits of each dword
+  punpcklwd       m5,        m3             ; d goes to the upper 16 bits of each dword
+  psrad           m4,        16             ; sign-extend a; the stale lower words are shifted out
+  psrad           m5,        16             ; sign-extend d the same way
+  psubd           m4,        m5             ; 32-bit a - d
+  psrad           m4,        1              ; arithmetic halving
+  packssdw        m4,        m4             ; e (saturated to 16 bits, same four words in both halves)
+  ; back to 16-bit arithmetic; new b and c are built in the scratch registers
+  psubw           m5,        m4,        m1  ; b = e - b (three-operand form as provided by x86inc)
+  psubw           m4,        m2             ; c = e - c
+  psubw           m0,        m5             ; a -= new b
+  paddw           m3,        m4             ; d += new c
+                                ; m0 a
+  SWAP            1,         5  ; m1 b
+  SWAP            2,         4  ; m2 c
+                                ; m3 d
+%endmacro
+
+%macro TRANSPOSE_4X4 0  ; 4x4 word transpose: rows in the low halves of m0-m3 become columns there
+  punpcklwd       m0,        m2             ; interleave row 0 with row 2
+  punpcklwd       m1,        m3             ; interleave row 1 with row 3
+  mova            m2,        m0             ; keep a copy, the next unpack overwrites m0
+  punpcklwd       m0,        m1             ; column 0 (low qword) and column 1 (high qword)
+  punpckhwd       m2,        m1             ; column 2 (low qword) and column 3 (high qword)
+  pshufd          m1,        m0, 0x0e       ; 0x0e copies the upper qword down: column 1 into low m1
+  pshufd          m3,        m2, 0x0e       ; likewise column 3 into low m3
+%endmacro
+
+; transpose a 4x4 int16 matrix packed in m0 (words 0-7) and m1 (words 8-15) into the low 64 bits of m0-m3
+%macro TRANSPOSE_4X4_WIDE 0
+  mova            m3, m0                    ; copy needed for the high-half unpack below
+  punpcklwd       m0, m1                    ; interleave row 0 (low m0) with row 2 (low m1)
+  punpckhwd       m3, m1                    ; interleave row 1 (high m0) with row 3 (high m1)
+  mova            m2, m0                    ; keep a copy, the next unpack overwrites m0
+  punpcklwd       m0, m3                    ; column 0 (low qword) and column 1 (high qword)
+  punpckhwd       m2, m3                    ; column 2 (low qword) and column 3 (high qword)
+  pshufd          m1, m0, 0x0e              ; upper qword of m0 into the low qword of m1
+  pshufd          m3, m2, 0x0e              ; upper qword of m2 into the low qword of m3
+%endmacro
+
+%macro ADD_STORE_4P_2X 5  ; src1, src2, tmp1, tmp2, zero -- all register numbers; src and tmp are overwritten
+  movd            m%3,       [outputq]             ; 4 bytes of the current destination row
+  movd            m%4,       [outputq + strideq]   ; 4 bytes of the row one stride further
+  punpcklbw       m%3,       m%5                   ; widen bytes to words (zero register supplies high bytes)
+  punpcklbw       m%4,       m%5
+  paddw           m%1,       m%3                   ; src1 += first row
+  paddw           m%2,       m%4                   ; src2 += second row
+  packuswb        m%1,       m%5                   ; words back to bytes with unsigned saturation
+  packuswb        m%2,       m%5
+  movd            [outputq], m%1                   ; write 4 bytes back to each row
+  movd            [outputq + strideq], m%2
+%endmacro
+
+INIT_XMM sse2  ; iwht4x4_16_add: transform 16 int16 values from input and add the 4x4 result into output
+cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
+  mova            m0,        [inputq +  0]  ; coefficients 0-7 (mova: presumably an aligned load -- confirm)
+  mova            m1,        [inputq + 16]  ; coefficients 8-15
+  ; every 16-bit coefficient is arithmetically shifted right by 2 before the transform
+  psraw           m0,        2
+  psraw           m1,        2
+  ; two passes of the same 1-D transform, each preceded by a transpose and a register reorder
+  TRANSPOSE_4X4_WIDE
+  REORDER_INPUTS
+  TRANSFORM_COLS
+  TRANSPOSE_4X4
+  REORDER_INPUTS
+  TRANSFORM_COLS
+  ; the low four words of m0-m3 are now added to four destination rows, two rows per macro call
+  pxor            m4, m4                    ; all-zero register passed as the "zero" argument
+  ADD_STORE_4P_2X  0, 1, 5, 6, 4            ; rows at outputq and outputq + strideq
+  lea             outputq, [outputq + 2 * strideq]  ; step down two rows
+  ADD_STORE_4P_2X  2, 3, 5, 6, 4            ; the following two rows
+
+  RET