shithub: libvpx

Download patch

ref: e6f955251f936e90444f83ffcbc87d3a3d4fe24c
parent: 6a60249071f9bed09d9b0033064d1c1511a1ca13
parent: 7fd643264a80dcde9c994237b2b39433d9ce96b3
author: Ronald S. Bultje <rbultje@google.com>
date: Wed Jul 10 10:52:23 EDT 2013

Merge "SSSE3 assembly for 4x4/8x8/16x16/32x32 H intra prediction."

--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -64,7 +64,7 @@
 specialize vp9_d63_predictor_4x4
 
 prototype void vp9_h_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_4x4
+specialize vp9_h_predictor_4x4 ssse3
 
 prototype void vp9_d117_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
 specialize vp9_d117_predictor_4x4
@@ -103,7 +103,7 @@
 specialize vp9_d63_predictor_8x8
 
 prototype void vp9_h_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_8x8
+specialize vp9_h_predictor_8x8 ssse3
 
 prototype void vp9_d117_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
 specialize vp9_d117_predictor_8x8
@@ -142,7 +142,7 @@
 specialize vp9_d63_predictor_16x16
 
 prototype void vp9_h_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_16x16
+specialize vp9_h_predictor_16x16 ssse3
 
 prototype void vp9_d117_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
 specialize vp9_d117_predictor_16x16
@@ -181,7 +181,7 @@
 specialize vp9_d63_predictor_32x32
 
 prototype void vp9_h_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
-specialize vp9_h_predictor_32x32
+specialize vp9_h_predictor_32x32 ssse3
 
 prototype void vp9_d117_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
 specialize vp9_d117_predictor_32x32
--- /dev/null
+++ b/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -1,0 +1,87 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+INIT_MMX ssse3                                   ; 4x4 horizontal intra predictor; m0-m2 are 64-bit MMX registers here
+cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left ; only dst/stride are auto-loaded; "line" is used as a scratch counter
+  movifnidn          leftq, leftmp               ; bring the 4th argument (left pointer) into leftq if not already there
+  add                leftq, 4                    ; leftq = left + 4, so the negative index below ends at left[3]
+  mov                lineq, -2                   ; 2 iterations x 2 rows = 4 rows; counter runs -2 -> 0
+  pxor                  m0, m0                   ; all-zero pshufb mask: every result byte is a copy of source byte 0
+.loop:
+  movd                  m1, [leftq+lineq*2  ]    ; 4-byte load; only byte 0 (left pixel of the even row) is used
+  movd                  m2, [leftq+lineq*2+1]    ; same for the odd row. NOTE(review): last iteration reads up to 3 bytes past left[3] - confirm buffer padding
+  pshufb                m1, m0                   ; replicate the even row's left pixel across the register
+  pshufb                m2, m0                   ; replicate the odd row's left pixel across the register
+  movd      [dstq        ], m1                   ; write 4 bytes of the even row
+  movd      [dstq+strideq], m2                   ; write 4 bytes of the odd row
+  lea                 dstq, [dstq+strideq*2]     ; advance dst by two rows (lea does not modify flags)
+  inc                lineq                       ; sets ZF once the counter reaches 0
+  jnz .loop                                      ; repeat until both row pairs are written
+  REP_RET                                        ; NOTE(review): MMX registers used and no emms issued here - presumably the caller clears state; confirm
+
+INIT_MMX ssse3                                   ; 8x8 horizontal intra predictor; m0-m2 are 64-bit MMX registers here
+cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left ; only dst/stride are auto-loaded; "line" is used as a scratch counter
+  movifnidn          leftq, leftmp               ; bring the 4th argument (left pointer) into leftq if not already there
+  add                leftq, 8                    ; leftq = left + 8, so the negative index below ends at left[7]
+  mov                lineq, -4                   ; 4 iterations x 2 rows = 8 rows; counter runs -4 -> 0
+  pxor                  m0, m0                   ; all-zero pshufb mask: every result byte is a copy of source byte 0
+.loop:
+  movd                  m1, [leftq+lineq*2  ]    ; 4-byte load; only byte 0 (left pixel of the even row) is used
+  movd                  m2, [leftq+lineq*2+1]    ; same for the odd row. NOTE(review): last iteration reads up to 3 bytes past left[7] - confirm buffer padding
+  pshufb                m1, m0                   ; replicate the even row's left pixel across all 8 bytes
+  pshufb                m2, m0                   ; replicate the odd row's left pixel across all 8 bytes
+  movq      [dstq        ], m1                   ; write 8 bytes of the even row
+  movq      [dstq+strideq], m2                   ; write 8 bytes of the odd row
+  lea                 dstq, [dstq+strideq*2]     ; advance dst by two rows (lea does not modify flags)
+  inc                lineq                       ; sets ZF once the counter reaches 0
+  jnz .loop                                      ; repeat until all four row pairs are written
+  REP_RET                                        ; NOTE(review): MMX registers used and no emms issued here - presumably the caller clears state; confirm
+
+INIT_XMM ssse3                                   ; 16x16 horizontal intra predictor; m0-m2 are 128-bit XMM registers here
+cglobal h_predictor_16x16, 2, 4, 3, dst, stride, line, left ; only dst/stride are auto-loaded; "line" is used as a scratch counter
+  movifnidn          leftq, leftmp               ; bring the 4th argument (left pointer) into leftq if not already there
+  add                leftq, 16                   ; leftq = left + 16, so the negative index below ends at left[15]
+  mov                lineq, -8                   ; 8 iterations x 2 rows = 16 rows; counter runs -8 -> 0
+  pxor                  m0, m0                   ; all-zero pshufb mask: every result byte is a copy of source byte 0
+.loop:
+  movd                  m1, [leftq+lineq*2  ]    ; 4-byte load; only byte 0 (left pixel of the even row) is used
+  movd                  m2, [leftq+lineq*2+1]    ; same for the odd row. NOTE(review): last iteration reads up to 3 bytes past left[15] - confirm buffer padding
+  pshufb                m1, m0                   ; replicate the even row's left pixel across all 16 bytes
+  pshufb                m2, m0                   ; replicate the odd row's left pixel across all 16 bytes
+  mova      [dstq        ], m1                   ; aligned 16-byte store of the even row; assumes dst is 16-byte aligned - TODO confirm
+  mova      [dstq+strideq], m2                   ; aligned 16-byte store of the odd row; also assumes stride is a multiple of 16
+  lea                 dstq, [dstq+strideq*2]     ; advance dst by two rows (lea does not modify flags)
+  inc                lineq                       ; sets ZF once the counter reaches 0
+  jnz .loop                                      ; repeat until all eight row pairs are written
+  REP_RET                                        ; x86inc return macro
+
+INIT_XMM ssse3                                   ; 32x32 horizontal intra predictor; m0-m2 are 128-bit XMM registers here
+cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left ; only dst/stride are auto-loaded; "line" is used as a scratch counter
+  movifnidn          leftq, leftmp               ; bring the 4th argument (left pointer) into leftq if not already there
+  add                leftq, 32                   ; leftq = left + 32, so the negative index below ends at left[31]
+  mov                lineq, -16                  ; 16 iterations x 2 rows = 32 rows; counter runs -16 -> 0
+  pxor                  m0, m0                   ; all-zero pshufb mask: every result byte is a copy of source byte 0
+.loop:
+  movd                  m1, [leftq+lineq*2  ]    ; 4-byte load; only byte 0 (left pixel of the even row) is used
+  movd                  m2, [leftq+lineq*2+1]    ; same for the odd row. NOTE(review): last iteration reads up to 3 bytes past left[31] - confirm buffer padding
+  pshufb                m1, m0                   ; replicate the even row's left pixel across all 16 bytes
+  pshufb                m2, m0                   ; replicate the odd row's left pixel across all 16 bytes
+  mova   [dstq           ], m1                   ; even row, bytes 0-15 (aligned store; assumes 16-byte aligned dst - TODO confirm)
+  mova   [dstq        +16], m1                   ; even row, bytes 16-31
+  mova   [dstq+strideq   ], m2                   ; odd row, bytes 0-15 (also assumes stride is a multiple of 16)
+  mova   [dstq+strideq+16], m2                   ; odd row, bytes 16-31
+  lea                 dstq, [dstq+strideq*2]     ; advance dst by two rows (lea does not modify flags)
+  inc                lineq                       ; sets ZF once the counter reaches 0
+  jnz .loop                                      ; repeat until all sixteen row pairs are written
+  REP_RET                                        ; x86inc return macro
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -82,6 +82,7 @@
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 ifeq ($(CONFIG_POSTPROC),yes)
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm