shithub: libvpx

Download patch

ref: d76032ae87e535be5b924d9e88bbd67189380534
parent: f3f6b6fe3e960959489db2568d9942aeca261daa
author: Jian Zhou <zhoujian@google.com>
date: Thu Nov 19 06:34:22 EST 2015

Speed up h_predictor_4x4

Modify h_predictor_4x4 with XMM registers.
Speed up by ~25% in ./test_intra_pred_speed.

Change-Id: Id01c34c48e75b9d56dfc2e93af12cf0c0326a279

--- a/vpx_dsp/x86/intrapred_ssse3.asm
+++ b/vpx_dsp/x86/intrapred_ssse3.asm
@@ -33,23 +33,26 @@
 
 SECTION .text
 
-INIT_MMX ssse3
+INIT_XMM ssse3
+; h_predictor_4x4(dst, stride, <unused>, left)
+; Horizontal prediction: row i of the 4x4 block at dst is filled with the
+; byte left[i] repeated four times.
 cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
+  ; cglobal is asked to load only 2 args (dst, stride); left must be fetched
+  ; explicitly when it is not already in a register (e.g. x86-32 stack args).
   movifnidn          leftq, leftmp
-  add                leftq, 4
-  mov                lineq, -2
-  pxor                  m0, m0
-.loop:
-  movd                  m1, [leftq+lineq*2  ]
-  movd                  m2, [leftq+lineq*2+1]
-  pshufb                m1, m0
-  pshufb                m2, m0
-  movd      [dstq        ], m1
-  movd      [dstq+strideq], m2
+  movd                  m0, [leftq]         ; low dword = l0 l1 l2 l3
+  punpcklbw             m0, m0              ; l0 l0 l1 l1 l2 l2 l3 l3
+  punpcklbw             m0, m0              ; l0 x4, l1 x4, l2 x4, l3 x4
+  movd      [dstq        ], m0              ; row 0
+  psrldq                m0, 4               ; next row into the low dword
+  movd      [dstq+strideq], m0              ; row 1
   lea                 dstq, [dstq+strideq*2]
-  inc                lineq
-  jnz .loop
-  REP_RET
+  psrldq                m0, 4
+  movd      [dstq        ], m0              ; row 2
+  psrldq                m0, 4
+  movd      [dstq+strideq], m0              ; row 3
+  RET
 
 INIT_MMX ssse3
 cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left