shithub: libvpx

Download patch

ref: 220daa00e0753fb7c2b7346c5557624df9055f21
parent: 9484bf7f57f9908d87f0447b093443056a77d009
author: Jim Bankoski <jimbankoski@google.com>
date: Wed Jun 16 08:36:53 EDT 2010

vp8_block_error_xmm: remove unnecessary instructions

Remove a couple of instructions from this function that weren't
necessary for correct execution.

Change-Id: Ib649674f140689f7e5c1530c35686241688a3151

--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -11,7 +11,6 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-
 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_xmm)
 sym(vp8_block_error_xmm):
@@ -20,11 +19,9 @@
     SHADOW_ARGS_TO_STACK 2
     push rsi
     push rdi
-    ; end prolog
+    ; end prologue
 
-
         mov         rsi,        arg(0) ;coeff_ptr
-        pxor        xmm7,       xmm7
 
         mov         rdi,        arg(1) ;dcoef_ptr
         movdqa      xmm3,       [rsi]
@@ -33,31 +30,25 @@
         movdqa      xmm5,       [rsi+16]
 
         movdqa      xmm6,       [rdi+16]
-        pxor        xmm1,       xmm1    ; from movd xmm1, dc; dc=0
+        psubw       xmm3,       xmm4
 
-        movdqa      xmm2,       xmm7
         psubw       xmm5,       xmm6
-
-        por         xmm1,       xmm2
+        pmaddwd     xmm3,       xmm3
         pmaddwd     xmm5,       xmm5
 
-        pcmpeqw     xmm1,       xmm7
-        psubw       xmm3,       xmm4
+        paddd       xmm3,       xmm5
 
-        pand        xmm1,       xmm3
-        pmaddwd     xmm1,       xmm1
+        pxor        xmm7,       xmm7
+        movdqa      xmm0,       xmm3
 
-        paddd       xmm1,       xmm5
-        movdqa      xmm0,       xmm1
-
         punpckldq   xmm0,       xmm7
-        punpckhdq   xmm1,       xmm7
+        punpckhdq   xmm3,       xmm7
 
-        paddd       xmm0,       xmm1
-        movdqa      xmm1,       xmm0
+        paddd       xmm0,       xmm3
+        movdqa      xmm3,       xmm0
 
         psrldq      xmm0,       8
-        paddd       xmm0,       xmm1
+        paddd       xmm0,       xmm3
 
         movd        rax,        xmm0
 
@@ -67,7 +58,6 @@
     UNSHADOW_ARGS
     pop         rbp
     ret
-
 
 ;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_mmx)