shithub: libvpx

Download patch

ref: f2cf3c06a0835bf436bb25c5a2f28265c287ec70
parent: 1470529f62729b99d623e3c2f36a588ceb85ddb7
author: Jingning Han <jingning@google.com>
date: Wed Apr 1 05:19:13 EDT 2015

Reduce the number of xmm registers required by block_error_fp from 8 to 6

Use 6 xmm registers instead of 8 by using m3 rather than m7 as the
temporary for the 64-bit accumulation.

Change-Id: If976ad85d09191d2fb0565399d690f2869dbbcc7

--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -78,7 +78,7 @@
 ;                            intptr_t block_size)
 
 INIT_XMM sse2
-cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
   pxor      m4, m4                 ; sse accumulator
   pxor      m5, m5                 ; dedicated zero register
   lea     uqcq, [uqcq+sizeq*2]
@@ -96,13 +96,13 @@
   pmaddwd   m0, m0
   pmaddwd   m1, m1
   ; accumulate in 64bit
-  punpckldq m7, m0, m5
+  punpckldq m3, m0, m5
   punpckhdq m0, m5
-  paddq     m4, m7
-  punpckldq m7, m1, m5
+  paddq     m4, m3
+  punpckldq m3, m1, m5
   paddq     m4, m0
   punpckhdq m1, m5
-  paddq     m4, m7
+  paddq     m4, m3
   paddq     m4, m1
   add    sizeq, mmsize
   jl .loop