shithub: libvpx

Download patch

ref: 00fc0e3ff5e34472efc652686703493387fe36ee
parent: 3316918b3b5ea21c8adeee8e961e01f977726a0b
author: Jingning Han <jingning@google.com>
date: Mon Jul 7 07:04:37 EDT 2014

Tune SSSE3 implementation of fast path quantization

This commit further simplifies the SSSE3 implementation of the fast
path quantization process: the all-ones register m12 duplicated m7, so
it is removed and m7 is used in its place.

Change-Id: I5be3286ec0f1bd81d1cf5be3168fece6384fb9ca

--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -264,7 +264,6 @@
   pabsw                           m6, m9                   ; m6 = abs(m9)
   pabsw                          m11, m10                  ; m11 = abs(m10)
   pcmpeqw                         m7, m7
-  pcmpeqw                        m12, m12
 
   paddsw                          m6, m1                   ; m6 += round
   punpckhqdq                      m1, m1
@@ -296,7 +295,7 @@
   mova                            m6, [  iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
   mova                           m11, [  iscanq+ncoeffq*2+16] ; m11 = scan[i]
   psubw                           m6, m7                   ; m6 = scan[i] + 1
-  psubw                          m11, m12                  ; m11 = scan[i] + 1
+  psubw                          m11, m7                   ; m11 = scan[i] + 1
   pandn                           m8, m6                   ; m8 = max(eob)
   pandn                          m13, m11                  ; m13 = max(eob)
   pmaxsw                          m8, m13
@@ -309,10 +308,9 @@
   pabsw                           m6, m9                   ; m6 = abs(m9)
   pabsw                          m11, m10                  ; m11 = abs(m10)
   pcmpeqw                         m7, m7
-  pcmpeqw                        m12, m12
 %ifidn %1, b_32x32
   pmovmskb                        r6, m7
-  pmovmskb                        r2, m12
+  pmovmskb                        r2, m7
   or                              r6, r2
   jz .skip_iter
 %endif
@@ -343,7 +341,7 @@
   mova                            m6, [  iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
   mova                           m11, [  iscanq+ncoeffq*2+16] ; m11 = scan[i]
   psubw                           m6, m7                   ; m6 = scan[i] + 1
-  psubw                          m11, m12                  ; m11 = scan[i] + 1
+  psubw                          m11, m7                   ; m11 = scan[i] + 1
   pandn                          m14, m6                   ; m14 = max(eob)
   pandn                          m13, m11                  ; m13 = max(eob)
   pmaxsw                          m8, m14
--