shithub: libvpx

Download patch

ref: 4fbc1210ed81705b14f00f02c4508a0c413b04dc
parent: 272c82c13a463acd0418ab04a5facebe75231e1e
author: Yunqing Wang <yunqingwang@google.com>
date: Wed Nov 20 07:52:56 EST 2013

Correct ssse3 8/16-pixel wide sub-pixel filter calculation

Although no mismatch was indicated for 8/16 wide sub-pixel filters
in issue 661, they had similar problems that could cause mismatch
potentially. This patch fixed calculations in HORIZx8/16
and VERTx8/16.

Change-Id: Ib85412d690bea5609a51f0e50e7c858406b8ff9e

--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -158,10 +158,13 @@
     pmaddubsw   xmm6, k6k7
 
     paddsw      xmm0, xmm6
-    paddsw      xmm0, xmm2
+    movdqa      xmm1, xmm2
+    pmaxsw      xmm2, xmm4
+    pminsw      xmm4, xmm1
     paddsw      xmm0, xmm4
-    paddsw      xmm0, krd
+    paddsw      xmm0, xmm2
 
+    paddsw      xmm0, krd
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 
@@ -243,10 +246,13 @@
     pmaddubsw   xmm6, k6k7
 
     paddsw      xmm0, xmm6
-    paddsw      xmm0, xmm2
+    movdqa      xmm1, xmm2
+    pmaxsw      xmm2, xmm4
+    pminsw      xmm4, xmm1
     paddsw      xmm0, xmm4
-    paddsw      xmm0, krd
+    paddsw      xmm0, xmm2
 
+    paddsw      xmm0, krd
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 %if %1
@@ -635,9 +641,13 @@
     pmaddubsw   %3,   k4k5
     pmaddubsw   %4,   k6k7
 
-    paddsw      %1,   %2
     paddsw      %1,   %4
+    movdqa      %4,   %2
+    pmaxsw      %2,   %3
+    pminsw      %3,   %4
     paddsw      %1,   %3
+    paddsw      %1,   %2
+
     paddsw      %1,   krd
     psraw       %1,   7
     packuswb    %1,   %1
@@ -783,12 +793,19 @@
     pmaddubsw   xmm6,   k4k5
     pmaddubsw   xmm7,   k6k7
 
-    paddsw      xmm0,   xmm1
     paddsw      xmm0,   xmm3
+    movdqa      xmm3,   xmm1
+    pmaxsw      xmm1,   xmm2
+    pminsw      xmm2,   xmm3
     paddsw      xmm0,   xmm2
-    paddsw      xmm4,   xmm5
+    paddsw      xmm0,   xmm1
+
     paddsw      xmm4,   xmm7
+    movdqa      xmm7,   xmm5
+    pmaxsw      xmm5,   xmm6
+    pminsw      xmm6,   xmm7
     paddsw      xmm4,   xmm6
+    paddsw      xmm4,   xmm5
 
     paddsw      xmm0,   krd
     paddsw      xmm4,   krd