shithub: dav1d

ref: 3db194267e46239bd5ceaa526b4dd433ee14bbb9
parent: ded8ed3f49c022359107fa7f4752b84389716b00
author: Victorien Le Couviour--Tuffet <victorien.lecouviour.tuffet@gmail.com>
date: Wed Feb 13 12:36:19 EST 2019

x86: improve AVX2 cdef_filter macro consistency

- consistently use %3 instead of a hardcoded value for the tmp stride (illustrated below)
- also correct a comment

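For context, here is a minimal C sketch (not taken from the dav1d sources) of what the affected macro code does: pixels are widened to 16 bits into a padded temporary buffer whose row stride corresponds to the macro's %3 parameter, the two left-neighbour pixels are stored just before column 0 of each row (the movd/pextrd stores in the second hunk), and constrained pixel differences are weighted by the tap values and accumulated (the pmullw/paddw lines in the first hunk). TMP_STRIDE, copy_left_edge, the simplified constrain() and all numeric values below are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Assumed stand-in for the macro's %3 tmp-stride parameter (in 16-bit
 * units); the real value depends on the block size the macro handles. */
#define TMP_STRIDE 16

/* Store the two left-edge pixels of each row just before column 0 of the
 * padded buffer, like the movd/pextrd [px+y*%3-4] stores in the diff. */
static void copy_left_edge(int16_t *px, const uint8_t *left, int h)
{
    for (int y = 0; y < h; y++) {
        px[y * TMP_STRIDE - 2] = left[2 * y + 0];
        px[y * TMP_STRIDE - 1] = left[2 * y + 1];
    }
}

/* Simplified scalar constrain(): clamp |diff| against a threshold that
 * decays with |diff| >> shift, then restore the sign. */
static int constrain(int diff, int threshold, int shift)
{
    int adiff = abs(diff), v = threshold - (adiff >> shift);
    if (v < 0) v = 0;
    if (v > adiff) v = adiff;
    return diff < 0 ? -v : v;
}

int main(void)
{
    /* Padded 8x8 block: two guard columns to the left of each row. */
    int16_t buf[8 * TMP_STRIDE + 2] = {0};
    int16_t *px = buf + 2;                 /* column 0 of row 0 */
    const uint8_t left[16] = {10, 20, 30, 40, 50, 60, 70, 80,
                              11, 21, 31, 41, 51, 61, 71, 81};

    copy_left_edge(px, left, 8);

    /* The weighted accumulation the first hunk's comments describe:
     * sum += constrain(diff) * tap for each primary/secondary tap. */
    int sum = 0, taps[2] = {4, 2};
    int diffs[2] = {9, -13};               /* example pixel differences */
    for (int i = 0; i < 2; i++)
        sum += constrain(diffs[i], 8, 2) * taps[i];

    printf("row 3 left pixels: %d %d, sum = %d\n",
           px[3 * TMP_STRIDE - 2], px[3 * TMP_STRIDE - 1], sum);
    return 0;
}
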
--- a/src/x86/cdef.asm
+++ b/src/x86/cdef.asm
@@ -104,8 +104,8 @@
     pminsw         m12, m10
     psignw         m11, m5                      ; constrain(diff_p0)
     psignw         m12, m6                      ; constrain(diff_p1)
-    pmullw         m11, %4                      ; constrain(diff_p0) * pri_taps
-    pmullw         m12, %4                      ; constrain(diff_p1) * pri_taps
+    pmullw         m11, %4                      ; constrain(diff_p0) * taps
+    pmullw         m12, %4                      ; constrain(diff_p1) * taps
     paddw          m15, m11
     paddw          m15, m12
 %endmacro
@@ -258,15 +258,15 @@
 %if %2 == 8
     pmovzxbw       xm2, [leftq+ 8]
 %endif
-    movd   [px+0*32-4], xm1
-    pextrd [px+1*32-4], xm1, 1
-    pextrd [px+2*32-4], xm1, 2
-    pextrd [px+3*32-4], xm1, 3
+    movd   [px+0*%3-4], xm1
+    pextrd [px+1*%3-4], xm1, 1
+    pextrd [px+2*%3-4], xm1, 2
+    pextrd [px+3*%3-4], xm1, 3
 %if %2 == 8
-    movd   [px+4*32-4], xm2
-    pextrd [px+5*32-4], xm2, 1
-    pextrd [px+6*32-4], xm2, 2
-    pextrd [px+7*32-4], xm2, 3
+    movd   [px+4*%3-4], xm2
+    pextrd [px+5*%3-4], xm2, 1
+    pextrd [px+6*%3-4], xm2, 2
+    pextrd [px+7*%3-4], xm2, 3
 %endif
     jmp .left_done
 .no_left: