shithub: dav1d

Download patch

ref: 23517a3e4d2aaebcd45f2afd50ad0f6df36c040c
parent: 41cd4199f149760a4d16326342d646b2eb66e8b0
author: Martin Storsjö <martin@martin.st>
date: Thu Apr 2 10:07:00 EDT 2020

x86: mc: Skip checks for zero leftext/rightext within the need_left_ext/need_right_ext blocks

If leftext/rightext are zero, we invoke a version of v_loop with
the whole need_left_ext/need_right_ext parts left out altogether,
so these checks seem to be redundant.

--- a/src/x86/mc.asm
+++ b/src/x86/mc.asm
@@ -4883,9 +4883,6 @@
 %macro v_loop 3 ; need_left_ext, need_right_ext, suffix
 .v_loop_%3:
 %if %1
-    test           leftextq, leftextq
-    jz .body_%3
-
     ; left extension
     xor                  r3, r3
     vpbroadcastb         m0, [srcq]
@@ -4896,7 +4893,6 @@
     jl .left_loop_%3
 
     ; body
-.body_%3:
     lea                 r12, [dstq+leftextq]
 %endif
     xor                  r3, r3
@@ -4913,8 +4909,6 @@
 
 %if %2
     ; right extension
-    test          rightextq, rightextq
-    jz .body_loop_end_%3
 %if %1
     add                 r12, centerwq
 %else
@@ -4928,7 +4922,6 @@
     cmp                  r3, rightextq
     jl .right_loop_%3
 
-.body_loop_end_%3:
 %endif
     add                dstq, dstrideq
     add                srcq, sstrideq
--- a/src/x86/mc_ssse3.asm
+++ b/src/x86/mc_ssse3.asm
@@ -4881,8 +4881,6 @@
     mov                  r1, r1m
   %endif
 %if %1
-    test           leftextq, leftextq
-    jz .body_%3
     ; left extension
   %if ARCH_X86_64
     movd                 m0, [srcq]
@@ -4898,7 +4896,6 @@
     cmp                  r3, leftextq
     jl .left_loop_%3
     ; body
-.body_%3:
     lea             reg_tmp, [dstq+leftextq]
 %endif
     xor                  r3, r3
@@ -4919,13 +4916,6 @@
     jl .body_loop_%3
 %if %2
     ; right extension
-  %if ARCH_X86_64
-    test          rightextq, rightextq
-  %else
-    mov                  r1, r3m
-    test                 r1, r1
-  %endif
-    jz .body_loop_end_%3
 %if %1
     add             reg_tmp, centerwq
 %else
@@ -4948,7 +4938,6 @@
     cmp                  r3, r3m
   %endif
     jl .right_loop_%3
-.body_loop_end_%3:
 %endif
   %if ARCH_X86_64
     add                dstq, dstrideq