shithub: dav1d

Download patch

ref: ad4d1c4383b7705157807f2c364fca3ee3d713ef
parent: 8a52189f0dfc18ccfa7a17f0e5d97ca4434806f1
author: Martin Storsjö <martin@martin.st>
date: Fri Feb 1 08:22:58 EST 2019

arm64: looprestoration: Optimize loop termination checks in copy_narrow_neon

--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -498,9 +498,8 @@
         add             x7,  x0,  x1
         lsl             x1,  x1,  #1
 18:
-        cmp             w4,  #8
-        b.lt            110f
         subs            w4,  w4,  #8
+        b.lt            110f
         ld1             {v0.8b}, [x2], #8
         st1             {v0.b}[0], [x0], x1
         st1             {v0.b}[1], [x7], x1
@@ -513,6 +512,7 @@
         b.le            0f
         b               18b
 110:
+        add             w4,  w4,  #8
         asr             x1,  x1,  #1
 11:
         subs            w4,  w4,  #1
@@ -526,9 +526,8 @@
         add             x7,  x0,  x1
         lsl             x1,  x1,  #1
 24:
-        cmp             w4,  #4
-        b.lt            210f
         subs            w4,  w4,  #4
+        b.lt            210f
         ld1             {v0.4h}, [x2], #8
         st1             {v0.h}[0], [x0], x1
         st1             {v0.h}[1], [x7], x1
@@ -537,6 +536,7 @@
         b.le            0f
         b               24b
 210:
+        add             w4,  w4,  #4
         asr             x1,  x1,  #1
 22:
         subs            w4,  w4,  #1
@@ -561,9 +561,8 @@
         add             x7,  x0,  x1
         lsl             x1,  x1,  #1
 42:
-        cmp             w4,  #2
-        b.lt            41f
         subs            w4,  w4,  #2
+        b.lt            41f
         ld1             {v0.2s}, [x2], #8
         st1             {v0.s}[0], [x0], x1
         st1             {v0.s}[1], [x7], x1