shithub: dav1d

Download patch

ref: 2a448fde52f8ae0f6958e8e89ae6311e0b14a1d8
parent: c1a5e445d1da12c71f5407a032292ea89f1205c1
author: Martin Storsjö <martin@martin.st>
date: Wed Dec 9 06:44:19 EST 2020

arm64: loopfilter16: Fix conditions for skipping parts of the filtering

As the arm64 16 bpc loopfilter operates on an 8 pixel region at a time,
inspect only the low 2 bits (each corresponding to 4 pixels) of the vmask
registers, as we also shift them down by 2 bits at the end of the loop.

This should allow skipping the loopfilter altogether (or using a
smaller filter) in more cases.

--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -785,7 +785,7 @@
         orr             w6,  w6,  w7             // vmask[0] |= vmask[1]
 
 1:
-        tst             w6,  #0x0f
+        tst             w6,  #0x03
 .ifc \dir, v
         ld1             {v0.8b}, [x4], #8
         ld1             {v1.8b}, [x3], #8
@@ -847,7 +847,7 @@
         ushl            v10.8h,  v10.8h,  v31.8h
 
 .ifc \type, y
-        tst             w2,  #0x0f
+        tst             w2,  #0x03
         b.eq            2f
         // wd16
         bl              lpf_\dir\()_16_8_neon
@@ -854,7 +854,7 @@
         b               8f
 2:
 .endif
-        tst             w7,  #0x0f
+        tst             w7,  #0x03
         b.eq            3f
 .ifc \type, y
         // wd8