ref: 2a448fde52f8ae0f6958e8e89ae6311e0b14a1d8
parent: c1a5e445d1da12c71f5407a032292ea89f1205c1
author: Martin Storsjö <martin@martin.st>
date: Wed Dec 9 06:44:19 EST 2020
arm64: loopfilter16: Fix conditions for skipping parts of the filtering. As the arm64 16 bpc loopfilter operates on an 8 pixel region at a time, inspect only 2 bits (each corresponding to 4 pixels) from the vmask registers, as we also shift them down by 2 bits at the end of the loop. This should allow skipping the loopfilter altogether (or using a smaller filter) in more cases.
--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -785,7 +785,7 @@
orr w6, w6, w7 // vmask[0] |= vmask[1]
1:
- tst w6, #0x0f
+ tst w6, #0x03
.ifc \dir, v
ld1 {v0.8b}, [x4], #8
ld1 {v1.8b}, [x3], #8
@@ -847,7 +847,7 @@
ushl v10.8h, v10.8h, v31.8h
.ifc \type, y
- tst w2, #0x0f
+ tst w2, #0x03
b.eq 2f
// wd16
bl lpf_\dir\()_16_8_neon
@@ -854,7 +854,7 @@
b 8f
2:
.endif
- tst w7, #0x0f
+ tst w7, #0x03
b.eq 3f
.ifc \type, y
// wd8