ref: 8e00403915e543e6d15e751903265494eb51a94e
parent: 5fe20ec7dd89c88453a61cd47d5d01e49d6cc6c2
author: Martin Storsjö <martin@martin.st>
date: Mon Jun 22 10:25:54 EDT 2020
arm64: ipred: Improve scheduling a tiny bit in the entry in smooth
--- a/src/arm/64/ipred.S
+++ b/src/arm/64/ipred.S
@@ -884,10 +884,10 @@
lsl x1, x1, #1
br x5
40:
- sub x2, x2, #4
- mov x7, #-4
ld1r {v6.2s}, [x8] // top
ld1r {v7.2s}, [x10] // weights_hor
+ sub x2, x2, #4
+ mov x7, #-4
dup v5.16b, v6.b[3] // right
usubl v6.8h, v6.8b, v4.8b // top-bottom
uxtl v7.8h, v7.8b // weights_hor
@@ -922,10 +922,10 @@
b.gt 4b
ret
80:
- sub x2, x2, #4
- mov x7, #-4
ld1 {v6.8b}, [x8] // top
ld1 {v7.8b}, [x10] // weights_hor
+ sub x2, x2, #4
+ mov x7, #-4
dup v5.16b, v6.b[7] // right
usubl v6.8h, v6.8b, v4.8b // top-bottom
uxtl v7.8h, v7.8b // weights_hor
--- a/src/arm/64/ipred16.S
+++ b/src/arm/64/ipred16.S
@@ -920,10 +920,10 @@
lsl x1, x1, #1
br x5
40:
- sub x2, x2, #8
- mov x7, #-8
ld1r {v6.2d}, [x8] // top
ld1r {v7.2s}, [x10] // weights_hor
+ sub x2, x2, #8
+ mov x7, #-8
dup v5.8h, v6.h[3] // right
sub v6.8h, v6.8h, v4.8h // top-bottom
uxtl v7.8h, v7.8b // weights_hor
@@ -963,10 +963,10 @@
b.gt 4b
ret
80:
- sub x2, x2, #8
- mov x7, #-8
ld1 {v6.8h}, [x8] // top
ld1 {v7.8b}, [x10] // weights_hor
+ sub x2, x2, #8
+ mov x7, #-8
dup v5.8h, v6.h[7] // right
sub v6.8h, v6.8h, v4.8h // top-bottom
uxtl v7.8h, v7.8b // weights_hor