ref: 632b4876e3869aea085427cc79f5d08487d848de
parent: 65ba279b5393382a98ddd6844e0c0753f63e749f
author: B Krishnan Iyer <krishnaniyer97@gmail.com>
date: Mon Jul 1 17:17:35 EDT 2019
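arm: mc: neon: Improve the blend_v function

Fold the per-row pointer rewind into the stride register once before
each loop, instead of subtracting it from both destination pointers on
every iteration.

                            A73               A53
                        Earlier    Now    Earlier    Now
blend_v_w2_8bpc_neon:    122.1    121.3    195.5    195.5
blend_v_w4_8bpc_neon:    248.2    247.5    375.6    358.5
blend_v_w8_8bpc_neon:    210.3    205.2    375.6    358.5
blend_v_w16_8bpc_neon:   252.7    237.1    579.2    590.5
blend_v_w32_8bpc_neon:   347      345.8    997.4    994.1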
--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -527,6 +527,7 @@
add r12, r0, r1
lsl r1, r1, #1
vsub.i8 d5, d22, d4
+ sub r1, r1, #3
4:
vld1.32 {d2[]}, [r2], r3
vld1.32 {d0[]}, [r0]
@@ -540,8 +541,6 @@
vst1.16 {d20[2]}, [r12]!
vst1.8 {d20[2]}, [r0]!
vst1.8 {d20[6]}, [r12]!
- sub r0, r0, #3
- sub r12, r12, #3
add r0, r0, r1
add r12, r12, r1
bgt 4b
@@ -552,6 +551,7 @@
add r12, r0, r1
lsl r1, r1, #1
vsub.i8 d17, d16, d2
+ sub r1, r1, #6
8:
vld1.u8 {d4}, [r2], r3
vld1.u8 {d0}, [r0]
@@ -568,8 +568,6 @@
vst1.32 {d23[0]}, [r12]!
vst1.16 {d22[2]}, [r0]!
vst1.16 {d23[2]}, [r12]!
- sub r0, r0, #6
- sub r12, r12, #6
add r0, r0, r1
add r12, r12, r1
bgt 8b
@@ -580,6 +578,7 @@
add r12, r0, r1
lsl r1, r1, #1
vsub.i8 q11, q12, q2
+ sub r1, r1, #12
16:
vld1.u8 {q1}, [r2], r3
vld1.u8 {q0}, [r0]
@@ -602,8 +601,6 @@
vst1.u8 {d20}, [r12]!
vst1.32 {d19[0]}, [r0]!
vst1.32 {d21[0]}, [r12]!
- sub r0, r0, #12
- sub r12, r12, #12
add r0, r0, r1
add r12, r12, r1
bgt 16b