ref: d48a90da0da99b87d4f4db38a380217b34b28a0b
parent: 02e824d1253cdf8800c51fb39a359a062cbb0f45
parent: ea4bb892aa149695b43e0b4a90bdb638b4b8842d
author: guangwei <GuangweiWang@users.noreply.github.com>
date: Wed Apr 12 06:16:41 EDT 2017
Merge pull request #2721 from mstorsjo/fix-arm-rounding: Fix ARM downsampler to add horizontally first
--- a/codec/processing/src/arm/down_sample_neon.S
+++ b/codec/processing/src/arm/down_sample_neon.S
@@ -57,10 +57,11 @@
vld1.8 {q0,q1}, [r2]!
vld1.8 {q2,q3}, [r7]!
- vrhadd.u8 q0, q0, q2
- vrhadd.u8 q1, q1, q3
vuzp.8 q0, q1
+ vuzp.8 q2, q3
vrhadd.u8 q0, q0, q1
+ vrhadd.u8 q2, q2, q3
+ vrhadd.u8 q0, q0, q2
vst1.32 {q0}, [r0]!
add lr, #32
@@ -188,10 +189,11 @@
vld1.8 {q0,q1}, [r2]!
vld1.8 {q2,q3}, [r7]!
- vrhadd.u8 q0, q0, q2
- vrhadd.u8 q1, q1, q3
vuzp.8 q0, q1
+ vuzp.8 q2, q3
vrhadd.u8 q0, q0, q1
+ vrhadd.u8 q2, q2, q3
+ vrhadd.u8 q0, q0, q2
vst1.32 {q0}, [r0]!
subs r6, #1
bne comp_ds_bilinear_w_x32_loop1
--- a/codec/processing/src/arm64/down_sample_aarch64_neon.S
+++ b/codec/processing/src/arm64/down_sample_aarch64_neon.S
@@ -51,11 +51,13 @@
ld1 {v0.16b, v1.16b}, [x2], #32
ld1 {v2.16b, v3.16b}, [x7], #32
- urhadd v0.16b, v0.16b, v2.16b
- urhadd v1.16b, v1.16b, v3.16b
- uzp1 v2.16b, v0.16b, v1.16b
- uzp2 v3.16b, v0.16b, v1.16b
- urhadd v2.16b, v2.16b, v3.16b
+ uzp1 v4.16b, v0.16b, v1.16b
+ uzp2 v5.16b, v0.16b, v1.16b
+ uzp1 v6.16b, v2.16b, v3.16b
+ uzp2 v7.16b, v2.16b, v3.16b
+ urhadd v0.16b, v4.16b, v5.16b
+ urhadd v1.16b, v6.16b, v7.16b
+ urhadd v2.16b, v0.16b, v1.16b
st1 {v2.16b}, [x0], #16
add w9, w9, #32
@@ -92,11 +94,13 @@
ld1 {v0.16b, v1.16b}, [x2], #32
ld1 {v2.16b, v3.16b}, [x7], #32
- urhadd v0.16b, v0.16b, v2.16b
- urhadd v1.16b, v1.16b, v3.16b
- uzp1 v2.16b, v0.16b, v1.16b
- uzp2 v3.16b, v0.16b, v1.16b
- urhadd v2.16b, v2.16b, v3.16b
+ uzp1 v4.16b, v0.16b, v1.16b
+ uzp2 v5.16b, v0.16b, v1.16b
+ uzp1 v6.16b, v2.16b, v3.16b
+ uzp2 v7.16b, v2.16b, v3.16b
+ urhadd v0.16b, v4.16b, v5.16b
+ urhadd v1.16b, v6.16b, v7.16b
+ urhadd v2.16b, v0.16b, v1.16b
st1 {v2.16b}, [x0], #16
sub w6, w6, #1