ref: db6dad446c7d7f97ee3a81e50253fd0459fa99d6
parent: cc11c078cd8e1baf642ef0f1d2deaa98af596581
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sun Nov 26 17:21:29 EST 2023
Fix ARMv7 optimizations for DNN code
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -39,6 +39,14 @@
static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8);
}
+
+static OPUS_INLINE int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) { /* Pairwise add of adjacent int16 lanes, built from 64-bit vpadd_s16; presumably a fallback for targets lacking the native 128-bit intrinsic (guard condition not visible here). */
+ return vcombine_s16(vpadd_s16(vget_low_s16(a), vget_high_s16(a)), vpadd_s16(vget_low_s16(b), vget_high_s16(b))); /* low half of result: pair sums of a; high half: pair sums of b */
+}
+
+static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) { /* Widening multiply of the upper eight int8 lanes of a and b, returning eight int16 products. */
+ return vmull_s8(vget_high_s8(a), vget_high_s8(b)); /* take the high 64 bits of each operand, then use the 64-bit widening multiply */
+}
#endif
#ifndef LPCNET_TEST
--
⑨