ref: cc11c078cd8e1baf642ef0f1d2deaa98af596581
parent: c9af8f80f7976a7694c710f1426d816a67364a56
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Nov 25 22:36:46 EST 2023
First step towards DNN optimization for ARMv7 Neon. Still missing some intrinsics.
--- a/Makefile.am
+++ b/Makefile.am
@@ -450,6 +450,7 @@
if HAVE_ARM_NEON_INTR
ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
$(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
+ $(DNN_SOURCES_NEON:.c=.lo) \
$(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
$(ARM_NEON_INTR_OBJ): CFLAGS += \
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -34,6 +34,13 @@
#include <arm_neon.h>
#include "os_support.h"
+#if defined(__arm__) && !defined(__aarch64__)
+/* Emulate vcvtnq_s32_f32() for ARMv7 Neon (32-bit ARM, non-AArch64 builds only): convert x to fixed point with 8 fractional bits via vcvtq_n_s32_f32(), then apply a rounding right shift by 8 via vrshrq_n_s32(). NOTE(review): ties presumably round up rather than to even, and the 8 fractional bits shrink the usable input range -- confirm this is acceptable for callers. */
+static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
+ return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8);
+}
+#endif
+
#ifndef LPCNET_TEST
static inline float32x4_t exp4_approx(float32x4_t x) {
int32x4_t i;
--
⑨