shithub: opus

Download patch

ref: cc11c078cd8e1baf642ef0f1d2deaa98af596581
parent: c9af8f80f7976a7694c710f1426d816a67364a56
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Nov 25 22:36:46 EST 2023

First step towards DNN optimization for ARMv7 Neon

Still missing some intrinsics

--- a/Makefile.am
+++ b/Makefile.am
@@ -450,6 +450,7 @@
 if HAVE_ARM_NEON_INTR
 ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
                     $(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
+                    $(DNN_SOURCES_NEON:.c=.lo) \
                     $(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
 $(ARM_NEON_INTR_OBJ): CFLAGS += \
  $(OPUS_ARM_NEON_INTR_CFLAGS)  $(NE10_CFLAGS)
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -34,6 +34,13 @@
 #include <arm_neon.h>
 #include "os_support.h"
 
+#if defined(__arm__) && !defined(__aarch64__)
+/* Emulate vcvtnq_s32_f32() for ARMv7 Neon: convert to fixed point with 8 fractional bits, then rounding-shift those bits away. */
+static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
+  return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8); /* NOTE(review): tie handling and saturation range may differ from the native intrinsic -- confirm */
+}
+#endif /* __arm__ && !__aarch64__ */
+
 #ifndef LPCNET_TEST
 static inline float32x4_t exp4_approx(float32x4_t x) {
   int32x4_t i;
--