shithub: opus

Download patch

ref: c143b72c4c321b5907cbe839079efd3b2455ac9a
parent: ee1bb69f2d7c086df72a01632b0563c92896e8b1
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Mon Nov 27 13:08:20 EST 2023

Enabling DNN optimizations for ARMv7

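Adds run-time CPU detection (RTCD) dispatch tables for compute_activation() and compute_conv2d(), and maps both directly to their NEON versions when OPUS_ARM_PRESUME_NEON is defined.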

--- a/dnn/arm/arm_dnn_map.c
+++ b/dnn/arm/arm_dnn_map.c
@@ -50,5 +50,39 @@
 
 #endif
 
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+         float *output,
+         const float *input,
+         int N,
+         int activation
+) = {
+    compute_activation_c,                /* default */
+    compute_activation_c,
+    compute_activation_c,
+    MAY_HAVE_NEON(compute_activation),   /* neon  */
+    MAY_HAVE_DOTPROD(compute_activation) /* dotprod  */
+};
+
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+         const Conv2dLayer *conv,
+         float *out,
+         float *mem,
+         const float *in,
+         int height,
+         int hstride,
+         int activation
+) = {
+    compute_conv2d_c,                /* default */
+    compute_conv2d_c,
+    compute_conv2d_c,
+    MAY_HAVE_NEON(compute_conv2d),   /* neon  */
+    MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod  */
+};
+
+
+#endif
+
 
 #endif
--- a/dnn/arm/dnn_arm.h
+++ b/dnn/arm/dnn_arm.h
@@ -34,7 +34,12 @@
 void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
 void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
 
+void compute_activation_neon(float *output, const float *input, int N, int activation);
+void compute_activation_dotprod(float *output, const float *input, int N, int activation);
 
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+
 #if defined(OPUS_ARM_PRESUME_DOTPROD)
 
 #define OVERRIDE_COMPUTE_LINEAR
@@ -59,6 +64,41 @@
 
 #endif
 
+#if defined(OPUS_ARM_PRESUME_NEON)
+
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+                    float *output,
+                    const float *input,
+                    int N,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+    ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+                    const Conv2dLayer *conv,
+                    float *out,
+                    float *mem,
+                    const float *in,
+                    int height,
+                    int hstride,
+                    int activation
+                    );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+    ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
 
 
 #endif /* DNN_ARM_H */
--