ref: c143b72c4c321b5907cbe839079efd3b2455ac9a
parent: ee1bb69f2d7c086df72a01632b0563c92896e8b1
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Mon Nov 27 13:08:20 EST 2023
Enabling DNN optimizations for ARMv7. Adds RTCD tables for compute_activation() and compute_conv2d().
--- a/dnn/arm/arm_dnn_map.c
+++ b/dnn/arm/arm_dnn_map.c
@@ -50,5 +50,39 @@
#endif
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+/* RTCD table for compute_activation(): one function pointer per arch slot; dnn_arm.h indexes it with (arch) & OPUS_ARCHMASK. */
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+) = {
+ compute_activation_c, /* default */
+ compute_activation_c, /* slot 1: same C routine as the default slot */
+ compute_activation_c, /* slot 2: same C routine as the default slot */
+ MAY_HAVE_NEON(compute_activation), /* neon (MAY_HAVE_NEON is defined elsewhere; presumably picks the _neon variant when it is built - confirm) */
+ MAY_HAVE_DOTPROD(compute_activation) /* dotprod (MAY_HAVE_DOTPROD is defined elsewhere; presumably picks the _dotprod variant when it is built - confirm) */
+}; /* five initializers only: any slots beyond them are implicitly null pointers */
+
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])( /* RTCD table for compute_conv2d(); dnn_arm.h indexes it with (arch) & OPUS_ARCHMASK */
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+) = {
+ compute_conv2d_c, /* default */
+ compute_conv2d_c, /* slot 1: same C routine as the default slot */
+ compute_conv2d_c, /* slot 2: same C routine as the default slot */
+ MAY_HAVE_NEON(compute_conv2d), /* neon (MAY_HAVE_NEON is defined elsewhere; presumably picks the _neon variant when it is built - confirm) */
+ MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod (MAY_HAVE_DOTPROD is defined elsewhere; presumably picks the _dotprod variant when it is built - confirm) */
+}; /* five initializers only: any slots beyond them are implicitly null pointers */
+
+
+#endif /* (OPUS_ARM_MAY_HAVE_DOTPROD || OPUS_ARM_MAY_HAVE_NEON) && !OPUS_ARM_PRESUME_NEON */
+
#endif
--- a/dnn/arm/dnn_arm.h
+++ b/dnn/arm/dnn_arm.h
@@ -34,7 +34,12 @@
void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_neon(float *output, const float *input, int N, int activation); /* what compute_activation() expands to when OPUS_ARM_PRESUME_NEON is defined */
+void compute_activation_dotprod(float *output, const float *input, int N, int activation); /* presumably reached through MAY_HAVE_DOTPROD(compute_activation) in arm_dnn_map.c - confirm */
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); /* what compute_conv2d() expands to when OPUS_ARM_PRESUME_NEON is defined */
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation); /* presumably reached through MAY_HAVE_DOTPROD(compute_conv2d) in arm_dnn_map.c - confirm */
+
#if defined(OPUS_ARM_PRESUME_DOTPROD)
#define OVERRIDE_COMPUTE_LINEAR
@@ -59,6 +64,41 @@
#endif
+#if defined(OPUS_ARM_PRESUME_NEON)
+/* With OPUS_ARM_PRESUME_NEON defined, both entry points call the _neon routines directly; arch is evaluated and then discarded. NOTE(review): the OVERRIDE_* flags are presumably tested by the generic DNN code - confirm. */
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+/* Otherwise, with OPUS_HAVE_RTCD and a MAY_HAVE NEON/DOTPROD build, calls go through the per-arch tables defined in arm_dnn_map.c. */
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+ ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation)) /* arch is masked so the index stays inside the OPUS_ARCHMASK + 1 slots */
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])( /* per-arch table for compute_conv2d(); defined in arm_dnn_map.c */
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+ ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation)) /* arch is masked so the index stays inside the OPUS_ARCHMASK + 1 slots */
+
+
+#endif /* OPUS_ARM_PRESUME_NEON / OPUS_HAVE_RTCD dispatch */
#endif /* DNN_ARM_H */
--
⑨