shithub: opus

Download patch

ref: 5c3795b2879108f897d465f36885408d5325b77b
parent: 984f35b313d57280e3e1b108ba3418e7e6232e22
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Nov 22 14:05:52 EST 2023

Adding dotprod instruction to ARM rtcd

Used for DNN matrix multiplies

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -524,6 +524,7 @@
 
     add_sources_group(opus celt ${celt_sources_arm_neon_intr})
     add_sources_group(opus silk ${silk_sources_arm_neon_intr})
+    add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
 
     # silk arm neon depends on main_Fix.h
     target_include_directories(opus PRIVATE silk/fixed)
--- a/Makefile.am
+++ b/Makefile.am
@@ -85,9 +85,21 @@
 if HAVE_RTCD
 CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD)
 SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD)
+if ENABLE_DEEP_PLC
+LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD)
 endif
+endif
 
+if ENABLE_DEEP_PLC
+if HAVE_ARM_DOTPROD
+LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD)
+endif
 if HAVE_ARM_NEON_INTR
+LPCNET_SOURCES += $(DNN_SOURCES_NEON)
+endif
+endif
+
+if HAVE_ARM_NEON_INTR
 CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
 SILK_SOURCES += $(SILK_SOURCES_ARM_NEON_INTR)
 endif
@@ -441,4 +453,9 @@
                     $(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
 $(ARM_NEON_INTR_OBJ): CFLAGS += \
  $(OPUS_ARM_NEON_INTR_CFLAGS)  $(NE10_CFLAGS)
+endif
+
+if HAVE_ARM_DOTPROD
+ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo)
+$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS)
 endif
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -40,7 +40,8 @@
   celt_inner_prod_c,   /* ARMv4 */
   celt_inner_prod_c,   /* EDSP */
   celt_inner_prod_c,   /* Media */
-  celt_inner_prod_neon /* NEON */
+  celt_inner_prod_neon,/* NEON */
+  celt_inner_prod_neon /* DOTPROD */
 };
 
 void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@@ -48,7 +49,8 @@
   dual_inner_prod_c,   /* ARMv4 */
   dual_inner_prod_c,   /* EDSP */
   dual_inner_prod_c,   /* Media */
-  dual_inner_prod_neon /* NEON */
+  dual_inner_prod_neon,/* NEON */
+  dual_inner_prod_neon /* DOTPROD */
 };
 # endif
 
@@ -61,7 +63,8 @@
   celt_pitch_xcorr_c,               /* ARMv4 */
   MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
-  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+  MAY_HAVE_NEON(celt_pitch_xcorr),  /* NEON */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* DOTPROD */
 };
 
 #  endif
@@ -72,7 +75,8 @@
   celt_pitch_xcorr_c,              /* ARMv4 */
   celt_pitch_xcorr_c,              /* EDSP */
   celt_pitch_xcorr_c,              /* Media */
-  celt_pitch_xcorr_float_neon      /* Neon */
+  celt_pitch_xcorr_float_neon,     /* Neon */
+  celt_pitch_xcorr_float_neon      /* DOTPROD */
 };
 #  endif
 # endif /* FIXED_POINT */
@@ -90,6 +94,7 @@
   xcorr_kernel_c,                /* EDSP */
   xcorr_kernel_c,                /* Media */
   xcorr_kernel_neon_fixed,       /* Neon */
+  xcorr_kernel_neon_fixed        /* DOTPROD */
 };
 
 #endif
@@ -101,7 +106,8 @@
    opus_fft_alloc_arch_c,        /* ARMv4 */
    opus_fft_alloc_arch_c,        /* EDSP */
    opus_fft_alloc_arch_c,        /* Media */
-   opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
+   opus_fft_alloc_arm_neon,      /* Neon with NE10 library support */
+   opus_fft_alloc_arm_neon       /* DOTPROD with NE10 library support */
 };
 
 void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
@@ -108,7 +114,8 @@
    opus_fft_free_arch_c,         /* ARMv4 */
    opus_fft_free_arch_c,         /* EDSP */
    opus_fft_free_arch_c,         /* Media */
-   opus_fft_free_arm_neon        /* Neon with NE10 */
+   opus_fft_free_arm_neon,       /* Neon with NE10 */
+   opus_fft_free_arm_neon        /* DOTPROD with NE10 */
 };
 #   endif /* CUSTOM_MODES */
 
@@ -118,7 +125,8 @@
    opus_fft_c,                   /* ARMv4 */
    opus_fft_c,                   /* EDSP */
    opus_fft_c,                   /* Media */
-   opus_fft_neon                 /* Neon with NE10 */
+   opus_fft_neon,                /* Neon with NE10 */
+   opus_fft_neon                 /* DOTPROD with NE10 */
 };
 
 void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@@ -127,7 +135,8 @@
    opus_ifft_c,                   /* ARMv4 */
    opus_ifft_c,                   /* EDSP */
    opus_ifft_c,                   /* Media */
-   opus_ifft_neon                 /* Neon with NE10 */
+   opus_ifft_neon,                /* Neon with NE10 */
+   opus_ifft_neon                 /* DOTPROD with NE10 */
 };
 
 void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -139,7 +148,8 @@
    clt_mdct_forward_c,           /* ARMv4 */
    clt_mdct_forward_c,           /* EDSP */
    clt_mdct_forward_c,           /* Media */
-   clt_mdct_forward_neon         /* Neon with NE10 */
+   clt_mdct_forward_neon,        /* Neon with NE10 */
+   clt_mdct_forward_neon         /* DOTPROD with NE10 */
 };
 
 void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -151,7 +161,8 @@
    clt_mdct_backward_c,           /* ARMv4 */
    clt_mdct_backward_c,           /* EDSP */
    clt_mdct_backward_c,           /* Media */
-   clt_mdct_backward_neon         /* Neon with NE10 */
+   clt_mdct_backward_neon,        /* Neon with NE10 */
+   clt_mdct_backward_neon         /* DOTPROD with NE10 */
 };
 
 #  endif /* HAVE_ARM_NE10 */
--- a/celt/arm/armcpu.c
+++ b/celt/arm/armcpu.c
@@ -43,6 +43,7 @@
 #define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
 #define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
 #define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
+#define OPUS_CPU_ARM_DOTPROD_FLAG  (1<<OPUS_ARCH_ARM_DOTPROD)
 
 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@@ -127,6 +128,11 @@
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
           flags |= OPUS_CPU_ARM_NEON_FLAG;
 #  endif
+#  if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+        p = strstr(buf, " asimddp");
+        if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
+          flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+#  endif
       }
 # endif
 
@@ -144,6 +150,13 @@
 # endif
     }
 
+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+    flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+    flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+
     fclose(cpuinfo);
   }
   return flags;
@@ -180,7 +193,13 @@
   }
   arch++;
 
-  celt_assert(arch == OPUS_ARCH_ARM_NEON);
+  if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_NEON);
+    return arch;
+  }
+  arch++;
+
+  celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
   return arch;
 }
 
--- a/celt/arm/armcpu.h
+++ b/celt/arm/armcpu.h
@@ -46,6 +46,12 @@
 #  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
 # endif
 
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+#  define MAY_HAVE_DOTPROD(name) name ## _dotprod
+# else
+#  define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
+# endif
+
 # if defined(OPUS_ARM_PRESUME_EDSP)
 #  define PRESUME_EDSP(name) name ## _edsp
 # else
@@ -64,6 +70,12 @@
 #  define PRESUME_NEON(name) PRESUME_MEDIA(name)
 # endif
 
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+#  define PRESUME_DOTPROD(name) name ## _dotprod
+# else
+#  define PRESUME_DOTPROD(name) PRESUME_NEON(name)
+# endif
+
 # if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
 
@@ -71,6 +83,7 @@
 #define OPUS_ARCH_ARM_EDSP  (1)
 #define OPUS_ARCH_ARM_MEDIA (2)
 #define OPUS_ARCH_ARM_NEON  (3)
+#define OPUS_ARCH_ARM_DOTPROD  (4)
 
 # endif
 
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -35,13 +35,14 @@
   (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 #include "arm/armcpu.h"
 
-/* We currently support 4 ARM variants:
+/* We currently support 5 ARM variants:
  * arch[0] -> ARMv4
  * arch[1] -> ARMv5E
  * arch[2] -> ARMv6
  * arch[3] -> NEON
+ * arch[4] -> NEON+DOTPROD
  */
-#define OPUS_ARCHMASK 3
+#define OPUS_ARCHMASK 7 /* smallest all-ones mask that can hold index 4; tables are sized OPUS_ARCHMASK+1 */
 
 #elif defined(OPUS_HAVE_RTCD) && \
   ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -48,6 +48,8 @@
 get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
 get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
 get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2)
+get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon)
+get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod)
 
 get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources)
 get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources)
--- a/configure.ac
+++ b/configure.ac
@@ -334,6 +334,18 @@
                     )
                 ])
                 AC_SUBST(OPUS_ARM_MAY_HAVE_NEON)
+                AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[
+                    AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1,
+                      [Define if compiler supports DOTPROD instructions])
+                    AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [
+                        AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1,
+                          [Define if binary requires DOTPROD instruction support])
+                        asm_optimization="$asm_optimization (DOTPROD)"
+                    ],
+                        [rtcd_support="$rtcd_support (DOTPROD)"]
+                    )
+                ])
+                AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD)
                 dnl Make sure turning on RTCD gets us at least one
                 dnl instruction set.
                 AS_IF([test x"$rtcd_support" != x""],
@@ -377,6 +389,7 @@
 m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
 m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
 m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
+m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], [-march=armv8.2-a+dotprod])
 # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
 # -mfloat-abi=softfp for -mfpu=neon to work.  However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
 # this option will break things.
@@ -394,6 +407,7 @@
 AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
 AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
 AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
+AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@])
 
 AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
 AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
@@ -400,6 +414,7 @@
 AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
 AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
 AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
+AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])])
 
 AC_DEFUN([OPUS_PATH_NE10],
    [
@@ -543,6 +558,46 @@
             intrinsics_support="$intrinsics_support (NEON [Aarch64])"
          ])
 
+         OPUS_CHECK_INTRINSICS(
+            [Aarch64 dotprod],
+	    [$ARM_DOTPROD_INTR_CFLAGS],
+            [OPUS_ARM_MAY_HAVE_DOTPROD],
+            [OPUS_ARM_PRESUME_DOTPROD],
+            [[#include <arm_neon.h>
+            ]],
+            [[
+               static int32x4_t acc;
+               static int8x16_t a, b;
+               acc = vdotq_s32(acc, a, b);
+            ]]
+         )
+         AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+             [
+                OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_DOTPROD_INTR_CFLAGS"
+                AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS])
+             ]
+         )
+
+         AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],
+             [
+                AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics])
+                intrinsics_support="$intrinsics_support (DOTPROD)"
+
+                AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"],
+                [
+                   AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics])
+                   intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])"
+                ])
+
+                AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
+                   [AS_IF([test x"$rtcd_support" = x"no"],
+                      [rtcd_support="ARM (DOTPROD Intrinsics)"],
+                      [rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])])
+
+             ]
+         )
+
+
          AS_IF([test x"$intrinsics_support" = x""],
             [intrinsics_support=no],
             [intrinsics_support="ARM$intrinsics_support"])
@@ -760,6 +815,8 @@
 ])
 
 AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
+AM_CONDITIONAL([HAVE_ARM_DOTPROD],
+    [test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"])
 AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
     [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
 AM_CONDITIONAL([HAVE_ARM_NE10],
--- /dev/null
+++ b/dnn/arm/arm_dnn_map.c
@@ -1,0 +1,54 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arm/armcpu.h"
+#include "nnet.h"
+
+#if defined(OPUS_HAVE_RTCD)
+/* RTCD table behind compute_linear(), indexed by (arch & OPUS_ARCHMASK); defined whenever dnn_arm.h declares it. */
+#if !defined(OPUS_ARM_PRESUME_DOTPROD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || \
+    (defined(OPUS_ARM_MAY_HAVE_NEON) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
+void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+         const LinearLayer *linear,
+         float *out,
+         const float *in
+) = {
+  compute_linear_c,                /* ARMv4 */
+  compute_linear_c,                /* EDSP */
+  compute_linear_c,                /* Media */
+  MAY_HAVE_NEON(compute_linear),   /* NEON */
+  MAY_HAVE_DOTPROD(compute_linear) /* DOTPROD */
+};
+
+#endif
+
+
+#endif
--- /dev/null
+++ b/dnn/arm/dnn_arm.h
@@ -1,0 +1,64 @@
+/* Copyright (c) 2011-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DNN_ARM_H
+#define DNN_ARM_H
+
+#include "cpu_support.h"
+#include "opus_types.h"
+/* ARM kernels for compute_linear(); the macros below choose which one compute_linear() resolves to. */
+void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
+void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+
+/* If none of the branches below applies, compute_linear() is not overridden by this header. */
+#if defined(OPUS_ARM_PRESUME_DOTPROD)
+/* DOTPROD is presumed at build time: call the dotprod kernel directly; arch is evaluated but otherwise unused. */
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
+
+#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+/* NEON intrinsics are presumed and OPUS_ARM_MAY_HAVE_DOTPROD is not defined: call the NEON kernel directly. */
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+/* OPUS_HAVE_RTCD build: dispatch through a function-pointer table indexed by (arch & OPUS_ARCHMASK). */
+extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+                    const LinearLayer *linear,
+                    float *out,
+                    const float *in
+                    );
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) \
+    ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
+
+
+#endif
+
+
+
+#endif /* DNN_ARM_H */
--- /dev/null
+++ b/dnn/arm/nnet_dotprod.c
@@ -1,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+/* Stop the build if this file's compiler flags do not enable the dot-product extension. */
+#ifndef __ARM_FEATURE_DOTPROD
+#error nnet_dotprod.c is being compiled without DOTPROD enabled
+#endif
+
+#define RTCD_ARCH dotprod
+/* NOTE(review): nnet_arch.h presumably uses RTCD_ARCH to suffix the functions it defines (e.g. compute_linear_dotprod) -- confirm there. */
+#include "nnet_arch.h"
--- /dev/null
+++ b/dnn/arm/nnet_neon.c
@@ -1,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+                 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+/* Stop the build if this file's compiler flags do not enable NEON (either spelling of the predefined macro is accepted). */
+#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
+#error nnet_neon.c is being compiled without Neon enabled
+#endif
+
+#define RTCD_ARCH neon
+/* NOTE(review): nnet_arch.h presumably uses RTCD_ARCH to suffix the functions it defines (e.g. compute_linear_neon) -- confirm there. */
+#include "nnet_arch.h"
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -191,6 +191,10 @@
 void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
 
 
+#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/dnn_arm.h"
+#endif
+
 #if defined(OPUS_X86_MAY_HAVE_SSE2)
 #include "x86/dnn_x86.h"
 #endif
--- a/lpcnet_headers.mk
+++ b/lpcnet_headers.mk
@@ -14,7 +14,8 @@
 dnn/pitchdnn.h \
 dnn/pitchdnn_data.h \
 dnn/x86/dnn_x86.h \
-dnn/nnet_arch.h
+dnn/nnet_arch.h \
+dnn/arm/dnn_arm.h
 
 DRED_HEAD = \
 silk/dred_coding.h \
--- a/lpcnet_sources.mk
+++ b/lpcnet_sources.mk
@@ -27,3 +27,7 @@
 DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
 DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
 DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c
+
+DNN_SOURCES_ARM_RTCD = dnn/arm/arm_dnn_map.c
+DNN_SOURCES_DOTPROD = dnn/arm/nnet_dotprod.c
+DNN_SOURCES_NEON = dnn/arm/nnet_neon.c
--