shithub: opus

--- a/celt/arm/celt_neon_intr.c

+++ b/celt/arm/celt_neon_intr.c

@@ -97,6 +97,14 @@

 #else

+#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)

+/* If we can, force the compiler to use an FMA instruction rather than break

+ *    vmlaq_lane_f32() into fmul/fadd. */

+#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)

+#endif

/*

  * Function: xcorr_kernel_neon_float

  * ---------------------------------

--- a/celt/arm/pitch_neon_intr.c

+++ b/celt/arm/pitch_neon_intr.c

@@ -130,6 +130,13 @@

 /* ========================================================================== */

+#ifdef __ARM_FEATURE_FMA

+/* If we can, force the compiler to use an FMA instruction rather than break

+   vmlaq_f32() into fmul/fadd. */

+#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)

+#endif

 #ifdef OPUS_CHECK_ASM

 /* This part of code simulates floating-point NEON operations. */

--- a/dnn/vec_neon.h

+++ b/dnn/vec_neon.h

@@ -49,6 +49,12 @@

 #endif

+#ifdef __ARM_FEATURE_FMA

+/* If we can, force the compiler to use an FMA instruction rather than break

+   vmlaq_f32() into fmul/fadd. */

+#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)

+#endif

 #ifndef LPCNET_TEST

 static inline float32x4_t exp4_approx(float32x4_t x) {

   int32x4_t i;

--

⑨