ref: ea8f175fa4fda3ae8f6e1c0d07d4a831b288d16b
parent: cb20faaaacef7e597d38329b703bb2a7a188fdc1
author: Siarhei Volkau <lis8215@gmail.com>
date: Sat Aug 23 13:22:40 EDT 2025
MIPS: optimize comb_filter for MIPS32r1-r5. MIPS32 since release 1 has support for the multiply-accumulate pattern with 32x32=>64 bit data, although in contrast to the DSP extension it has only one accumulator register and no builtin functions in GCC. Signed-off-by: Siarhei Volkau <lis8215@gmail.com> Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -54,7 +54,7 @@
#define PACKAGE_VERSION "unknown"
#endif
-#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
+#if defined(FIXED_POINT) && defined(__mips)
#include "mips/celt_mipsr1.h"
#endif
--- a/celt/mips/celt_mipsr1.h
+++ b/celt/mips/celt_mipsr1.h
@@ -36,6 +36,16 @@
#define CELT_C
+/* Enable the MIPS comb_filter override only where one of the accumulator
+ * back-ends defined further down in this header is usable:
+ *  - DSP ASE on a 32-bit ISA (compiler builtins), or
+ *  - MIPS32 release 1-5 (inline-asm mult/madd on the HI/LO pair).
+ * The second branch is restricted to __mips == 32, like the first: the
+ * fallback keeps a `long long` in the "x" (HI/LO) constraint, which is a
+ * double-word value only when registers are 32 bits wide. */
+#if defined (__mips_dsp) && __mips == 32
+
+#define OVERRIDE_COMB_FILTER_CONST
+#define OVERRIDE_comb_filter
+#elif defined(__mips_isa_rev) && __mips_isa_rev < 6 && __mips == 32
+
+#define OVERRIDE_COMB_FILTER_CONST
+#define OVERRIDE_comb_filter
+#endif
+
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -53,8 +63,43 @@
#include "celt_lpc.h"
#include "vq.h"
-#define OVERRIDE_COMB_FILTER_CONST
-#define OVERRIDE_comb_filter
+#if defined (__mips_dsp) && __mips == 32
+
+#define MIPS_MULT __builtin_mips_mult /* DSP build: multiply that starts the 64-bit accumulator */
+#define MIPS_MADD __builtin_mips_madd /* DSP build: multiply-accumulate into an existing accumulator */
+#define MIPS_EXTR __builtin_mips_extr_w /* DSP build: shift the accumulator right and extract a 32-bit word */
+
+#elif defined(__mips_isa_rev) && __mips_isa_rev < 6
+
+/* Non-DSP fallback: 32x32=>64 multiply of a and b using the `mult`
+ * instruction. The result never appears in the template; `mult` writes the
+ * HI/LO pair implicitly and the "x" constraint binds `acc` to that pair.
+ * Spelled __asm__/__volatile__ so this header also compiles in strict ISO
+ * modes (-std=c99/-std=c11), where the plain `asm` keyword is disabled. */
+static inline long long MIPS_MULT(int a, int b) {
+    long long acc;
+
+    __asm__ __volatile__ (
+        "mult %[a], %[b] \n"
+        : [acc] "=x"(acc)
+        : [a] "r"(a), [b] "r"(b)
+        :
+    );
+    return acc;
+}
+
+/* Non-DSP fallback: multiply-accumulate, returning acc + a*b via the `madd`
+ * instruction. `acc` is a read-write "x" (HI/LO pair) operand, so the
+ * compiler places the incoming value in HI/LO before the instruction and
+ * reads the updated value back afterwards.
+ * Spelled __asm__/__volatile__ so this header also compiles in strict ISO
+ * modes (-std=c99/-std=c11), where the plain `asm` keyword is disabled. */
+static inline long long MIPS_MADD(long long acc, int a, int b) {
+    __asm__ __volatile__ (
+        "madd %[a], %[b] \n"
+        : [acc] "+x"(acc)
+        : [a] "r"(a), [b] "r"(b)
+        :
+    );
+    return acc;
+}
+
+/* Plain-C counterpart of the DSP extract used in the other branch: shift the
+ * 64-bit accumulator right by `shift` bits, then narrow to opus_val32. */
+static inline opus_val32 MIPS_EXTR(long long acc, int shift) {
+    const long long shifted = acc >> shift;
+
+    return (opus_val32)shifted;
+}
+
+#endif
+
+#if defined (OVERRIDE_comb_filter)
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch)
@@ -101,13 +146,13 @@
f = MULT16_16_Q15(window[i],window[i]);
x0= x[i-T1+2];
- acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
- acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
- acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
- acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
- acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
- acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
- res = __builtin_mips_extr_w(acc, 15);
+ acc = MIPS_MULT((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
+ acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
+ acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
+ acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
+ acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
+ acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
+ res = MIPS_EXTR(acc, 15);
y[i] = x[i] + res;
@@ -136,10 +181,10 @@
long long acc;
x0=x[i-T1+2];
- acc = __builtin_mips_mult((int)g10, (int)x2);
- acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1));
- acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0));
- res = __builtin_mips_extr_w(acc, 15);
+ acc = MIPS_MULT((int)g10, (int)x2);
+ acc = MIPS_MADD(acc, (int)g11, (int)ADD32(x3,x1));
+ acc = MIPS_MADD(acc, (int)g12, (int)ADD32(x4,x0));
+ res = MIPS_EXTR(acc, 15);
y[i] = x[i] + res;
x4=x3;
@@ -148,5 +193,6 @@
x1=x0;
}
}
+#endif /* OVERRIDE_comb_filter */
#endif /* CELT_MIPSR1_H__ */
--
⑨