ref: ab9082c00bf02763d47bab09698b37e4ef32d12f
parent: ea8f175fa4fda3ae8f6e1c0d07d4a831b288d16b
author: Siarhei Volkau <lis8215@gmail.com>
date: Sat Aug 23 13:26:29 EDT 2025
MIPS: unroll fft_downshift loops for performance. Simple loop with shift by 1 unrolled 2 times. More complex loop unrolled 4 times. Signed-off-by: Siarhei Volkau <lis8215@gmail.com> Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -535,6 +535,7 @@
#endif /* CUSTOM_MODES */
#ifdef FIXED_POINT
+#ifndef OVERRIDE_fft_downshift
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
int shift;
shift = IMIN(step, *total);
@@ -553,6 +554,7 @@
}
}
}
+#endif /* OVERRIDE_fft_downshift */
#else
#define fft_downshift(x, N, total, step)
#endif
--- a/celt/mips/kiss_fft_mipsr1.h
+++ b/celt/mips/kiss_fft_mipsr1.h
@@ -213,4 +213,59 @@
#endif /* defined(OVERRIDE_kf_bfly5) */
+#define OVERRIDE_fft_downshift
+/* Right-shifts .r and .i of x[0..N-1] by shift = IMIN(step, *total) and deducts that amount from *total. Just unroll tight loop (2x when shift is 1, 4x otherwise), should be ok for any mips */
+static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
+ int shift;
+ shift = IMIN(step, *total); /* capped by what is left in *total (IMIN is presumably an integer min - defined elsewhere) */
+ *total -= shift; /* account for the shift applied by this call */
+ if (shift == 1) { /* shift-by-one path: uses SHR32, unrolled 2x */
+ int i;
+ for (i = 0; i < N - 1; i += 2) { /* two elements per iteration; stops before an unpaired last element */
+ x[i].r = SHR32(x[i].r, 1);
+ x[i].i = SHR32(x[i].i, 1);
+ x[i+1].r = SHR32(x[i+1].r, 1);
+ x[i+1].i = SHR32(x[i+1].i, 1);
+ }
+ if (N & 1) { /* odd N: one element remains at x[i] */
+ x[i].r = SHR32(x[i].r, 1);
+ x[i].i = SHR32(x[i].i, 1);
+ }
+ } else if (shift > 0) { /* larger shifts: uses PSHR32, unrolled 4x; shift <= 0 leaves x untouched */
+ int i;
+ for (i = 0; i < N - 3; i += 4) { /* four elements per iteration */
+ x[i].r = PSHR32(x[i].r, shift);
+ x[i].i = PSHR32(x[i].i, shift);
+ x[i+1].r = PSHR32(x[i+1].r, shift);
+ x[i+1].i = PSHR32(x[i+1].i, shift);
+ x[i+2].r = PSHR32(x[i+2].r, shift);
+ x[i+2].i = PSHR32(x[i+2].i, shift);
+ x[i+3].r = PSHR32(x[i+3].r, shift);
+ x[i+3].i = PSHR32(x[i+3].i, shift);
+ }
+ switch (N & 3) { /* 0..3 leftover elements, starting at x[i] where the loop stopped */
+ case 3:
+ x[i].r = PSHR32(x[i].r, shift);
+ x[i].i = PSHR32(x[i].i, shift);
+ x[i+1].r = PSHR32(x[i+1].r, shift);
+ x[i+1].i = PSHR32(x[i+1].i, shift);
+ x[i+2].r = PSHR32(x[i+2].r, shift);
+ x[i+2].i = PSHR32(x[i+2].i, shift);
+ break;
+ case 2:
+ x[i].r = PSHR32(x[i].r, shift);
+ x[i].i = PSHR32(x[i].i, shift);
+ x[i+1].r = PSHR32(x[i+1].r, shift);
+ x[i+1].i = PSHR32(x[i+1].i, shift);
+ break;
+ case 1:
+ x[i].r = PSHR32(x[i].r, shift);
+ x[i].i = PSHR32(x[i].i, shift);
+ break;
+ case 0: /* N is a multiple of 4: nothing left over */
+ break;
+ }
+ }
+}
+
#endif /* KISS_FFT_MIPSR1_H */
--
⑨