ref: 528d5005f13cfefab9c73d3423b9e4cb6316662d
parent: d3bfd6c0e4da61ea0d423ef1837a098190944c1d
author: knik <knik>
date: Wed Aug 21 12:52:25 EDT 2002
new simplier and faster fft routine and correct real fft new real fft is just a complex fft wrapper so it is slower than optimal but by surprise it seems to be at least as fast as the old buggy function
--- a/libfaac/fft.c
+++ b/libfaac/fft.c
@@ -1,6 +1,7 @@
/*
* FAAC - Freeware Advanced Audio Coder
- * Copyright (C) 2001 Menno Bakker
+ * $Id: fft.c,v 1.7 2002/08/21 16:52:25 knik Exp $
+ * Copyright (C) 2002 Krzysztof Nikiel
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -16,430 +17,192 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * $Id: fft.c,v 1.6 2002/08/07 18:10:39 knik Exp $
*/
#include <math.h>
#include <stdlib.h>
+#include <stdio.h>
#include "fft.h"
#include "util.h"
-#define MAXLOGM 11 /* max FFT length = 2^MAXLOGM */
-#define TWOPI 6.28318530717958647692
-#define SQHALF 0.707106781186547524401
+#define MAXLOGM 11
-static double *tabr[MAXLOGM]; /* tables of butterfly angles */
+typedef float fftfloat;
-static void build_table(int logm)
+static fftfloat *costbl[MAXLOGM + 1] = {NULL}; // size/2
+static fftfloat *negsintbl[MAXLOGM + 1] = {NULL}; // size/2
+static unsigned short *reordertbl[MAXLOGM + 1] = {NULL}; //size
+
+static void reorder(double *x, int logm)
{
- static int m, m2, m4, m8, nel, n;
- static double *cn, *spcn, *smcn;
- static double ang, c, s;
+ int i;
+ int size = 1 << logm;
+ unsigned short *r; //size
- /* Compute a few constants */
- m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
- /* Allocate memory for tables */
- nel = m4 - 2;
- tabr[logm-4] = (double *)AllocMemory(3 * nel * sizeof(double));
-/*
- if ((tab[logm-4] = (double *) AllocMemory(3 * nel * sizeof(double))) == NULL) {
- printf("Error : RSFFT : not enough memory for cosine tables.\n");
- error_exit();
- }
-*/
+ if (!reordertbl[logm])
+ // create bit reversing table
+ {
+ reordertbl[logm] = AllocMemory(size * sizeof(*(reordertbl[0])));
- /* Initialize pointers */
- cn = tabr[logm-4]; spcn = cn + nel; smcn = spcn + nel;
+ for (i = 0; i < size; i++)
+ {
+ int reversed = 0;
+ int b0;
+ int tmp = i;
- /* Compute tables */
- for (n = 1; n < m4; n++) {
- if (n == m8) continue;
- ang = n * TWOPI / m;
- c = cos(ang); s = sin(ang);
- *cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
- }
-}
-
-/*--------------------------------------------------------------------*
- * Recursive part of the RSFFT algorithm. Not externally *
- * callable. *
- *--------------------------------------------------------------------*/
-
-static void rsrec(double *x, int logm)
-{
- static int m, m2, m4, m8, nel, n;
- static double *xr1, *xr2, *xi1;
- static double *cn, *spcn, *smcn;
- static double tmp1, tmp2;
-
- /* Compute trivial cases */
- if (logm < 2) {
- if (logm == 1) { /* length m = 2 */
- xr2 = x + 1;
- tmp1 = *x + *xr2;
- *xr2 = *x - *xr2;
- *x = tmp1;
- return;
+ for (b0 = 0; b0 < logm; b0++)
+ {
+ reversed = (reversed << 1) | (tmp & 1);
+ tmp >>= 1;
}
- else if (logm == 0) return; /* length m = 1 */
- }
+ reordertbl[logm][i] = reversed;
+ }
+ }
- /* Compute a few constants */
- m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
+ r = reordertbl[logm];
- /* Build tables of butterfly coefficients, if necessary */
- if ((logm >= 4) && (tabr[logm-4] == NULL))
- build_table(logm);
+ for (i = 0; i < size; i++)
+ {
+ int j = r[i];
+ double tmp;
- /* Step 1 */
- xr1 = x; xr2 = xr1 + m2;
- for (n = 0; n < m2; n++) {
- tmp1 = *xr1 + *xr2;
- *xr2 = *xr1 - *xr2;
- *xr1 = tmp1;
- xr1++; xr2++;
- }
+ if (j <= i)
+ continue;
- /* Step 2 */
- xr1 = x + m2 + m4;
- for (n = 0; n < m4; n++) {
- *xr1 = - *xr1;
- xr1++;
- }
+ tmp = x[i];
+ x[i] = x[j];
+ x[j] = tmp;
+ }
+}
- /* Step 3: multiplication by W_M^n */
- xr1 = x + m2; xi1 = xr1 + m4;
- if (logm >= 4) {
- nel = m4 - 2;
- cn = tabr[logm-4]; spcn = cn + nel; smcn = spcn + nel;
- }
- xr1++; xi1++;
- for (n = 1; n < m4; n++) {
- if (n == m8) {
- tmp1 = SQHALF * (*xr1 + *xi1);
- *xi1 = SQHALF * (*xi1 - *xr1);
- *xr1 = tmp1;
- } else {
- tmp2 = *cn++ * (*xr1 + *xi1);
- tmp1 = *spcn++ * *xr1 + tmp2;
- *xr1 = *smcn++ * *xi1 + tmp2;
- *xi1 = tmp1;
- }
- xr1++; xi1++;
- }
+static void fft_proc(double *xr, double *xi,
+ fftfloat *refac, fftfloat *imfac, int size)
+{
+ int step, shift, pos;
+ int exp, estep;
- /* Call rsrec again with half DFT length */
- rsrec(x, logm-1);
+ estep = size;
+ for (step = 1; step < size; step *= 2)
+ {
+ int x1;
+ int x2 = 0;
+ estep >>= 1;
+ for (pos = 0; pos < size; pos += (2 * step))
+ {
+ x1 = x2;
+ x2 += step;
+ exp = 0;
+ for (shift = 0; shift < step; shift++)
+ {
+ double v2r, v2i;
- /* Step 4: Call complex DFT routine, with quarter DFT length.
- Constants have to be recomputed, because they are static! */
- m = 1 << logm; m2 = m / 2; m4 = 3 * (m / 4);
- srrec(x + m2, x + m4, logm-2);
+ v2r = xr[x2] * refac[exp] - xi[x2] * imfac[exp];
+ v2i = xr[x2] * imfac[exp] + xi[x2] * refac[exp];
- /* Step 5: sign change & data reordering */
- m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 / 2;
- xr1 = x + m2 + m4;
- xr2 = x + m - 1;
- for (n = 0; n < m8; n++) {
- tmp1 = *xr1;
- *xr1++ = - *xr2;
- *xr2-- = - tmp1;
- }
- xr1 = x + m2 + 1;
- xr2 = x + m - 2;
- for (n = 0; n < m8; n++) {
- tmp1 = *xr1;
- *xr1++ = - *xr2;
- *xr2-- = tmp1;
- xr1++;
- xr2--;
- }
- if (logm == 2) x[3] = -x[3];
-}
+ xr[x2] = xr[x1] - v2r;
+ xr[x1] += v2r;
-/*--------------------------------------------------------------------*
- * Direct transform for real inputs *
- *--------------------------------------------------------------------*/
+ xi[x2] = xi[x1] - v2i;
-/*--------------------------------------------------------------------*
- * Data unshuffling according to bit-reversed indexing. *
- * *
- * Bit reversal is done using Evan's algorithm (Ref: D. M. W. *
- * Evans, "An improved digit-reversal permutation algorithm ...", *
- * IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125). *
- *--------------------------------------------------------------------*/
-static int brseed[256]; /* Evans' seed table */
-static int brsflg; /* flag for table building */
+ xi[x1] += v2i;
-static void BR_permute(double *x, int logm)
-{
- int i, j, imax, lg2, n;
- int off, fj, gno, *brp;
- double tmp, *xp, *xq;
+ exp += estep;
- lg2 = logm >> 1;
- n = 1 << lg2;
- if (logm & 1) lg2++;
-
- /* Create seed table if not yet built */
- if (brsflg != logm) {
- brsflg = logm;
- brseed[0] = 0;
- brseed[1] = 1;
- for (j = 2; j <= lg2; j++) {
- imax = 1 << (j - 1);
- for (i = 0; i < imax; i++) {
- brseed[i] <<= 1;
- brseed[i + imax] = brseed[i] + 1;
- }
+ x1++;
+ x2++;
}
- }
-
- /* Unshuffling loop */
- for (off = 1; off < n; off++) {
- fj = n * brseed[off]; i = off; j = fj;
- tmp = x[i]; x[i] = x[j]; x[j] = tmp;
- xp = &x[i];
- brp = &brseed[1];
- for (gno = 1; gno < brseed[off]; gno++) {
- xp += n;
- j = fj + *brp++;
- xq = x + j;
- tmp = *xp; *xp = *xq; *xq = tmp;
- }
- }
+ }
+ }
}
-/*--------------------------------------------------------------------*
- * Recursive part of the SRFFT algorithm. *
- *--------------------------------------------------------------------*/
-
-static void srrec(double *xr, double *xi, int logm)
+static void check_tables(int logm)
{
- static int m, m2, m4, m8, nel, n;
- static double *xr1, *xr2, *xi1, *xi2;
- static double *cn, *spcn, *smcn, *c3n, *spc3n, *smc3n;
- static double tmp1, tmp2, ang, c, s;
- static double *tab[MAXLOGM];
- /* Compute trivial cases */
- if (logm < 3) {
- if (logm == 2) { /* length m = 4 */
- xr2 = xr + 2;
- xi2 = xi + 2;
- tmp1 = *xr + *xr2;
- *xr2 = *xr - *xr2;
- *xr = tmp1;
- tmp1 = *xi + *xi2;
- *xi2 = *xi - *xi2;
- *xi = tmp1;
- xr1 = xr + 1;
- xi1 = xi + 1;
- xr2++;
- xi2++;
- tmp1 = *xr1 + *xr2;
- *xr2 = *xr1 - *xr2;
- *xr1 = tmp1;
- tmp1 = *xi1 + *xi2;
- *xi2 = *xi1 - *xi2;
- *xi1 = tmp1;
- xr2 = xr + 1;
- xi2 = xi + 1;
- tmp1 = *xr + *xr2;
- *xr2 = *xr - *xr2;
- *xr = tmp1;
- tmp1 = *xi + *xi2;
- *xi2 = *xi - *xi2;
- *xi = tmp1;
- xr1 = xr + 2;
- xi1 = xi + 2;
- xr2 = xr + 3;
- xi2 = xi + 3;
- tmp1 = *xr1 + *xi2;
- tmp2 = *xi1 + *xr2;
- *xi1 = *xi1 - *xr2;
- *xr2 = *xr1 - *xi2;
- *xr1 = tmp1;
- *xi2 = tmp2;
- return;
- }
- else if (logm == 1) { /* length m = 2 */
- xr2 = xr + 1;
- xi2 = xi + 1;
- tmp1 = *xr + *xr2;
- *xr2 = *xr - *xr2;
- *xr = tmp1;
- tmp1 = *xi + *xi2;
- *xi2 = *xi - *xi2;
- *xi = tmp1;
- return;
- }
- else if (logm == 0) return; /* length m = 1 */
- }
+ if (!(costbl[logm]))
+ {
+ int i;
+ int size = 1 << logm;
- /* Compute a few constants */
- m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
+ if (negsintbl[logm])
+ FreeMemory(negsintbl[logm]);
- /* Build tables of butterfly coefficients, if necessary */
- if ((logm >= 4) && (tab[logm-4] == NULL)) {
+ costbl[logm] = AllocMemory((size / 2) * sizeof(*(costbl[0])));
+ negsintbl[logm] = AllocMemory((size / 2) * sizeof(*(negsintbl[0])));
- /* Allocate memory for tables */
- nel = m4 - 2;
- tab[logm-4] = (double *)AllocMemory(6 * nel * sizeof(double));
-/*
- if ((tab[logm-4] = (double *)AllocMemory(6 * nel * sizeof(double))) == NULL) {
- error_exit();
- }
-*/
+ for (i = 0; i < (size >> 1); i++)
+ {
+ double theta = 2.0 * M_PI * ((double) i) / (double) size;
+ costbl[logm][i] = cos(theta);
+ negsintbl[logm][i] = -sin(theta);
+ }
+ }
+}
- /* Initialize pointers */
- cn = tab[logm-4]; spcn = cn + nel; smcn = spcn + nel;
- c3n = smcn + nel; spc3n = c3n + nel; smc3n = spc3n + nel;
+void fft(double *xr, double *xi, int logm)
+{
+ if (logm > MAXLOGM)
+ {
+ fprintf(stderr, "fft size too big\n");
+ exit(1);
+ }
- /* Compute tables */
- for (n = 1; n < m4; n++) {
- if (n == m8) continue;
- ang = n * TWOPI / m;
- c = cos(ang); s = sin(ang);
- *cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
- ang = 3 * n * TWOPI / m;
- c = cos(ang); s = sin(ang);
- *c3n++ = c; *spc3n++ = - (s + c); *smc3n++ = s - c;
- }
- }
+ if (logm < 1)
+ {
+ //printf("logm < 1\n");
+ return;
+ }
- /* Step 1 */
- xr1 = xr; xr2 = xr1 + m2;
- xi1 = xi; xi2 = xi1 + m2;
- for (n = 0; n < m2; n++) {
- tmp1 = *xr1 + *xr2;
- *xr2 = *xr1 - *xr2;
- *xr1 = tmp1;
- tmp2 = *xi1 + *xi2;
- *xi2 = *xi1 - *xi2;
- *xi1 = tmp2;
- xr1++; xr2++; xi1++; xi2++;
- }
+ check_tables(logm);
- /* Step 2 */
- xr1 = xr + m2; xr2 = xr1 + m4;
- xi1 = xi + m2; xi2 = xi1 + m4;
- for (n = 0; n < m4; n++) {
- tmp1 = *xr1 + *xi2;
- tmp2 = *xi1 + *xr2;
- *xi1 = *xi1 - *xr2;
- *xr2 = *xr1 - *xi2;
- *xr1 = tmp1;
- *xi2 = tmp2;
- xr1++; xr2++; xi1++; xi2++;
- }
+ reorder(xr, logm);
+ reorder(xi, logm);
- /* Steps 3 & 4 */
- xr1 = xr + m2; xr2 = xr1 + m4;
- xi1 = xi + m2; xi2 = xi1 + m4;
- if (logm >= 4) {
- nel = m4 - 2;
- cn = tab[logm-4]; spcn = cn + nel; smcn = spcn + nel;
- c3n = smcn + nel; spc3n = c3n + nel; smc3n = spc3n + nel;
- }
- xr1++; xr2++; xi1++; xi2++;
- for (n = 1; n < m4; n++) {
- if (n == m8) {
- tmp1 = SQHALF * (*xr1 + *xi1);
- *xi1 = SQHALF * (*xi1 - *xr1);
- *xr1 = tmp1;
- tmp2 = SQHALF * (*xi2 - *xr2);
- *xi2 = -SQHALF * (*xr2 + *xi2);
- *xr2 = tmp2;
- } else {
- tmp2 = *cn++ * (*xr1 + *xi1);
- tmp1 = *spcn++ * *xr1 + tmp2;
- *xr1 = *smcn++ * *xi1 + tmp2;
- *xi1 = tmp1;
- tmp2 = *c3n++ * (*xr2 + *xi2);
- tmp1 = *spc3n++ * *xr2 + tmp2;
- *xr2 = *smc3n++ * *xi2 + tmp2;
- *xi2 = tmp1;
- }
- xr1++; xr2++; xi1++; xi2++;
- }
-
- /* Call ssrec again with half DFT length */
- srrec(xr, xi, logm-1);
-
- /* Call ssrec again twice with one quarter DFT length.
- Constants have to be recomputed, because they are static! */
- m = 1 << logm; m2 = m / 2;
- srrec(xr + m2, xi + m2, logm-2);
- m = 1 << logm; m4 = 3 * (m / 4);
- srrec(xr + m4, xi + m4, logm-2);
+ fft_proc(xr, xi, costbl[logm], negsintbl[logm], 1 << logm);
}
-/*--------------------------------------------------------------------*
- * Direct transform *
- *--------------------------------------------------------------------*/
-void srfft(double *xr, double *xi, int logm)
+void rfft(double *x, int logm)
{
- /* Call recursive routine */
- srrec(xr, xi, logm);
+ static double xi[1 << MAXLOGM];
- /* Output array unshuffling using bit-reversed indices */
- if (logm > 1) {
- BR_permute(xr, logm);
- BR_permute(xi, logm);
+ if (logm > MAXLOGM)
+ {
+ fprintf(stderr, "fft size too big\n");
+ exit(1);
}
-}
-/*--------------------------------------------------------------------*
- * Inverse transform. Uses Duhamel's trick (Ref: P. Duhamel *
- * et. al., "On computing the inverse DFT", IEEE Trans. ASSP, *
- * Feb. 1988, pp. 285-286). *
- *--------------------------------------------------------------------*/
-void srifft(double *xr, double *xi, int logm)
-{
- int i, m;
- double fac, *xrp, *xip;
+ memset(xi, 0, (1 << logm) * sizeof(xi[0]));
- /* Call direct FFT, swapping real & imaginary addresses */
- srfft(xi, xr, logm);
+ fft(x, xi, logm);
- /* Normalization */
- m = 1 << logm;
- fac = 1.0 / m;
- xrp = xr; xip = xi;
- for (i = 0; i < m; i++) {
- *xrp++ *= fac;
- *xip++ *= fac;
- }
+ memcpy(x + (1 << (logm - 1)), xi, (1 << (logm - 1)) * sizeof(*x));
}
-void rsfft(double *x, int logm)
+void ffti(double *xr, double *xi, int logm)
{
- int i;
- int size;
+ int i, size;
+ double fac;
+ double *xrp, *xip;
- /* Call recursive routine */
- rsrec(x, logm);
+ fft(xi, xr, logm);
- /* Output array unshuffling using bit-reversed indices */
- if (logm > 1) {
- BR_permute(x, logm);
- }
+ size = 1 << logm;
+ fac = 1.0 / size;
+ xrp = xr;
+ xip = xi;
+ for (i = 0; i < size; i++)
+ {
+ *xrp++ *= fac;
+ *xip++ *= fac;
+ }
+}
- size = 1 << (logm - 1);
+/*
+$Log: fft.c,v $
+Revision 1.7 2002/08/21 16:52:25 knik
+new simplier and faster fft routine and correct real fft
+new real fft is just a complex fft wrapper so it is slower than optimal but
+by surprise it seems to be at least as fast as the old buggy function
- for (i = 0; i < (size / 2); i++)
- {
- double tmp;
-
- tmp = 1 * x[i];
- x[i] = 1 * x[size - 1 - i];
- x[size - 1 - i] = tmp;
-
- tmp = 1 * x[size + i];
- x[size + i] = 1 * x[2*size - 1 - i];
- x[2*size - 1 - i] = tmp;
- }
-}
+*/