ref: 6ccab3bfbeac67b5707c8676085b4664ee51c321
parent: 2399a5dd623b9927662bc93ffb6081fd6fb03843
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Tue Jul 15 10:27:17 EDT 2025
Add dred_compare tool
--- a/Makefile.am
+++ b/Makefile.am
@@ -302,10 +302,13 @@
if EXTRA_PROGRAMS
if ENABLE_DEEP_PLC
-noinst_PROGRAMS += fargan_demo dump_data dump_weights_blob
+noinst_PROGRAMS += fargan_demo dump_data dump_weights_blob dred_compare
fargan_demo_SOURCES = dnn/fargan_demo.c
fargan_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
+dred_compare_SOURCES = dnn/dred_compare.c
+dred_compare_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
+
dump_data_SOURCES = dnn/dump_data.c
dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
@@ -361,6 +364,7 @@
tests/meson.build \
doc/meson.build \
tests/run_vectors.sh \
+ celt/mini_kfft.c \
celt/arm/arm2gnu.pl \
celt/arm/celt_pitch_xcorr_arm.s
--- /dev/null
+++ b/celt/mini_kfft.c
@@ -1,0 +1,498 @@
+/*
+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * See COPYING file for more information.
+ */
+/* This is a minimalist, concatenated version of kiss-fft just to compute real scalar FFTs. */
+
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+
+#define kiss_fft_scalar float
+
+typedef struct {
+ kiss_fft_scalar r;
+ kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous
+ buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_free KISS_FFT_FREE
+
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_state{
+ int nfft;
+ int inverse;
+ int factors[2*MAXFACTORS];
+ kiss_fft_cpx twiddles[1];
+};
+
+/*
+ Explanation of macros dealing with complex math:
+
+ C_MUL(m,a,b) : m = a*b
+ C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
+ C_SUB( res, a,b) : res = a - b
+ C_SUBFROM( res , a) : res -= a
+ C_ADDTO( res , a) : res += a
+ * */
+
+# define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+# define C_FIXDIV(c,div) /* NOOP */
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r *= (s);\
+ (c).i *= (s); }while(0)
+
+
+#define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+
+#define C_ADD( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+ (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
+ }while(0)
+#define C_SUB( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+ (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
+ }while(0)
+#define C_ADDTO( res , a)\
+ do { \
+ CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+ (res).r += (a).r; (res).i += (a).i;\
+ }while(0)
+
+#define C_SUBFROM( res , a)\
+ do {\
+ CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+ (res).r -= (a).r; (res).i -= (a).i; \
+ }while(0)
+
+
+# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+# define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
+
+#define kf_cexp(x,phase) \
+ do{ \
+ (x)->r = KISS_FFT_COS(phase);\
+ (x)->i = KISS_FFT_SIN(phase);\
+ }while(0)
+
+
+
+
+static void kf_bfly2(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx * Fout2;
+ kiss_fft_cpx * tw1 = st->twiddles;
+ kiss_fft_cpx t;
+ Fout2 = Fout + m;
+ do{
+ C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+ C_MUL (t, *Fout2 , *tw1);
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t );
+ C_ADDTO( *Fout , t );
+ ++Fout2;
+ ++Fout;
+ }while (--m);
+}
+
+static void kf_bfly4(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ const size_t m
+ )
+{
+ kiss_fft_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ size_t k=m;
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+
+
+ tw3 = tw2 = tw1 = st->twiddles;
+
+ do {
+ C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+
+ if(st->inverse) {
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ }else{
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ }
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly3(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ size_t m
+ )
+{
+ size_t k=m;
+ const size_t m2 = 2*m;
+ kiss_fft_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride*m];
+
+ tw1=tw2=st->twiddles;
+
+ do{
+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+ C_MUL(scratch[1],Fout[m] , *tw1);
+ C_MUL(scratch[2],Fout[m2] , *tw2);
+
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR( scratch[0] , epi3.i );
+
+ C_ADDTO(*Fout,scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ }while(--k);
+}
+
+static void kf_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya,yb;
+ ya = twiddles[fstride*m];
+ yb = twiddles[fstride*2*m];
+
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ tw=st->twiddles;
+ for ( u=0; u<m; ++u ) {
+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+ scratch[0] = *Fout0;
+
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+}
+
+
+static
+void kf_work(
+ kiss_fft_cpx * Fout,
+ const kiss_fft_cpx * f,
+ const size_t fstride,
+ int in_stride,
+ int * factors,
+ const kiss_fft_cfg st
+ )
+{
+ kiss_fft_cpx * Fout_beg=Fout;
+ const int p=*factors++; /* the radix */
+ const int m=*factors++; /* stage's fft length/p */
+ const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+ if (m==1) {
+ do{
+ *Fout = *f;
+ f += fstride*in_stride;
+ }while(++Fout != Fout_end );
+ }else{
+ do{
+ /* recursive call:
+ DFT of size m*p performed by doing
+ p instances of smaller DFTs of size m,
+ each one takes a decimated version of the input */
+ kf_work( Fout , f, fstride*p, in_stride, factors,st);
+ f += fstride*in_stride;
+ }while( (Fout += m) != Fout_end );
+ }
+
+ Fout=Fout_beg;
+
+ /* recombine the p smaller DFTs*/
+ switch (p) {
+ case 2: kf_bfly2(Fout,fstride,st,m); break;
+ case 3: kf_bfly3(Fout,fstride,st,m); break;
+ case 4: kf_bfly4(Fout,fstride,st,m); break;
+ case 5: kf_bfly5(Fout,fstride,st,m); break;
+ default: assert(0);
+ }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+ where
+ p[i] * m[i] = m[i-1]
+ m0 = n */
+static
+void kf_factor(int n,int * facbuf)
+{
+ int p=4;
+ double floor_sqrt;
+ floor_sqrt = floor( sqrt((double)n) );
+
+ /*factor out powers of 4, powers of 2, then any remaining primes */
+ do {
+ while (n % p) {
+ switch (p) {
+ case 4: p = 2; break;
+ case 2: p = 3; break;
+ default: p += 2; break;
+ }
+ if (p > floor_sqrt)
+ p = n; /* no more factors, skip to end */
+ }
+ n /= p;
+ *facbuf++ = p;
+ *facbuf++ = n;
+ } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc. As such,
+ * It can be freed with free(), rather than a kiss_fft-specific function.
+ * */
+kiss_fft_cfg mini_kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+
+ kiss_fft_cfg st=NULL;
+ size_t memneeded = (sizeof(struct kiss_fft_state)
+ + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
+
+ if ( lenmem==NULL ) {
+ st = ( kiss_fft_cfg)malloc( memneeded );
+ }else{
+ if (mem != NULL && *lenmem >= memneeded)
+ st = (kiss_fft_cfg)mem;
+ *lenmem = memneeded;
+ }
+ if (st) {
+ int i;
+ st->nfft=nfft;
+ st->inverse = inverse_fft;
+
+ for (i=0;i<nfft;++i) {
+ const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+ double phase = -2*pi*i / nfft;
+ if (st->inverse)
+ phase *= -1;
+ kf_cexp(st->twiddles+i, phase );
+ }
+
+ kf_factor(nfft,st->factors);
+ }
+ return st;
+}
+
+
+void mini_kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+ assert(fin != fout);
+ kf_work( fout, fin, 1,in_stride, st->factors,st );
+}
+
+void mini_kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ mini_kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+struct kiss_fftr_state{
+ kiss_fft_cfg substate;
+ kiss_fft_cpx * tmpbuf;
+ kiss_fft_cpx * super_twiddles;
+};
+
+kiss_fftr_cfg mini_kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+
+ int i;
+ kiss_fftr_cfg st = NULL;
+ size_t subsize = 0, memneeded;
+
+ assert ((nfft & 1) == 0);
+ nfft >>= 1;
+
+ mini_kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+ memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
+
+ if (lenmem == NULL) {
+ st = (kiss_fftr_cfg) malloc(memneeded);
+ } else {
+ if (*lenmem >= memneeded)
+ st = (kiss_fftr_cfg) mem;
+ *lenmem = memneeded;
+ }
+ if (!st)
+ return NULL;
+
+ st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+ st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
+ st->super_twiddles = st->tmpbuf + nfft;
+ mini_kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+ for (i = 0; i < nfft/2; ++i) {
+ double phase =
+ -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
+ if (inverse_fft)
+ phase *= -1;
+ kf_cexp (st->super_twiddles+i,phase);
+ }
+ return st;
+}
+
+void mini_kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+ /* input buffer timedata is stored row-wise */
+ int k,ncfft;
+ kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+
+ assert ( !st->substate->inverse);
+
+ ncfft = st->substate->nfft;
+
+ /*perform the parallel fft of two real signals packed in real,imag*/
+ mini_kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+ /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+ * contains the sum of the even-numbered elements of the input time sequence
+ * The imag part is the sum of the odd-numbered elements
+ *
+ * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+ * yielding DC of input time sequence
+ * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
+ * yielding Nyquist bin of input time sequence
+ */
+
+ tdc.r = st->tmpbuf[0].r;
+ tdc.i = st->tmpbuf[0].i;
+ C_FIXDIV(tdc,2);
+ CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+ CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+ freqdata[0].r = tdc.r + tdc.i;
+ freqdata[ncfft].r = tdc.r - tdc.i;
+ freqdata[ncfft].i = freqdata[0].i = 0;
+
+ for ( k=1;k <= ncfft/2 ; ++k ) {
+ fpk = st->tmpbuf[k];
+ fpnk.r = st->tmpbuf[ncfft-k].r;
+ fpnk.i = - st->tmpbuf[ncfft-k].i;
+ C_FIXDIV(fpk,2);
+ C_FIXDIV(fpnk,2);
+
+ C_ADD( f1k, fpk , fpnk );
+ C_SUB( f2k, fpk , fpnk );
+ C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+ freqdata[k].r = HALF_OF(f1k.r + tw.r);
+ freqdata[k].i = HALF_OF(f1k.i + tw.i);
+ freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+ freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+ }
+}
--- /dev/null
+++ b/dnn/dred_compare.c
@@ -1,0 +1,526 @@
+/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation
+ Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "mini_kfft.c"
+
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+#define OPUS_PI (3.14159265F)
+
+#define OPUS_COSF(_x) ((float)cos(_x))
+#define OPUS_SINF(_x) ((float)sin(_x))
+
+static void *check_alloc(void *_ptr){
+ if(_ptr==NULL){
+ fprintf(stderr,"Out of memory.\n");
+ exit(EXIT_FAILURE);
+ }
+ return _ptr;
+}
+
+static void *opus_malloc(size_t _size){
+ return check_alloc(malloc(_size));
+}
+
+static void *opus_realloc(void *_ptr,size_t _size){
+ return check_alloc(realloc(_ptr,_size));
+}
+
+#define FORMAT_S16_LE 0
+#define FORMAT_S24_LE 1
+#define FORMAT_F32_LE 2
+
+#define NBANDS (17)
+#define NFREQS (320)
+#define TEST_WIN_SIZE (640)
+#define TEST_WIN_STEP (160)
+
+static const int format_size[3] = {2, 3, 4};
+typedef union {
+ int i;
+ float f;
+} float_bits;
+
+static void biquad(float *y, float mem[2], const float *x,
+ const float *b, const float *a, int N) {
+ int i;
+ for (i=0;i<N;i++) {
+ float xi, yi;
+ xi = x[i];
+ yi = x[i] + mem[0];
+ mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
+ mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
+ y[i] = yi;
+ }
+}
+
+static size_t read_pcm(float **_samples,FILE *_fin,int _nchannels,
+ int format){
+ unsigned char buf[1024];
+ float *samples;
+ size_t nsamples;
+ size_t csamples;
+ size_t xi;
+ size_t nread;
+ static const float a_hp[2] = {-1.97354f, 0.97417f};
+ static const float b_hp[2] = {-2.f, 1.f};
+ float mem[2] = {0};
+ samples=NULL;
+ nsamples=csamples=0;
+ int size = format_size[format];
+
+ for(;;){
+ nread=fread(buf,size*_nchannels,1024/(size*_nchannels),_fin);
+ if(nread<=0)break;
+ if(nsamples+nread>csamples){
+ do csamples=csamples<<1|1;
+ while(nsamples+nread>csamples);
+ samples=(float *)opus_realloc(samples,
+ _nchannels*csamples*sizeof(*samples));
+ }
+ if (format==FORMAT_S16_LE) {
+ for(xi=0;xi<nread;xi++){
+ int ci;
+ for(ci=0;ci<_nchannels;ci++){
+ int s;
+ s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)];
+ s=((s&0xFFFF)^0x8000)-0x8000;
+ samples[(nsamples+xi)*_nchannels+ci]=s;
+ }
+ }
+ } else if (format==FORMAT_S24_LE) {
+ for(xi=0;xi<nread;xi++){
+ int ci;
+ for(ci=0;ci<_nchannels;ci++){
+ int s;
+ s=buf[3*(xi*_nchannels+ci)+2]<<16
+ |buf[3*(xi*_nchannels+ci)+1]<<8
+ |buf[3*(xi*_nchannels+ci)];
+ s=((s&0xFFFFFF)^0x800000)-0x800000;
+ samples[(nsamples+xi)*_nchannels+ci]=(1.f/256.f)*s;
+ }
+ }
+ } else if (format==FORMAT_F32_LE) {
+ for(xi=0;xi<nread;xi++){
+ int ci;
+ for(ci=0;ci<_nchannels;ci++){
+ float_bits s;
+ s.i=(unsigned)buf[4*(xi*_nchannels+ci)+3]<<24
+ |buf[4*(xi*_nchannels+ci)+2]<<16
+ |buf[4*(xi*_nchannels+ci)+1]<<8
+ |buf[4*(xi*_nchannels+ci)];
+ samples[(nsamples+xi)*_nchannels+ci] = s.f*32768;
+ }
+ }
+ } else {
+ exit(1);
+ }
+ nsamples+=nread;
+ }
+ *_samples=(float *)opus_realloc(samples,
+ _nchannels*nsamples*sizeof(*samples));
+ biquad(*_samples, mem, *_samples, b_hp, a_hp, nsamples);
+ return nsamples;
+}
+
+static void spectrum(float *_ps,const int *_bands, int _nbands,
+ const float *_in,int _nchannels, size_t _nframes,
+ int _window_sz, int _step, int _downsample){
+ float window[TEST_WIN_SIZE];
+ float x[TEST_WIN_SIZE];
+ size_t xi;
+ int xj;
+ int ps_sz;
+ kiss_fft_cpx X[2][NFREQS+1];
+ kiss_fftr_cfg kfft;
+ ps_sz=_window_sz/2;
+ /* Blackman-Harris window. */
+ for(xj=0;xj<_window_sz;xj++){
+ double n = (xj+.5)/_window_sz;
+ window[xj]=0.35875 - 0.48829*cos(2*OPUS_PI*n)
+ + 0.14128*cos(4*OPUS_PI*n) - 0.01168*cos(6*OPUS_PI*n);
+ }
+ kfft = mini_kiss_fftr_alloc(_window_sz, 0, NULL, NULL);
+ for(xi=0;xi<_nframes;xi++){
+ int ci;
+ int xk;
+ int bi;
+ for(ci=0;ci<_nchannels;ci++){
+ for(xk=0;xk<_window_sz;xk++){
+ x[xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci];
+ }
+ mini_kiss_fftr(kfft, x, X[ci]);
+ }
+ for(bi=xj=0;bi<_nbands;bi++){
+ float p[2]={0};
+ for(;xj<_bands[bi+1];xj++){
+ for(ci=0;ci<_nchannels;ci++){
+ float re;
+ float im;
+ re = X[ci][xj].r*_downsample;
+ im = X[ci][xj].i*_downsample;
+ _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+.1;
+ p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci];
+ }
+ }
+ }
+ }
+ free(kfft);
+}
+
+
+/*Bands on which we compute the pseudo-NMR (Bark-derived
+ CELT bands).*/
+static const int BANDS[NBANDS+1]={
+ 0,8,16,24,32,40,48,56,64,80,96,112,128,160,192,224,272,320
+};
+
+
+void usage(const char *_argv0) {
+ fprintf(stderr,"Usage: %s [-s16|-s24|-f32] [-thresholds err4 err16]"
+ " <file1.sw> <file2.sw>\n", _argv0);
+}
+
+/* Taken from ancient CELT code */
+void psydecay_init(float *decayL, float *decayR, int len, int Fs)
+{
+ int i;
+ for (i=0;i<len;i++)
+ {
+ float f;
+ float deriv;
+ /* Real frequency (in Hz) */
+ f = Fs*i*(1/(2.f*len));
+ /* This is the derivative of the Vorbis freq->Bark function. */
+ deriv = (8.288e-8 * f)/(3.4225e-16 *f*f*f*f + 1)
+ + .009694/(5.476e-7 *f*f + 1) + 1e-4;
+ /* Back to FFT bin units */
+ deriv *= Fs*(1/(2.f*len));
+ /* decay corresponding to -10dB/Bark */
+ decayR[i] = pow(.1f, deriv);
+ /* decay corresponding to -25dB/Bark */
+ decayL[i] = pow(0.0031623f, deriv);
+ /*printf ("%f %f\n", decayL[i], decayR[i]);*/
+ }
+}
+
+#define PITCH_MIN 32
+#define PITCH_MAX 256
+#define PITCH_FRAME 320
+
+static float inner_prod(const float *x, const float *y, int len) {
+ float sum=0;
+ int i;
+ for (i=0;i<len;i++) sum += x[i]*y[i];
+ return sum;
+}
+
+static void compute_xcorr(const float *x, float *xcorr) {
+ int i;
+ float xx;
+ float filtered[PITCH_FRAME+PITCH_MAX];
+ for (i=0;i<PITCH_FRAME+PITCH_MAX;i++) {
+ filtered[i] = x[i-PITCH_MAX] - .8*x[i-PITCH_MAX-1];
+ }
+ xx = inner_prod(&filtered[PITCH_MAX],
+ &filtered[PITCH_MAX], PITCH_FRAME);
+ for (i=0;i<=PITCH_MAX;i++) {
+ float xy, yy;
+ xy = inner_prod(&filtered[PITCH_MAX],
+ &filtered[PITCH_MAX-i], PITCH_FRAME);
+ yy = inner_prod(&filtered[PITCH_MAX-i],
+ &filtered[PITCH_MAX-i], PITCH_FRAME);
+ xcorr[i] = xy/sqrt(xx*yy+PITCH_FRAME);
+ }
+}
+
+#define LOUDNESS 0.2f
+int main(int _argc,const char **_argv){
+ FILE *fin1;
+ FILE *fin2;
+ float *x;
+ float *y;
+ float *X;
+ float *Y;
+ double err4;
+ double err16;
+ double T2;
+ size_t xlength;
+ size_t ylength;
+ size_t nframes;
+ size_t xi;
+ int ci;
+ int xj;
+ int bi;
+ int nchannels;
+ int downsample;
+ int nbands;
+ int ybands;
+ int nfreqs;
+ int yfreqs;
+ size_t test_win_size;
+ size_t test_win_step;
+ int max_compare;
+ int format;
+ const char *argv0 = _argv[0];
+ double err4_threshold=-1, err16_threshold=-1;
+ int compare_thresholds=0;
+ float decayL[NFREQS], decayR[NFREQS];
+ float norm[NFREQS];
+ float pitch_error=0;
+ int pitch_count=0;
+ psydecay_init(decayL, decayR, NFREQS, 16000);
+ if(_argc<3){
+ usage(argv0);
+ return EXIT_FAILURE;
+ }
+ nchannels=1;
+ ybands=nbands=NBANDS;
+ nfreqs=NFREQS;
+ test_win_size=TEST_WIN_SIZE;
+ test_win_step=TEST_WIN_STEP;
+ downsample=1;
+ format=FORMAT_S16_LE;
+ while (_argc > 3) {
+ if(strcmp(_argv[1],"-s16")==0){
+ format=FORMAT_S16_LE;
+ _argv++;
+ _argc--;
+ } else if(strcmp(_argv[1],"-s24")==0){
+ format=FORMAT_S24_LE;
+ _argv++;
+ _argc--;
+ } else if(strcmp(_argv[1],"-f32")==0){
+ format=FORMAT_F32_LE;
+ _argv++;
+ _argc--;
+ } else if(strcmp(_argv[1],"-thresholds")==0){
+ if (_argc < 7) {
+ usage(argv0);
+ return EXIT_FAILURE;
+ }
+ err4_threshold=atof(_argv[2]);
+ err16_threshold=atof(_argv[3]);
+ compare_thresholds=1;
+ _argv+=3;
+ _argc-=3;
+ } else {
+ usage(argv0);
+ return EXIT_FAILURE;
+ }
+ }
+ if(_argc!=3){
+ usage(argv0);
+ return EXIT_FAILURE;
+ }
+ downsample=1;
+ yfreqs=nfreqs/downsample;
+ fin1=fopen(_argv[1],"rb");
+ if(fin1==NULL){
+ fprintf(stderr,"Error opening '%s'.\n",_argv[1]);
+ return EXIT_FAILURE;
+ }
+ fin2=fopen(_argv[2],"rb");
+ if(fin2==NULL){
+ fprintf(stderr,"Error opening '%s'.\n",_argv[2]);
+ fclose(fin1);
+ return EXIT_FAILURE;
+ }
+ /*Read in the data and allocate scratch space.*/
+ xlength=read_pcm(&x,fin1,1,format);
+ fclose(fin1);
+ ylength=read_pcm(&y,fin2,nchannels,format);
+ fclose(fin2);
+ if(xlength!=ylength*downsample){
+ fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n",
+ (unsigned long)xlength,(unsigned long)ylength*downsample);
+ return EXIT_FAILURE;
+ }
+ if(xlength<test_win_size){
+ fprintf(stderr,"Insufficient sample data (%lu<%lu).\n",
+ (unsigned long)xlength,test_win_size);
+ return EXIT_FAILURE;
+ }
+ nframes=(xlength-test_win_size+test_win_step)/test_win_step;
+ X=(float *)opus_malloc(nframes*nfreqs*nchannels*sizeof(*X));
+ Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y));
+
+ for(xi=2;xi<nframes-2;xi++){
+ float xcorr[PITCH_MAX+1], ycorr[PITCH_MAX+1];
+ int i;
+ float maxcorr=-1;
+ int pitch=0;
+ compute_xcorr(&x[xi*TEST_WIN_STEP], xcorr);
+ compute_xcorr(&y[xi*TEST_WIN_STEP], ycorr);
+ for (i=PITCH_MIN;i<=PITCH_MAX;i++) {
+ if (xcorr[i] > maxcorr) {
+ maxcorr = xcorr[i];
+ pitch = i;
+ }
+ }
+ if (xcorr[pitch] > .7) {
+ pitch_error += fabs(xcorr[pitch]-ycorr[pitch]);
+ pitch_count++;
+ }
+ }
+ pitch_error /= pitch_count;
+
+ /*Compute the per-band spectral energy of the original signal
+ and the error.*/
+ spectrum(X,BANDS,nbands,x,nchannels,nframes,
+ test_win_size,test_win_step,1);
+ free(x);
+ spectrum(Y,BANDS,ybands,y,nchannels,nframes,
+ test_win_size/downsample,test_win_step/downsample,downsample);
+ free(y);
+
+ norm[0]=1;
+ for(xj=1;xj<NFREQS;xj++){
+ norm[xj] = 1 + decayR[xj]*norm[xj-1];
+ }
+ for(xj=NFREQS-2;xj>=0;xj--){
+ norm[xj] = norm[xj] + decayL[xj]*norm[xj+1];
+ }
+ for(xj=0;xj<NFREQS;xj++) norm[xj] = 1.f/norm[xj];
+ for(xi=0;xi<nframes;xi++){
+ for(xj=1;xj<NFREQS;xj++){
+ X[xi*nfreqs+xj] = X[xi*nfreqs+xj]+decayR[xj]*X[xi*nfreqs+xj-1];
+ Y[xi*nfreqs+xj] = Y[xi*nfreqs+xj]+decayR[xj]*Y[xi*nfreqs+xj-1];
+ }
+ for(xj=NFREQS-2;xj>=0;xj--){
+ X[xi*nfreqs+xj] = X[xi*nfreqs+xj]+decayL[xj]*X[xi*nfreqs+xj+1];
+ Y[xi*nfreqs+xj] = Y[xi*nfreqs+xj]+decayL[xj]*Y[xi*nfreqs+xj+1];
+ }
+ for(xj=0;xj<NFREQS;xj++){
+ X[xi*nfreqs+xj] *= norm[xj];
+ Y[xi*nfreqs+xj] *= norm[xj];
+ }
+ }
+
+ for(xi=0;xi<nframes;xi++){
+ float maxE=0;
+ for(xj=0;xj<NFREQS;xj++){
+ maxE = MAX(maxE, X[xi*nfreqs+xj]);
+ }
+ /* Allow for up to 80 dB instantaneous dynamic range. */
+ for(xj=0;xj<NFREQS;xj++){
+ X[xi*nfreqs+xj] = MAX(1e-8*maxE, X[xi*nfreqs+xj]);
+ Y[xi*nfreqs+xj] = MAX(1e-8*maxE, Y[xi*nfreqs+xj]);
+ }
+ /* Forward temporal masking: -3 dB/2.5ms slope.*/
+ if(xi>0){
+ for(xj=0;xj<NFREQS;xj++){
+ X[xi*nfreqs+xj] += .5f*X[(xi-1)*nfreqs+xj];
+ Y[xi*nfreqs+xj] += .5f*Y[(xi-1)*nfreqs+xj];
+ }
+ }
+ }
+
+ /*Backward temporal masking: -10 dB/2.5ms slope.*/
+ for(xi=nframes-2;xi-->0;){
+ for(xj=0;xj<NFREQS;xj++){
+ X[xi*nfreqs+xj] += .1f*X[(xi+1)*nfreqs+xj];
+ Y[xi*nfreqs+xj] += .1f*Y[(xi+1)*nfreqs+xj];
+ }
+ }
+
+ max_compare=BANDS[nbands];
+ err4=0;
+ err16=0;
+ T2=0;
+ for(xi=0;xi<nframes;xi++){
+ double Ef2, Ef4, Tf2;
+ Ef2=0;
+ Ef4=0;
+ Tf2=0;
+ for(bi=0;bi<ybands;bi++){
+ double Eb2, Eb4, Tb2;
+ double w;
+ Tb2=Eb2=Eb4=0;
+ w = 1.f/(BANDS[bi+1]-BANDS[bi]);
+ for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){
+ float f, thresh;
+ f = xj*OPUS_PI/960;
+ /* Shape the lower threshold similar to 1/(1 - 0.85*z^-1)
+ deemphasis filter at 48 kHz. */
+ thresh = .1/(.15*.15 + f*f);
+ for(ci=0;ci<nchannels;ci++){
+ float re;
+ float im;
+ re = pow(Y[(xi*yfreqs+xj)*nchannels+ci]+thresh,LOUDNESS)
+ - pow(X[(xi*nfreqs+xj)*nchannels+ci]+thresh,LOUDNESS);
+ im = re*re;
+ Tb2 += w*pow(X[(xi*nfreqs+xj)*nchannels+ci]+thresh,
+ 2*LOUDNESS);
+ /* Per-band error weighting. */
+ im *= w;
+ Eb2+=im;
+ /* Same for 4th power, but make it less sensitive to
+ very low energies. */
+ re = pow(Y[(xi*yfreqs+xj)*nchannels+ci]+10*thresh,LOUDNESS)
+ - pow(X[(xi*nfreqs+xj)*nchannels+ci]+10*thresh,LOUDNESS);
+ im = re*re;
+ /* Per-band error weighting. */
+ im *= w;
+ Eb4+=im;
+ }
+ }
+ Eb2 /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+ Eb4 /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+ Tb2 /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+ Ef2 += Eb2;
+ Ef4 += Eb4*Eb4;
+ Tf2 += Tb2;
+ }
+ Ef2/=nbands;
+ Ef4/=nbands;
+ Ef4*=Ef4;
+ Tf2/=nbands;
+ err4+=Ef2*Ef2;
+ err16+=Ef4*Ef4;
+ T2 += Tf2;
+ }
+ free(X);
+ free(Y);
+ err4=100*pow(err4/nframes,1.0/4)/sqrt(T2);
+ err16=100*pow(err16/nframes,1.0/16)/sqrt(T2);
+ fprintf(stderr, "err4 = %f, err16 = %f, pitch = %f\n",
+ err4, err16, pitch_error);
+ if (compare_thresholds) {
+ if (err4 <= err4_threshold && err16 <= err16_threshold) {
+ fprintf(stderr, "Comparison PASSED\n");
+ } else {
+ fprintf(stderr, "*** Comparison FAILED ***"
+ " (thresholds were %f %f)\n", err4_threshold, err16_threshold);
+ return EXIT_FAILURE;
+ }
+ }
+ return EXIT_SUCCESS;
+}
--
⑨