shithub: opus

--- a/Makefile.am

+++ b/Makefile.am

@@ -302,10 +302,13 @@

 if EXTRA_PROGRAMS

 if ENABLE_DEEP_PLC

-noinst_PROGRAMS += fargan_demo dump_data dump_weights_blob

+noinst_PROGRAMS += fargan_demo dump_data dump_weights_blob dred_compare

 fargan_demo_SOURCES = dnn/fargan_demo.c

 fargan_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)

+dred_compare_SOURCES = dnn/dred_compare.c

+dred_compare_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)

 dump_data_SOURCES = dnn/dump_data.c

 dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)

@@ -361,6 +364,7 @@

              tests/meson.build \

              doc/meson.build \

              tests/run_vectors.sh \

+	     celt/mini_kfft.c \

              celt/arm/arm2gnu.pl \

              celt/arm/celt_pitch_xcorr_arm.s

--- /dev/null

+++ b/celt/mini_kfft.c

@@ -1,0 +1,498 @@

+/*

+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.

+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft

+ *

+ *  SPDX-License-Identifier: BSD-3-Clause

+ *  See COPYING file for more information.

+ */

+/* This is a minimalist, concatenated version of kiss-fft just to compute real scalar FFTs. */

+#include <stdlib.h>

+#include <math.h>

+#include <assert.h>

+#define kiss_fft_scalar float

+typedef struct {

+    kiss_fft_scalar r;

+    kiss_fft_scalar i;

+}kiss_fft_cpx;

+typedef struct kiss_fft_state* kiss_fft_cfg;

+/* If kiss_fft_alloc allocated a buffer, it is one contiguous

+   buffer and can be simply free()d when no longer needed*/

+#define kiss_fft_free KISS_FFT_FREE

+#define MAXFACTORS 32

+/* e.g. an fft of length 128 has 4 factors

+ as far as kissfft is concerned

+ 4*4*4*2

+ */

+struct kiss_fft_state{

+    int nfft;

+    int inverse;

+    int factors[2*MAXFACTORS];

+    kiss_fft_cpx twiddles[1];

+};

+/*

+  Explanation of macros dealing with complex math:

+   C_MUL(m,a,b)         : m = a*b

+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise

+   C_SUB( res, a,b)     : res = a - b

+   C_SUBFROM( res , a)  : res -= a

+   C_ADDTO( res , a)    : res += a

+ * */

+#   define S_MUL(a,b) ( (a)*(b) )

+#define C_MUL(m,a,b) \

+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\

+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)

+#   define C_FIXDIV(c,div) /* NOOP */

+#   define C_MULBYSCALAR( c, s ) \

+    do{ (c).r *= (s);\

+        (c).i *= (s); }while(0)

+#define CHECK_OVERFLOW_OP(a,op,b) /* noop */

+#define  C_ADD( res, a,b)\

+    do { \

+        CHECK_OVERFLOW_OP((a).r,+,(b).r)\

+        CHECK_OVERFLOW_OP((a).i,+,(b).i)\

+        (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \

+    }while(0)

+#define  C_SUB( res, a,b)\

+    do { \

+        CHECK_OVERFLOW_OP((a).r,-,(b).r)\

+        CHECK_OVERFLOW_OP((a).i,-,(b).i)\

+        (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \

+    }while(0)

+#define C_ADDTO( res , a)\

+    do { \

+        CHECK_OVERFLOW_OP((res).r,+,(a).r)\

+        CHECK_OVERFLOW_OP((res).i,+,(a).i)\

+        (res).r += (a).r;  (res).i += (a).i;\

+    }while(0)

+#define C_SUBFROM( res , a)\

+    do {\

+        CHECK_OVERFLOW_OP((res).r,-,(a).r)\

+        CHECK_OVERFLOW_OP((res).i,-,(a).i)\

+        (res).r -= (a).r;  (res).i -= (a).i; \

+    }while(0)

+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)

+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)

+#  define HALF_OF(x) ((x)*((kiss_fft_scalar).5))

+#define  kf_cexp(x,phase) \

+    do{ \

+        (x)->r = KISS_FFT_COS(phase);\

+        (x)->i = KISS_FFT_SIN(phase);\

+    }while(0)

+static void kf_bfly2(

+        kiss_fft_cpx * Fout,

+        const size_t fstride,

+        const kiss_fft_cfg st,

+        int m

+        )

+{

+    kiss_fft_cpx * Fout2;

+    kiss_fft_cpx * tw1 = st->twiddles;

+    kiss_fft_cpx t;

+    Fout2 = Fout + m;

+    do{

+        C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);

+        C_MUL (t,  *Fout2 , *tw1);

+        tw1 += fstride;

+        C_SUB( *Fout2 ,  *Fout , t );

+        C_ADDTO( *Fout ,  t );

+        ++Fout2;

+        ++Fout;

+    }while (--m);

+}

+static void kf_bfly4(

+        kiss_fft_cpx * Fout,

+        const size_t fstride,

+        const kiss_fft_cfg st,

+        const size_t m

+        )

+{

+    kiss_fft_cpx *tw1,*tw2,*tw3;

+    kiss_fft_cpx scratch[6];

+    size_t k=m;

+    const size_t m2=2*m;

+    const size_t m3=3*m;

+    tw3 = tw2 = tw1 = st->twiddles;

+    do {

+        C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);

+        C_MUL(scratch[0],Fout[m] , *tw1 );

+        C_MUL(scratch[1],Fout[m2] , *tw2 );

+        C_MUL(scratch[2],Fout[m3] , *tw3 );

+        C_SUB( scratch[5] , *Fout, scratch[1] );

+        C_ADDTO(*Fout, scratch[1]);

+        C_ADD( scratch[3] , scratch[0] , scratch[2] );

+        C_SUB( scratch[4] , scratch[0] , scratch[2] );

+        C_SUB( Fout[m2], *Fout, scratch[3] );

+        tw1 += fstride;

+        tw2 += fstride*2;

+        tw3 += fstride*3;

+        C_ADDTO( *Fout , scratch[3] );

+        if(st->inverse) {

+            Fout[m].r = scratch[5].r - scratch[4].i;

+            Fout[m].i = scratch[5].i + scratch[4].r;

+            Fout[m3].r = scratch[5].r + scratch[4].i;

+            Fout[m3].i = scratch[5].i - scratch[4].r;

+        }else{

+            Fout[m].r = scratch[5].r + scratch[4].i;

+            Fout[m].i = scratch[5].i - scratch[4].r;

+            Fout[m3].r = scratch[5].r - scratch[4].i;

+            Fout[m3].i = scratch[5].i + scratch[4].r;

+        }

+        ++Fout;

+    }while(--k);

+}

+static void kf_bfly3(

+         kiss_fft_cpx * Fout,

+         const size_t fstride,

+         const kiss_fft_cfg st,

+         size_t m

+         )

+{

+     size_t k=m;

+     const size_t m2 = 2*m;

+     kiss_fft_cpx *tw1,*tw2;

+     kiss_fft_cpx scratch[5];

+     kiss_fft_cpx epi3;

+     epi3 = st->twiddles[fstride*m];

+     tw1=tw2=st->twiddles;

+     do{

+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);

+         C_MUL(scratch[1],Fout[m] , *tw1);

+         C_MUL(scratch[2],Fout[m2] , *tw2);

+         C_ADD(scratch[3],scratch[1],scratch[2]);

+         C_SUB(scratch[0],scratch[1],scratch[2]);

+         tw1 += fstride;

+         tw2 += fstride*2;

+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);

+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

+         C_MULBYSCALAR( scratch[0] , epi3.i );

+         C_ADDTO(*Fout,scratch[3]);

+         Fout[m2].r = Fout[m].r + scratch[0].i;

+         Fout[m2].i = Fout[m].i - scratch[0].r;

+         Fout[m].r -= scratch[0].i;

+         Fout[m].i += scratch[0].r;

+         ++Fout;

+     }while(--k);

+}

+static void kf_bfly5(

+        kiss_fft_cpx * Fout,

+        const size_t fstride,

+        const kiss_fft_cfg st,

+        int m

+        )

+{

+    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;

+    int u;

+    kiss_fft_cpx scratch[13];

+    kiss_fft_cpx * twiddles = st->twiddles;

+    kiss_fft_cpx *tw;

+    kiss_fft_cpx ya,yb;

+    ya = twiddles[fstride*m];

+    yb = twiddles[fstride*2*m];

+    Fout0=Fout;

+    Fout1=Fout0+m;

+    Fout2=Fout0+2*m;

+    Fout3=Fout0+3*m;

+    Fout4=Fout0+4*m;

+    tw=st->twiddles;

+    for ( u=0; u<m; ++u ) {

+        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);

+        scratch[0] = *Fout0;

+        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);

+        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);

+        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);

+        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);

+        C_ADD( scratch[7],scratch[1],scratch[4]);

+        C_SUB( scratch[10],scratch[1],scratch[4]);

+        C_ADD( scratch[8],scratch[2],scratch[3]);

+        C_SUB( scratch[9],scratch[2],scratch[3]);

+        Fout0->r += scratch[7].r + scratch[8].r;

+        Fout0->i += scratch[7].i + scratch[8].i;

+        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);

+        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);

+        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);

+        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);

+        C_SUB(*Fout1,scratch[5],scratch[6]);

+        C_ADD(*Fout4,scratch[5],scratch[6]);

+        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);

+        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);

+        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);

+        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);

+        C_ADD(*Fout2,scratch[11],scratch[12]);

+        C_SUB(*Fout3,scratch[11],scratch[12]);

+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

+    }

+}

+static

+void kf_work(

+        kiss_fft_cpx * Fout,

+        const kiss_fft_cpx * f,

+        const size_t fstride,

+        int in_stride,

+        int * factors,

+        const kiss_fft_cfg st

+        )

+{

+    kiss_fft_cpx * Fout_beg=Fout;

+    const int p=*factors++; /* the radix  */

+    const int m=*factors++; /* stage's fft length/p */

+    const kiss_fft_cpx * Fout_end = Fout + p*m;

+    if (m==1) {

+        do{

+            *Fout = *f;

+            f += fstride*in_stride;

+        }while(++Fout != Fout_end );

+    }else{

+        do{

+            /* recursive call:

+               DFT of size m*p performed by doing

+               p instances of smaller DFTs of size m,

+               each one takes a decimated version of the input */

+            kf_work( Fout , f, fstride*p, in_stride, factors,st);

+            f += fstride*in_stride;

+        }while( (Fout += m) != Fout_end );

+    }

+    Fout=Fout_beg;

+    /* recombine the p smaller DFTs*/

+    switch (p) {

+        case 2: kf_bfly2(Fout,fstride,st,m); break;

+        case 3: kf_bfly3(Fout,fstride,st,m); break;

+        case 4: kf_bfly4(Fout,fstride,st,m); break;

+        case 5: kf_bfly5(Fout,fstride,st,m); break;

+        default: assert(0);

+    }

+}

+/*  facbuf is populated by p1,m1,p2,m2, ...

+    where

+    p[i] * m[i] = m[i-1]

+    m0 = n                  */

+static

+void kf_factor(int n,int * facbuf)

+{

+    int p=4;

+    double floor_sqrt;

+    floor_sqrt = floor( sqrt((double)n) );

+    /*factor out powers of 4, powers of 2, then any remaining primes */

+    do {

+        while (n % p) {

+            switch (p) {

+                case 4: p = 2; break;

+                case 2: p = 3; break;

+                default: p += 2; break;

+            }

+            if (p > floor_sqrt)

+                p = n;          /* no more factors, skip to end */

+        }

+        n /= p;

+        *facbuf++ = p;

+        *facbuf++ = n;

+    } while (n > 1);

+}

+/*

+ *

+ * User-callable function to allocate all necessary storage space for the fft.

+ *

+ * The return value is a contiguous block of memory, allocated with malloc.  As such,

+ * It can be freed with free(), rather than a kiss_fft-specific function.

+ * */

+kiss_fft_cfg mini_kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )

+{

+    kiss_fft_cfg st=NULL;

+    size_t memneeded = (sizeof(struct kiss_fft_state)

+        + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/

+    if ( lenmem==NULL ) {

+        st = ( kiss_fft_cfg)malloc( memneeded );

+    }else{

+        if (mem != NULL && *lenmem >= memneeded)

+            st = (kiss_fft_cfg)mem;

+        *lenmem = memneeded;

+    }

+    if (st) {

+        int i;

+        st->nfft=nfft;

+        st->inverse = inverse_fft;

+        for (i=0;i<nfft;++i) {

+            const double pi=3.141592653589793238462643383279502884197169399375105820974944;

+            double phase = -2*pi*i / nfft;

+            if (st->inverse)

+                phase *= -1;

+            kf_cexp(st->twiddles+i, phase );

+        }

+        kf_factor(nfft,st->factors);

+    }

+    return st;

+}

+void mini_kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)

+{

+    assert(fin != fout);

+    kf_work( fout, fin, 1,in_stride, st->factors,st );

+}

+void mini_kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

+{

+    mini_kiss_fft_stride(cfg,fin,fout,1);

+}

+typedef struct kiss_fftr_state *kiss_fftr_cfg;

+struct kiss_fftr_state{

+    kiss_fft_cfg substate;

+    kiss_fft_cpx * tmpbuf;

+    kiss_fft_cpx * super_twiddles;

+};

+kiss_fftr_cfg mini_kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)

+{

+    int i;

+    kiss_fftr_cfg st = NULL;

+    size_t subsize = 0, memneeded;

+    assert ((nfft & 1) == 0);

+    nfft >>= 1;

+    mini_kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);

+    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);

+    if (lenmem == NULL) {

+        st = (kiss_fftr_cfg) malloc(memneeded);

+    } else {

+        if (*lenmem >= memneeded)

+            st = (kiss_fftr_cfg) mem;

+        *lenmem = memneeded;

+    }

+    if (!st)

+        return NULL;

+    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */

+    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);

+    st->super_twiddles = st->tmpbuf + nfft;

+    mini_kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);

+    for (i = 0; i < nfft/2; ++i) {

+        double phase =

+            -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);

+        if (inverse_fft)

+            phase *= -1;

+        kf_cexp (st->super_twiddles+i,phase);

+    }

+    return st;

+}

+void mini_kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)

+{

+    /* input buffer timedata is stored row-wise */

+    int k,ncfft;

+    kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;

+    assert ( !st->substate->inverse);

+    ncfft = st->substate->nfft;

+    /*perform the parallel fft of two real signals packed in real,imag*/

+    mini_kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );

+    /* The real part of the DC element of the frequency spectrum in st->tmpbuf

+     * contains the sum of the even-numbered elements of the input time sequence

+     * The imag part is the sum of the odd-numbered elements

+     *

+     * The sum of tdc.r and tdc.i is the sum of the input time sequence.

+     *      yielding DC of input time sequence

+     * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...

+     *      yielding Nyquist bin of input time sequence

+     */

+    tdc.r = st->tmpbuf[0].r;

+    tdc.i = st->tmpbuf[0].i;

+    C_FIXDIV(tdc,2);

+    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);

+    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);

+    freqdata[0].r = tdc.r + tdc.i;

+    freqdata[ncfft].r = tdc.r - tdc.i;

+    freqdata[ncfft].i = freqdata[0].i = 0;

+    for ( k=1;k <= ncfft/2 ; ++k ) {

+        fpk    = st->tmpbuf[k];

+        fpnk.r =   st->tmpbuf[ncfft-k].r;

+        fpnk.i = - st->tmpbuf[ncfft-k].i;

+        C_FIXDIV(fpk,2);

+        C_FIXDIV(fpnk,2);

+        C_ADD( f1k, fpk , fpnk );

+        C_SUB( f2k, fpk , fpnk );

+        C_MUL( tw , f2k , st->super_twiddles[k-1]);

+        freqdata[k].r = HALF_OF(f1k.r + tw.r);

+        freqdata[k].i = HALF_OF(f1k.i + tw.i);

+        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);

+        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);

+    }

+}

--- /dev/null

+++ b/dnn/dred_compare.c

@@ -1,0 +1,526 @@

+/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation

+   Written by Jean-Marc Valin and Timothy B. Terriberry */

+/*

+Redistribution and use in source and binary forms, with or without

+modification, are permitted provided that the following conditions

+are met:

+- Redistributions of source code must retain the above copyright

+notice, this list of conditions and the following disclaimer.

+- Redistributions in binary form must reproduce the above copyright

+notice, this list of conditions and the following disclaimer in the

+documentation and/or other materials provided with the distribution.

+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#include <stdio.h>

+#include <stdlib.h>

+#include <math.h>

+#include <string.h>

+#include "mini_kfft.c"

+#define MAX(a,b) ((a)>(b) ? (a) : (b))

+#define OPUS_PI (3.14159265F)

+#define OPUS_COSF(_x)        ((float)cos(_x))

+#define OPUS_SINF(_x)        ((float)sin(_x))

+static void *check_alloc(void *_ptr){

+  if(_ptr==NULL){

+    fprintf(stderr,"Out of memory.\n");

+    exit(EXIT_FAILURE);

+  }

+  return _ptr;

+}

+static void *opus_malloc(size_t _size){

+  return check_alloc(malloc(_size));

+}

+static void *opus_realloc(void *_ptr,size_t _size){

+  return check_alloc(realloc(_ptr,_size));

+}

+#define FORMAT_S16_LE 0

+#define FORMAT_S24_LE 1

+#define FORMAT_F32_LE 2

+#define NBANDS (17)

+#define NFREQS (320)

+#define TEST_WIN_SIZE (640)

+#define TEST_WIN_STEP (160)

+static const int format_size[3] = {2, 3, 4};

+typedef union {

+    int i;

+    float f;

+} float_bits;

+static void biquad(float *y, float mem[2], const float *x,

+                   const float *b, const float *a, int N) {

+  int i;

+  for (i=0;i<N;i++) {

+    float xi, yi;

+    xi = x[i];

+    yi = x[i] + mem[0];

+    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);

+    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);

+    y[i] = yi;

+  }

+}

+static size_t read_pcm(float **_samples,FILE *_fin,int _nchannels,

+                       int format){

+  unsigned char  buf[1024];

+  float         *samples;

+  size_t         nsamples;

+  size_t         csamples;

+  size_t         xi;

+  size_t         nread;

+  static const float a_hp[2] = {-1.97354f, 0.97417f};

+  static const float b_hp[2] = {-2.f, 1.f};

+  float mem[2] = {0};

+  samples=NULL;

+  nsamples=csamples=0;

+  int size = format_size[format];

+  for(;;){

+    nread=fread(buf,size*_nchannels,1024/(size*_nchannels),_fin);

+    if(nread<=0)break;

+    if(nsamples+nread>csamples){

+      do csamples=csamples<<1|1;

+      while(nsamples+nread>csamples);

+      samples=(float *)opus_realloc(samples,

+       _nchannels*csamples*sizeof(*samples));

+    }

+    if (format==FORMAT_S16_LE) {

+      for(xi=0;xi<nread;xi++){

+        int ci;

+        for(ci=0;ci<_nchannels;ci++){

+          int s;

+          s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)];

+          s=((s&0xFFFF)^0x8000)-0x8000;

+          samples[(nsamples+xi)*_nchannels+ci]=s;

+        }

+      }

+    } else if (format==FORMAT_S24_LE) {

+       for(xi=0;xi<nread;xi++){

+         int ci;

+         for(ci=0;ci<_nchannels;ci++){

+           int s;

+           s=buf[3*(xi*_nchannels+ci)+2]<<16

+            |buf[3*(xi*_nchannels+ci)+1]<<8

+            |buf[3*(xi*_nchannels+ci)];

+           s=((s&0xFFFFFF)^0x800000)-0x800000;

+           samples[(nsamples+xi)*_nchannels+ci]=(1.f/256.f)*s;

+         }

+       }

+     } else if (format==FORMAT_F32_LE) {

+        for(xi=0;xi<nread;xi++){

+          int ci;

+          for(ci=0;ci<_nchannels;ci++){

+            float_bits s;

+            s.i=(unsigned)buf[4*(xi*_nchannels+ci)+3]<<24

+               |buf[4*(xi*_nchannels+ci)+2]<<16

+               |buf[4*(xi*_nchannels+ci)+1]<<8

+               |buf[4*(xi*_nchannels+ci)];

+            samples[(nsamples+xi)*_nchannels+ci] = s.f*32768;

+          }

+        }

+      } else {

+        exit(1);

+      }

+    nsamples+=nread;

+  }

+  *_samples=(float *)opus_realloc(samples,

+   _nchannels*nsamples*sizeof(*samples));

+  biquad(*_samples, mem, *_samples, b_hp, a_hp, nsamples);

+  return nsamples;

+}

+static void spectrum(float *_ps,const int *_bands, int _nbands,

+                     const float *_in,int _nchannels, size_t _nframes,

+                     int _window_sz, int _step, int _downsample){

+  float window[TEST_WIN_SIZE];

+  float x[TEST_WIN_SIZE];

+  size_t xi;

+  int    xj;

+  int    ps_sz;

+  kiss_fft_cpx X[2][NFREQS+1];

+  kiss_fftr_cfg kfft;

+  ps_sz=_window_sz/2;

+  /* Blackman-Harris window. */

+  for(xj=0;xj<_window_sz;xj++){

+    double n = (xj+.5)/_window_sz;

+    window[xj]=0.35875 - 0.48829*cos(2*OPUS_PI*n)

+             + 0.14128*cos(4*OPUS_PI*n) - 0.01168*cos(6*OPUS_PI*n);

+  }

+  kfft = mini_kiss_fftr_alloc(_window_sz, 0, NULL, NULL);

+  for(xi=0;xi<_nframes;xi++){

+    int ci;

+    int xk;

+    int bi;

+    for(ci=0;ci<_nchannels;ci++){

+      for(xk=0;xk<_window_sz;xk++){

+        x[xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci];

+      }

+      mini_kiss_fftr(kfft, x, X[ci]);

+    }

+    for(bi=xj=0;bi<_nbands;bi++){

+      float p[2]={0};

+      for(;xj<_bands[bi+1];xj++){

+        for(ci=0;ci<_nchannels;ci++){

+          float re;

+          float im;

+          re = X[ci][xj].r*_downsample;

+          im = X[ci][xj].i*_downsample;

+          _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+.1;

+          p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci];

+        }

+      }

+    }

+  }

+  free(kfft);

+}

+/*Bands on which we compute the pseudo-NMR (Bark-derived

+  CELT bands).*/

+static const int BANDS[NBANDS+1]={

+  0,8,16,24,32,40,48,56,64,80,96,112,128,160,192,224,272,320

+};

+void usage(const char *_argv0) {

+  fprintf(stderr,"Usage: %s [-s16|-s24|-f32] [-thresholds err4 err16]"

+  " <file1.sw> <file2.sw>\n", _argv0);

+}

+/* Taken from ancient CELT code */

+void psydecay_init(float *decayL, float *decayR, int len, int Fs)

+{

+   int i;

+   for (i=0;i<len;i++)

+   {

+      float f;

+      float deriv;

+      /* Real frequency (in Hz) */

+      f = Fs*i*(1/(2.f*len));

+      /* This is the derivative of the Vorbis freq->Bark function. */

+      deriv = (8.288e-8 * f)/(3.4225e-16 *f*f*f*f + 1)

+            + .009694/(5.476e-7 *f*f + 1) + 1e-4;

+      /* Back to FFT bin units */

+      deriv *= Fs*(1/(2.f*len));

+      /* decay corresponding to -10dB/Bark */

+      decayR[i] = pow(.1f, deriv);

+      /* decay corresponding to -25dB/Bark */

+      decayL[i] = pow(0.0031623f, deriv);

+      /*printf ("%f %f\n", decayL[i], decayR[i]);*/

+   }

+}

+#define PITCH_MIN 32

+#define PITCH_MAX 256

+#define PITCH_FRAME 320

+static float inner_prod(const float *x, const float *y, int len) {

+  float sum=0;

+  int i;

+  for (i=0;i<len;i++) sum += x[i]*y[i];

+  return sum;

+}

+static void compute_xcorr(const float *x, float *xcorr) {

+  int i;

+  float xx;

+  float filtered[PITCH_FRAME+PITCH_MAX];

+  for (i=0;i<PITCH_FRAME+PITCH_MAX;i++) {

+    filtered[i] = x[i-PITCH_MAX] - .8*x[i-PITCH_MAX-1];

+  }

+  xx = inner_prod(&filtered[PITCH_MAX],

+                  &filtered[PITCH_MAX], PITCH_FRAME);

+  for (i=0;i<=PITCH_MAX;i++) {

+    float xy, yy;

+    xy = inner_prod(&filtered[PITCH_MAX],

+                    &filtered[PITCH_MAX-i], PITCH_FRAME);

+    yy = inner_prod(&filtered[PITCH_MAX-i],

+                    &filtered[PITCH_MAX-i], PITCH_FRAME);

+    xcorr[i] = xy/sqrt(xx*yy+PITCH_FRAME);

+  }

+}

+#define LOUDNESS 0.2f

+int main(int _argc,const char **_argv){

+  FILE    *fin1;

+  FILE    *fin2;

+  float   *x;

+  float   *y;

+  float   *X;

+  float   *Y;

+  double    err4;

+  double    err16;

+  double    T2;

+  size_t   xlength;

+  size_t   ylength;

+  size_t   nframes;

+  size_t   xi;

+  int      ci;

+  int      xj;

+  int      bi;

+  int      nchannels;

+  int      downsample;

+  int      nbands;

+  int      ybands;

+  int      nfreqs;

+  int      yfreqs;

+  size_t   test_win_size;

+  size_t   test_win_step;

+  int      max_compare;

+  int      format;

+  const char *argv0 = _argv[0];

+  double err4_threshold=-1, err16_threshold=-1;

+  int compare_thresholds=0;

+  float decayL[NFREQS], decayR[NFREQS];

+  float norm[NFREQS];

+  float pitch_error=0;

+  int pitch_count=0;

+  psydecay_init(decayL, decayR, NFREQS, 16000);

+  if(_argc<3){

+    usage(argv0);

+    return EXIT_FAILURE;

+  }

+  nchannels=1;

+  ybands=nbands=NBANDS;

+  nfreqs=NFREQS;

+  test_win_size=TEST_WIN_SIZE;

+  test_win_step=TEST_WIN_STEP;

+  downsample=1;

+  format=FORMAT_S16_LE;

+  while (_argc > 3) {

+    if(strcmp(_argv[1],"-s16")==0){

+      format=FORMAT_S16_LE;

+      _argv++;

+      _argc--;

+    } else if(strcmp(_argv[1],"-s24")==0){

+      format=FORMAT_S24_LE;

+      _argv++;

+      _argc--;

+    } else if(strcmp(_argv[1],"-f32")==0){

+      format=FORMAT_F32_LE;

+      _argv++;

+      _argc--;

+    } else if(strcmp(_argv[1],"-thresholds")==0){

+      if (_argc < 7) {

+        usage(argv0);

+        return EXIT_FAILURE;

+      }

+      err4_threshold=atof(_argv[2]);

+      err16_threshold=atof(_argv[3]);

+      compare_thresholds=1;

+      _argv+=3;

+      _argc-=3;

+    } else {

+      usage(argv0);

+      return EXIT_FAILURE;

+    }

+  }

+  if(_argc!=3){

+    usage(argv0);

+    return EXIT_FAILURE;

+  }

+  downsample=1;

+  yfreqs=nfreqs/downsample;

+  fin1=fopen(_argv[1],"rb");

+  if(fin1==NULL){

+    fprintf(stderr,"Error opening '%s'.\n",_argv[1]);

+    return EXIT_FAILURE;

+  }

+  fin2=fopen(_argv[2],"rb");

+  if(fin2==NULL){

+    fprintf(stderr,"Error opening '%s'.\n",_argv[2]);

+    fclose(fin1);

+    return EXIT_FAILURE;

+  }

+  /*Read in the data and allocate scratch space.*/

+  xlength=read_pcm(&x,fin1,1,format);

+  fclose(fin1);

+  ylength=read_pcm(&y,fin2,nchannels,format);

+  fclose(fin2);

+  if(xlength!=ylength*downsample){

+    fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n",

+     (unsigned long)xlength,(unsigned long)ylength*downsample);

+    return EXIT_FAILURE;

+  }

+  if(xlength<test_win_size){

+    fprintf(stderr,"Insufficient sample data (%lu<%lu).\n",

+     (unsigned long)xlength,test_win_size);

+    return EXIT_FAILURE;

+  }

+  nframes=(xlength-test_win_size+test_win_step)/test_win_step;

+  X=(float *)opus_malloc(nframes*nfreqs*nchannels*sizeof(*X));

+  Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y));

+  for(xi=2;xi<nframes-2;xi++){

+    float xcorr[PITCH_MAX+1], ycorr[PITCH_MAX+1];

+    int i;

+    float maxcorr=-1;

+    int pitch=0;

+    compute_xcorr(&x[xi*TEST_WIN_STEP], xcorr);

+    compute_xcorr(&y[xi*TEST_WIN_STEP], ycorr);

+    for (i=PITCH_MIN;i<=PITCH_MAX;i++) {

+      if (xcorr[i] > maxcorr) {

+        maxcorr = xcorr[i];

+        pitch = i;

+      }

+    }

+    if (xcorr[pitch] > .7) {

+      pitch_error += fabs(xcorr[pitch]-ycorr[pitch]);

+      pitch_count++;

+    }

+  }

+  pitch_error /= pitch_count;

+  /*Compute the per-band spectral energy of the original signal

+     and the error.*/

+  spectrum(X,BANDS,nbands,x,nchannels,nframes,

+        test_win_size,test_win_step,1);

+  free(x);

+  spectrum(Y,BANDS,ybands,y,nchannels,nframes,

+        test_win_size/downsample,test_win_step/downsample,downsample);

+  free(y);

+  norm[0]=1;

+  for(xj=1;xj<NFREQS;xj++){

+    norm[xj] = 1 + decayR[xj]*norm[xj-1];

+  }

+  for(xj=NFREQS-2;xj>=0;xj--){

+    norm[xj] = norm[xj] + decayL[xj]*norm[xj+1];

+  }

+  for(xj=0;xj<NFREQS;xj++) norm[xj] = 1.f/norm[xj];

+  for(xi=0;xi<nframes;xi++){

+    for(xj=1;xj<NFREQS;xj++){

+      X[xi*nfreqs+xj] = X[xi*nfreqs+xj]+decayR[xj]*X[xi*nfreqs+xj-1];

+      Y[xi*nfreqs+xj] = Y[xi*nfreqs+xj]+decayR[xj]*Y[xi*nfreqs+xj-1];

+    }

+    for(xj=NFREQS-2;xj>=0;xj--){

+      X[xi*nfreqs+xj] = X[xi*nfreqs+xj]+decayL[xj]*X[xi*nfreqs+xj+1];

+      Y[xi*nfreqs+xj] = Y[xi*nfreqs+xj]+decayL[xj]*Y[xi*nfreqs+xj+1];

+    }

+    for(xj=0;xj<NFREQS;xj++){

+      X[xi*nfreqs+xj] *= norm[xj];

+      Y[xi*nfreqs+xj] *= norm[xj];

+    }

+  }

+  for(xi=0;xi<nframes;xi++){

+    float maxE=0;

+    for(xj=0;xj<NFREQS;xj++){

+      maxE = MAX(maxE, X[xi*nfreqs+xj]);

+    }

+    /* Allow for up to 80 dB instantaneous dynamic range. */

+    for(xj=0;xj<NFREQS;xj++){

+      X[xi*nfreqs+xj] = MAX(1e-8*maxE, X[xi*nfreqs+xj]);

+      Y[xi*nfreqs+xj] = MAX(1e-8*maxE, Y[xi*nfreqs+xj]);

+    }

+    /* Forward temporal masking: -3 dB/2.5ms slope.*/

+    if(xi>0){

+      for(xj=0;xj<NFREQS;xj++){

+        X[xi*nfreqs+xj] += .5f*X[(xi-1)*nfreqs+xj];

+        Y[xi*nfreqs+xj] += .5f*Y[(xi-1)*nfreqs+xj];

+      }

+    }

+  }

+  /*Backward temporal masking: -10 dB/2.5ms slope.*/

+  for(xi=nframes-2;xi-->0;){

+    for(xj=0;xj<NFREQS;xj++){

+      X[xi*nfreqs+xj] += .1f*X[(xi+1)*nfreqs+xj];

+      Y[xi*nfreqs+xj] += .1f*Y[(xi+1)*nfreqs+xj];

+    }

+  }

+  max_compare=BANDS[nbands];

+  err4=0;

+  err16=0;

+  T2=0;

+  for(xi=0;xi<nframes;xi++){

+    double Ef2, Ef4, Tf2;

+    Ef2=0;

+    Ef4=0;

+    Tf2=0;

+    for(bi=0;bi<ybands;bi++){

+      double Eb2, Eb4, Tb2;

+      double w;

+      Tb2=Eb2=Eb4=0;

+      w = 1.f/(BANDS[bi+1]-BANDS[bi]);

+      for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){

+        float f, thresh;

+        f = xj*OPUS_PI/960;

+        /* Shape the lower threshold similar to 1/(1 - 0.85*z^-1)

+           deemphasis filter at 48 kHz. */

+        thresh = .1/(.15*.15 + f*f);

+        for(ci=0;ci<nchannels;ci++){

+          float re;

+          float im;

+          re = pow(Y[(xi*yfreqs+xj)*nchannels+ci]+thresh,LOUDNESS)

+             - pow(X[(xi*nfreqs+xj)*nchannels+ci]+thresh,LOUDNESS);

+          im = re*re;

+          Tb2 += w*pow(X[(xi*nfreqs+xj)*nchannels+ci]+thresh,

+                     2*LOUDNESS);

+          /* Per-band error weighting. */

+          im *= w;

+          Eb2+=im;

+          /* Same for 4th power, but make it less sensitive to

+             very low energies. */

+          re = pow(Y[(xi*yfreqs+xj)*nchannels+ci]+10*thresh,LOUDNESS)

+             - pow(X[(xi*nfreqs+xj)*nchannels+ci]+10*thresh,LOUDNESS);

+          im = re*re;

+          /* Per-band error weighting. */

+          im *= w;

+          Eb4+=im;

+        }

+      }

+      Eb2 /= (BANDS[bi+1]-BANDS[bi])*nchannels;

+      Eb4 /= (BANDS[bi+1]-BANDS[bi])*nchannels;

+      Tb2 /= (BANDS[bi+1]-BANDS[bi])*nchannels;

+      Ef2 += Eb2;

+      Ef4 += Eb4*Eb4;

+      Tf2 += Tb2;

+    }

+    Ef2/=nbands;

+    Ef4/=nbands;

+    Ef4*=Ef4;

+    Tf2/=nbands;

+    err4+=Ef2*Ef2;

+    err16+=Ef4*Ef4;

+    T2 += Tf2;

+  }

+  free(X);

+  free(Y);

+  err4=100*pow(err4/nframes,1.0/4)/sqrt(T2);

+  err16=100*pow(err16/nframes,1.0/16)/sqrt(T2);

+  fprintf(stderr, "err4 = %f, err16 = %f, pitch = %f\n",

+          err4, err16, pitch_error);

+  if (compare_thresholds) {

+    if (err4 <= err4_threshold && err16 <= err16_threshold) {

+      fprintf(stderr, "Comparison PASSED\n");

+    } else {

+      fprintf(stderr, "*** Comparison FAILED ***"

+      " (thresholds were %f %f)\n", err4_threshold, err16_threshold);

+      return EXIT_FAILURE;

+    }

+  }

+  return EXIT_SUCCESS;

+}

--

⑨