shithub: opus

--- a/silk/float/SigProc_FLP.h

+++ b/silk/float/SigProc_FLP.h

@@ -74,7 +74,8 @@

     silk_float          *results,           /* O    result (length correlationCount)                            */

     const silk_float    *inputData,         /* I    input data to correlate                                     */

     opus_int            inputDataSize,      /* I    length of input                                             */

-    opus_int            correlationCount    /* I    number of correlation taps to compute                       */

+    opus_int            correlationCount,    /* I    number of correlation taps to compute                       */

+    int                 arch

);

 opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */

@@ -106,7 +107,8 @@

     const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */

     const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */

     const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */

-    const opus_int      D                   /* I    order                                                       */

+    const opus_int      D,                  /* I    order                                                       */

+    int                 arch

);

 /* multiply a vector by a constant */

@@ -132,7 +134,7 @@

);

 #ifndef OVERRIDE_inner_product_FLP

-#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_c(data1, data2, dataSize)

+/* Generic fallback, used only when OVERRIDE_inner_product_FLP has not been defined.      */

+/* arch is evaluated and discarded: silk_inner_product_FLP_c() takes no arch argument.    */

+/* The argument is parenthesized so that any expression can be passed safely.             */

+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),silk_inner_product_FLP_c(data1, data2, dataSize))

 #endif

--- a/silk/float/autocorrelation_FLP.c

+++ b/silk/float/autocorrelation_FLP.c

@@ -37,7 +37,8 @@

     silk_float          *results,           /* O    result (length correlationCount)                            */

     const silk_float    *inputData,         /* I    input data to correlate                                     */

     opus_int            inputDataSize,      /* I    length of input                                             */

-    opus_int            correlationCount    /* I    number of correlation taps to compute                       */

+    opus_int            correlationCount,    /* I    number of correlation taps to compute                       */

+    int                 arch

     opus_int i;

@@ -47,6 +48,6 @@

     for( i = 0; i < correlationCount; i++ ) {

-        results[ i ] =  (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i );

+        results[ i ] =  (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i, arch );

--- a/silk/float/burg_modified_FLP.c

+++ b/silk/float/burg_modified_FLP.c

@@ -42,7 +42,8 @@

     const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */

     const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */

     const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */

-    const opus_int      D                   /* I    order                                                       */

+    const opus_int      D,                  /* I    order                                                       */

+    int                 arch

     opus_int         k, n, s, reached_max_gain;

@@ -60,7 +61,7 @@

     for( s = 0; s < nb_subfr; s++ ) {

         x_ptr = x + s * subfr_length;

         for( n = 1; n < D + 1; n++ ) {

-            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );

+            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n, arch );

     silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );

--- a/silk/float/corrMatrix_FLP.c

+++ b/silk/float/corrMatrix_FLP.c

@@ -41,7 +41,8 @@

     const silk_float                *t,                                 /* I    Target vector [L]                           */

     const opus_int                  L,                                  /* I    Length of vecors                            */

     const opus_int                  Order,                              /* I    Max lag for correlation                     */

-    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */

+    silk_float                      *Xt,                                /* O    X'*t correlation vector [order]             */

+    int                             arch

     opus_int lag;

@@ -50,7 +51,7 @@

     ptr1 = &x[ Order - 1 ];                     /* Points to first sample of column 0 of X: X[:,0] */

     for( lag = 0; lag < Order; lag++ ) {

         /* Calculate X[:,lag]'*t */

-        Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L );

+        Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L, arch );

         ptr1--;                                 /* Next column of X */

@@ -60,7 +61,8 @@

     const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */

     const opus_int                  L,                                  /* I    Length of vectors                           */

     const opus_int                  Order,                              /* I    Max lag for correlation                     */

-    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */

+    silk_float                      *XX,                                /* O    X'*X correlation matrix [order x order]     */

+    int                             arch

     opus_int j, lag;

@@ -79,7 +81,7 @@

     ptr2 = &x[ Order - 2 ];                     /* First sample of column 1 of X */

     for( lag = 1; lag < Order; lag++ ) {

         /* Calculate X[:,0]'*X[:,lag] */

-        energy = silk_inner_product_FLP( ptr1, ptr2, L );

+        energy = silk_inner_product_FLP( ptr1, ptr2, L, arch );

         matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy;

         matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy;

         /* Calculate X[:,j]'*X[:,j + lag] */

--- a/silk/float/find_LPC_FLP.c

+++ b/silk/float/find_LPC_FLP.c

@@ -38,7 +38,8 @@

     silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */

     opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */

     const silk_float                x[],                                /* I    Input signal                                */

-    const silk_float                minInvGain                          /* I    Inverse of max prediction gain              */

+    const silk_float                minInvGain,                         /* I    Inverse of max prediction gain              */

+    int                             arch

     opus_int    k, subfr_length;

@@ -56,12 +57,12 @@

     psEncC->indices.NLSFInterpCoef_Q2 = 4;

     /* Burg AR analysis for the full frame */

-    res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );

+    res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, arch );

     if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {

         /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than        */

         /* adding it to the residual energy of the first 10 ms in each iteration of the search below    */

-        res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder );

+        res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder, arch );

         /* Convert to NLSFs */

         silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder );

--- a/silk/float/find_LTP_FLP.c

+++ b/silk/float/find_LTP_FLP.c

@@ -38,7 +38,8 @@

     const silk_float                r_ptr[],                            /* I    LPC residual                                */

     const opus_int                  lag[ MAX_NB_SUBFR ],                /* I    LTP lags                                    */

     const opus_int                  subfr_length,                       /* I    Subframe length                             */

-    const opus_int                  nb_subfr                            /* I    number of subframes                         */

+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

+    int                             arch

     opus_int   k;

@@ -50,8 +51,8 @@

     XX_ptr = XX;

     for( k = 0; k < nb_subfr; k++ ) {

         lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );

-        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr );

-        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr );

+        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr, arch );

+        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr, arch );

         xx = ( silk_float )silk_energy_FLP( r_ptr, subfr_length + LTP_ORDER );

         temp = 1.0f / silk_max( xx, LTP_CORR_INV_MAX * 0.5f * ( XX_ptr[ 0 ] + XX_ptr[ 24 ] ) + 1.0f );

         silk_scale_vector_FLP( XX_ptr, temp, LTP_ORDER * LTP_ORDER );

--- a/silk/float/find_pitch_lags_FLP.c

+++ b/silk/float/find_pitch_lags_FLP.c

@@ -82,7 +82,7 @@

     silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );

     /* Calculate autocorrelation sequence */

-    silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );

+    silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );

     /* Add white noise, as a fraction of the energy */

     auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1;

--- a/silk/float/find_pred_coefs_FLP.c

+++ b/silk/float/find_pred_coefs_FLP.c

@@ -63,7 +63,7 @@

         celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

         /* LTP analysis */

-        silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );

+        silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );

         /* Quantize LTP gain parameters */

         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,

@@ -102,7 +102,7 @@

     /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */

-    silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain );

+    silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain, psEnc->sCmn.arch );

     /* Quantize LSFs */

     silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );

--- a/silk/float/main_FLP.h

+++ b/silk/float/main_FLP.h

@@ -138,7 +138,8 @@

     silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */

     opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */

     const silk_float                x[],                                /* I    Input signal                                */

-    const silk_float                minInvGain                          /* I    Prediction gain from LTP (dB)               */

+    const silk_float                minInvGain,                         /* I    Prediction gain from LTP (dB)               */

+    int                             arch

);

 /* LTP analysis */

@@ -148,7 +149,8 @@

     const silk_float                r_ptr[],                            /* I    LPC residual                                */

     const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */

     const opus_int                  subfr_length,                       /* I    Subframe length                             */

-    const opus_int                  nb_subfr                            /* I    number of subframes                         */

+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

+    int                             arch

);

 void silk_LTP_analysis_filter_FLP(

@@ -221,7 +223,8 @@

     const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */

     const opus_int                  L,                                  /* I    Length of vectors                           */

     const opus_int                  Order,                              /* I    Max lag for correlation                     */

-    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */

+    silk_float                      *XX,                                /* O    X'*X correlation matrix [order x order]     */

+    int                             arch

);

 /* Calculates correlation vector X'*t */

@@ -230,7 +233,8 @@

     const silk_float                *t,                                 /* I    Target vector [L]                           */

     const opus_int                  L,                                  /* I    Length of vecors                            */

     const opus_int                  Order,                              /* I    Max lag for correlation                     */

-    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */

+    silk_float                      *Xt,                                /* O    X'*t correlation vector [order]             */

+    int                             arch

);

 /* Apply sine window to signal vector.  */

--- a/silk/float/noise_shape_analysis_FLP.c

+++ b/silk/float/noise_shape_analysis_FLP.c

@@ -255,7 +255,7 @@

                 psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );

         } else {

             /* Calculate regular auto correlation */

-            silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );

+            silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, psEnc->sCmn.arch );

         /* Add white noise, as a fraction of energy */

--- a/silk/float/pitch_analysis_core_FLP.c

+++ b/silk/float/pitch_analysis_core_FLP.c

@@ -291,7 +291,7 @@

         for( j = 0; j < length_d_comp; j++ ) {

             d = d_comp[ j ];

             basis_ptr = target_ptr - d;

-            cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );

+            cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz, arch );

             if( cross_corr > 0.0f ) {

                 energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );

                 C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );

--- a/silk/x86/main_sse.h

+++ b/silk/x86/main_sse.h

@@ -278,11 +278,18 @@

 #if defined (OPUS_X86_PRESUME_AVX2)

 #define OVERRIDE_inner_product_FLP

-#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_avx2(data1, data2, dataSize)

+/* Direct binding to the AVX2 routine; arch is evaluated and discarded. */

+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),silk_inner_product_FLP_avx2(data1, data2, dataSize))

 #elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)

-/*#define OVERRIDE_inner_product_FLP*/

+#define OVERRIDE_inner_product_FLP

+/* Table of implementations, indexed by (arch & OPUS_ARCHMASK). */

+extern double (*const SILK_INNER_PRODUCT_FLP_IMPL[OPUS_ARCHMASK + 1])(

+    const silk_float    *data1,

+    const silk_float    *data2,

+    opus_int            dataSize

+);

+/* arch is expanded exactly once (as the table index), so a side-effecting argument is not evaluated twice. */

+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((*SILK_INNER_PRODUCT_FLP_IMPL[(arch) & OPUS_ARCHMASK])(data1, data2, dataSize))

 #endif

--- a/silk/x86/x86_silk_map.c

+++ b/silk/x86/x86_silk_map.c

@@ -32,6 +32,7 @@

 #include "celt/x86/x86cpu.h"

 #include "structs.h"

 #include "SigProc_FIX.h"

+#include "SigProc_FLP.h"

 #include "pitch.h"

 #include "main.h"

@@ -156,4 +157,21 @@

};

 #endif

+#ifndef FIXED_POINT /* table below is compiled only when FIXED_POINT is not defined */

+double (*const SILK_INNER_PRODUCT_FLP_IMPL[ OPUS_ARCHMASK + 1 ] )( /* inner-product implementations, one slot per arch index */

+    const silk_float    *data1,

+    const silk_float    *data2,

+    opus_int            dataSize

+) = { /* NOTE(review): five initializers for OPUS_ARCHMASK + 1 slots - confirm no reachable arch index is left NULL */

+  silk_inner_product_FLP_c,                  /* non-sse */

+  silk_inner_product_FLP_c,                  /* C fallback */

+  silk_inner_product_FLP_c,                  /* C fallback */

+  silk_inner_product_FLP_c, /* sse4.1: C fallback */

+  MAY_HAVE_AVX2( silk_inner_product_FLP )  /* avx2 (chosen by MAY_HAVE_AVX2; macro expansion not visible here) */

+};

+#endif

 #endif

--

⑨