shithub: opus

--- a/silk/VQ_WMat_EC.c

+++ b/silk/VQ_WMat_EC.c

@@ -34,85 +34,89 @@

 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */

 void silk_VQ_WMat_EC_c(

     opus_int8                   *ind,                           /* O    index of best codebook vector               */

-    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */

-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */

-    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */

-    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */

+    opus_int32                  *res_nrg_Q15,					/* O    best residual energy						*/

+    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate							*/

+    const opus_int32            *XX_Q17,						/* I    correlation matrix                          */

+    const opus_int32            *xX_Q17,						/* I    correlation vector							*/

     const opus_int8             *cb_Q7,                         /* I    codebook                                    */

-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */

     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */

-    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */

-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */

-    opus_int                    L                               /* I    number of vectors in codebook               */

+    const opus_int              subfr_len,						/* I    number of samples per subframe				*/

+    const opus_int              L                               /* I    number of vectors in codebook               */

-    opus_int   k, gain_tmp_Q7;

+    opus_int   k;

     const opus_int8 *cb_row_Q7;

-    opus_int16 diff_Q14[ 5 ];

-    opus_int32 sum1_Q14, sum2_Q16;

+	opus_int32 neg_xX_Q24[ 5 ];

+    opus_int32 sum1_Q15, sum2_Q24, sum1_best_Q15;

+	opus_int32 bits_res_Q8, bits_tot_Q8;

+	/* Negate and convert to new Q domain */

+	neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 );

+	neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 );

+	neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 );

+	neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );

+	neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );

     /* Loop over codebook */

-    *rate_dist_Q14 = silk_int32_MAX;

+    *rate_dist_Q8 = silk_int32_MAX;

+	*res_nrg_Q15 = silk_int32_MAX;

+	sum1_best_Q15 = silk_int32_MAX;

     cb_row_Q7 = cb_Q7;

     for( k = 0; k < L; k++ ) {

-        gain_tmp_Q7 = cb_gain_Q7[k];

-        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );

-        diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );

-        diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );

-        diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );

-        diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );

         /* Weighted rate */

-        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );

+		/* Quantization error: 1 - 2* xX * cb + cb' * XX * cb */

+		sum1_Q15 = SILK_FIX_CONST( 1.0001, 15 );

-        /* Penalty for too large gain */

-        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );

+        /* first row of XX_Q17 */

+        sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[  1 ], cb_row_Q7[ 1 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  2 ], cb_row_Q7[ 2 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  3 ], cb_row_Q7[ 3 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  4 ], cb_row_Q7[ 4 ] );

+        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  0 ], cb_row_Q7[ 0 ] );

+        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 0 ] );

-        silk_assert( sum1_Q14 >= 0 );

+        /* second row of XX_Q17 */

+        sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[  7 ], cb_row_Q7[ 2 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  8 ], cb_row_Q7[ 3 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  9 ], cb_row_Q7[ 4 ] );

+        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  6 ], cb_row_Q7[ 1 ] );

+        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 1 ] );

-        /* first row of W_Q18 */

-        sum2_Q16 = silk_SMULWB(           W_Q18[  1 ], diff_Q14[ 1 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  2 ], diff_Q14[ 2 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  3 ], diff_Q14[ 3 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  4 ], diff_Q14[ 4 ] );

-        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  0 ], diff_Q14[ 0 ] );

-        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 0 ] );

+        /* third row of XX_Q17 */

+        sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 14 ], cb_row_Q7[ 4 ] );

+        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 12 ], cb_row_Q7[ 2 ] );

+        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 2 ] );

-        /* second row of W_Q18 */

-        sum2_Q16 = silk_SMULWB(           W_Q18[  7 ], diff_Q14[ 2 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  8 ], diff_Q14[ 3 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  9 ], diff_Q14[ 4 ] );

-        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  6 ], diff_Q14[ 1 ] );

-        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 1 ] );

+        /* fourth row of XX_Q17 */

+        sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] );

+        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 18 ], cb_row_Q7[ 3 ] );

+        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 3 ] );

-        /* third row of W_Q18 */

-        sum2_Q16 = silk_SMULWB(           W_Q18[ 13 ], diff_Q14[ 3 ] );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );

-        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );

-        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );

+        /* last row of XX_Q17 */

+		sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 );

+        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 24 ], cb_row_Q7[ 4 ] );

+        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 4 ] );

-        /* fourth row of W_Q18 */

-        sum2_Q16 = silk_SMULWB(           W_Q18[ 19 ], diff_Q14[ 4 ] );

-        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );

-        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );

-        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );

+		/* If ever the following assert triggers, increase LTP_CORR_INV_MAX */

+        silk_assert( sum1_Q15 >= 0 );

-        /* last row of W_Q18 */

-        sum2_Q16 = silk_SMULWB(           W_Q18[ 24 ], diff_Q14[ 4 ] );

-        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );

-        silk_assert( sum1_Q14 >= 0 );

-        /* find best */

-        if( sum1_Q14 < *rate_dist_Q14 ) {

-            *rate_dist_Q14 = sum1_Q14;

-            *ind = (opus_int8)k;

-            *gain_Q7 = gain_tmp_Q7;

-        }

+		/* find best: keep the codebook vector with the lowest estimated total bit count */

+		if( sum1_Q15 <= sum1_best_Q15 ) { /* the rate is only evaluated when the residual energy does not exceed the lowest seen so far */

+			sum1_best_Q15 = sum1_Q15;

+			/* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */

+			bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 ) - (15 << 7) ); /* NOTE(review): (15 << 7) presumably removes the Q15 scaling from a Q7 log2 value - confirm against silk_lin2log */

+			bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 2 ); /* NOTE(review): assuming this is a + (b << 2), cl_Q5 becomes Q7 and is added to a Q8 value, i.e. the code length counts at half weight - confirm this is intended */

+			if( bits_tot_Q8 <= *rate_dist_Q8 ) { /* "<=": on a tie the later codebook index wins */

+				*rate_dist_Q8 = bits_tot_Q8;

+				*res_nrg_Q15 = sum1_Q15;

+				*ind = (opus_int8)k;

+			}

+		}

         /* Go to next cbk vector */

         cb_row_Q7 += LTP_ORDER;

--- a/silk/control_codec.c

+++ b/silk/control_codec.c

@@ -293,13 +293,10 @@

             psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );

         if( psEnc->sCmn.fs_kHz == 16 ) {

-            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );

             psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;

         } else if( psEnc->sCmn.fs_kHz == 12 ) {

-            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );

             psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;

         } else {

-            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );

             psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;

--- a/silk/fixed/corrMatrix_FIX.c

+++ b/silk/fixed/corrMatrix_FIX.c

@@ -58,7 +58,7 @@

         for( lag = 0; lag < order; lag++ ) {

             inner_prod = 0;

             for( i = 0; i < L; i++ ) {

-                inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );

+                inner_prod = silk_ADD_RSHIFT32( inner_prod, silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );

             Xt[ lag ] = inner_prod; /* X[:,lag]'*t */

             ptr1--; /* Go to next column of X */

@@ -77,61 +77,54 @@

     const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */

     const opus_int                  L,                                      /* I    Length of vectors                                                           */

     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */

-    const opus_int                  head_room,                              /* I    Desired headroom                                                            */

     opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */

-    opus_int                        *rshifts,                               /* I/O  Right shifts of correlations                                                */

+	opus_int32                      *nrg,									/* O	Energy of x vector															*/

+    opus_int                        *rshifts,                               /* O	Right shifts of correlations and energy                                     */

     int                             arch                                    /* I    Run-time architecture                                                       */

-    opus_int         i, j, lag, rshifts_local, head_room_rshifts;

+    opus_int         i, j, lag;

     opus_int32       energy;

     const opus_int16 *ptr1, *ptr2;

     /* Calculate energy to find shift used to fit in 32 bits */

-    silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );

-    /* Add shifts to get the desired head room */

-    head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );

+    silk_sum_sqr_shift( nrg, rshifts, x, L + order - 1 );

+	energy = *nrg;

-    energy = silk_RSHIFT32( energy, head_room_rshifts );

-    rshifts_local += head_room_rshifts;

-    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */

+	/* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */

     /* Remove contribution of first order - 1 samples */

     for( i = 0; i < order - 1; i++ ) {

-        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );

+        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), *rshifts );

-    if( rshifts_local < *rshifts ) {

-        /* Adjust energy */

-        energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );

-        rshifts_local = *rshifts;

-    }

     /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */

     /* Fill out the diagonal of the correlation matrix */

     matrix_ptr( XX, 0, 0, order ) = energy;

+	silk_assert( energy >= 0 );

     ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */

     for( j = 1; j < order; j++ ) {

-        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );

-        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );

+        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), *rshifts ) );

+        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), *rshifts ) );

         matrix_ptr( XX, j, j, order ) = energy;

+		silk_assert( energy >= 0 );

     ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */

     /* Calculate the remaining elements of the correlation matrix */

-    if( rshifts_local > 0 ) {

+    if( *rshifts > 0 ) {

         /* Right shifting used */

         for( lag = 1; lag < order; lag++ ) {

             /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */

             energy = 0;

             for( i = 0; i < L; i++ ) {

-                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local );

+                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), *rshifts );

             /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */

             matrix_ptr( XX, lag, 0, order ) = energy;

             matrix_ptr( XX, 0, lag, order ) = energy;

             for( j = 1; j < ( order - lag ); j++ ) {

-                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );

-                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );

+                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), *rshifts ) );

+                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), *rshifts ) );

                 matrix_ptr( XX, lag + j, j, order ) = energy;

                 matrix_ptr( XX, j, lag + j, order ) = energy;

@@ -153,6 +146,5 @@

             ptr2--;/* Update pointer to first sample of next column (lag) in X */

-    *rshifts = rshifts_local;

--- a/silk/fixed/find_LTP_FIX.c

+++ b/silk/fixed/find_LTP_FIX.c

@@ -32,214 +32,68 @@

 #include "main_FIX.h"

 #include "tuning_parameters.h"

-/* Head room for correlations */

-#define LTP_CORRS_HEAD_ROOM                             2

-void silk_fit_LTP(

-    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],

-    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]

-);

 void silk_find_LTP_FIX(

-    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */

-    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */

-    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */

-    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */

+    opus_int32                      XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Correlation matrix												*/

+    opus_int32                      xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ],	/* O    Correlation vector															*/

+    const opus_int16                r_ptr[],                                /* I    Residual signal after LPC								                    */

     const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */

-    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */

-    const opus_int                  subfr_length,                           /* I    subframe length                                                             */

-    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */

-    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */

-    opus_int                        corr_rshifts[ MAX_NB_SUBFR ],           /* O    right shifts applied to correlations                                        */

+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */

+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */

     int                             arch                                    /* I    Run-time architecture                                                       */

-    opus_int   i, k, lshift;

-    const opus_int16 *r_ptr, *lag_ptr;

-    opus_int16 *b_Q14_ptr;

+    opus_int   i, k, extra_shifts;

+	opus_int   xx_shifts, xX_shifts, XX_shifts;

+    const opus_int16 *lag_ptr;

+    opus_int32 *XXLTP_Q17_ptr, *xXLTP_Q17_ptr;

+	opus_int32 xx, nrg, temp;

-    opus_int32 regu;

-    opus_int32 *WLTP_ptr;

-    opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;

-    opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;

-    opus_int32 temp32, denom32;

-    opus_int   extra_shifts;

-    opus_int   rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;

-    opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;

-    opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];

-    opus_int32 wd, m_Q12;

-    b_Q14_ptr = b_Q14;

-    WLTP_ptr  = WLTP;

-    r_ptr     = &r_lpc[ mem_offset ];

+    xXLTP_Q17_ptr = xXLTP_Q17;

+    XXLTP_Q17_ptr = XXLTP_Q17;

     for( k = 0; k < nb_subfr; k++ ) {

         lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );

-        silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */

+        silk_sum_sqr_shift( &xx, &xx_shifts, r_ptr, subfr_length );									/* xx in Q( -xx_shifts ) */

+        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, XXLTP_Q17_ptr, &nrg, &XX_shifts, arch );	/* XXLTP_Q17_ptr and nrg in Q( -XX_shifts ) */

+		extra_shifts = xx_shifts - XX_shifts;

+		if( extra_shifts > 0 ) {

+			/* Shift XX */

+			xX_shifts = xx_shifts;

+			for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {

+				XXLTP_Q17_ptr[ i ] = silk_RSHIFT32( XXLTP_Q17_ptr[ i ], extra_shifts );				/* Q( -xX_shifts ) */

+			}

+			nrg = silk_RSHIFT32( nrg, extra_shifts );												/* Q( -xX_shifts ) */

+		} else if( extra_shifts < 0 ) {

+			/* Shift xx */

+			xX_shifts = XX_shifts;

+			xx = silk_RSHIFT32( xx, -extra_shifts );												/* Q( -xX_shifts ) */

+		} else {

+			xX_shifts = xx_shifts;

+		}

+        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xXLTP_Q17_ptr, xX_shifts, arch );	/* xXLTP_Q17_ptr in Q( -xX_shifts ) */

-        /* Assure headroom */

-        LZs = silk_CLZ32( rr[k] );

-        if( LZs < LTP_CORRS_HEAD_ROOM ) {

-            rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );

-            rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );

-        }

-        corr_rshifts[ k ] = rr_shifts;

-        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch );  /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */

-        /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */

-        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch );  /* Rr_fix_ptr   in Q( -corr_rshifts[ k ] ) */

-        if( corr_rshifts[ k ] > rr_shifts ) {

-            rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */

-        }

-        silk_assert( rr[ k ] >= 0 );

-        regu = 1;

-        regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );

-        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );

-        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );

-        silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER );

-        silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */

-        /* Limit and store in Q14 */

-        silk_fit_LTP( b_Q16, b_Q14_ptr );

-        /* Calculate residual energy */

-        nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */

-        /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */

-        extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );

-        denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */

-            silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts );    /* Q( -corr_rshifts[ k ] + extra_shifts ) */

-        denom32 = silk_max( denom32, 1 );

-        silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX );                       /* Wght always < 0.5 in Q0 */

-        temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 );             /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */

-        temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 );               /* Q26 */

-        /* Limit temp such that the below scaling never wraps around */

-        WLTP_max = 0;

-        for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {

-            WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );

-        }

-        lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */

-        silk_assert( 26 - 18 + lshift >= 0 );

-        if( 26 - 18 + lshift < 31 ) {

-            temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );

-        }

-        silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */

-        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */

-        silk_assert( w[k] >= 0 );

-        r_ptr     += subfr_length;

-        b_Q14_ptr += LTP_ORDER;

-        WLTP_ptr  += LTP_ORDER * LTP_ORDER;

-    }

-    maxRshifts = 0;

-    for( k = 0; k < nb_subfr; k++ ) {

-        maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );

-    }

-    /* Compute LTP coding gain */

-    if( LTPredCodGain_Q7 != NULL ) {

-        LPC_LTP_res_nrg = 0;

-        LPC_res_nrg     = 0;

-        silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */

-        for( k = 0; k < nb_subfr; k++ ) {

-            LPC_res_nrg     = silk_ADD32( LPC_res_nrg,     silk_RSHIFT( silk_ADD32( silk_SMULWB(  rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */

-            LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */

-        }

-        LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */

-        div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );

-        *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );

-        silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );

-    }

-    /* smoothing */

-    /* d = sum( B, 1 ); */

-    b_Q14_ptr = b_Q14;

-    for( k = 0; k < nb_subfr; k++ ) {

-        d_Q14[ k ] = 0;

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            d_Q14[ k ] += b_Q14_ptr[ i ];

-        }

-        b_Q14_ptr += LTP_ORDER;

-    }

-    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */

-    /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */

-    max_abs_d_Q14 = 0;

-    max_w_bits    = 0;

-    for( k = 0; k < nb_subfr; k++ ) {

-        max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );

-        /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */

-        /* Find bits needed in Q( 18 - maxRshifts ) */

-        max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );

-    }

-    /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */

-    silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );

-    /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */

-    extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;

-    /* Subtract what we got available; bits in output var plus maxRshifts */

-    extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */

-    extra_shifts = silk_max_int( extra_shifts, 0 );

-    maxRshifts_wxtra = maxRshifts + extra_shifts;

-    temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */

-    wd = 0;

-    for( k = 0; k < nb_subfr; k++ ) {

-        /* w has at least 2 bits of headroom so no overflow should happen */

-        temp32 = silk_ADD32( temp32,                     silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) );                      /* Q( 18 - maxRshifts_wxtra ) */

-        wd     = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */

-    }

-    m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );

-    b_Q14_ptr = b_Q14;

-    for( k = 0; k < nb_subfr; k++ ) {

-        /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */

-        if( 2 - corr_rshifts[k] > 0 ) {

-            temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );

-        } else {

-            temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );

-        }

-        g_Q26 = silk_MUL(

-            silk_DIV32(

-                SILK_FIX_CONST( LTP_SMOOTHING, 26 ),

-                silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ),                          /* Q10 */

-            silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) );    /* Q16 */

-        temp32 = 0;

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 );     /* 1638_Q14 = 0.1_Q0 */

-            temp32 += delta_b_Q14[ i ];                                 /* Q14 */

-        }

-        temp32 = silk_DIV32( g_Q26, temp32 );                           /* Q14 -> Q12 */

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );

-        }

-        b_Q14_ptr += LTP_ORDER;

-    }

-}

-void silk_fit_LTP(

-    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],

-    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]

-)

-{

-    opus_int i;

-    for( i = 0; i < LTP_ORDER; i++ ) {

-        LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) );

+		/* At this point all correlations are in Q(-xX_shifts) */

+		temp = silk_SMLAWB( 1, nrg, SILK_FIX_CONST( LTP_CORR_INV_MAX, 16 ) );

+		temp = silk_max( temp, xx );

+TIC(div)

+#if 0

+		for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {

+			XXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( XXLTP_Q17_ptr[ i ], temp, 17 );

+		}

+		for( i = 0; i < LTP_ORDER; i++ ) {

+			xXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( xXLTP_Q17_ptr[ i ], temp, 17 );

+		}

+#else

+		for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { /* normalize the correlation matrix by temp; the quotient is in Q17 */

+			XXLTP_Q17_ptr[ i ] = (opus_int32)( ( (opus_int64)XXLTP_Q17_ptr[ i ] * ( (opus_int64)1 << 17 ) ) / temp ); /* multiply rather than shift: off-diagonal entries can be negative and left-shifting a negative value is undefined */

+		}

+		for( i = 0; i < LTP_ORDER; i++ ) { /* normalize the correlation vector by the same temp; the quotient is in Q17 */

+			xXLTP_Q17_ptr[ i ] = (opus_int32)( ( (opus_int64)xXLTP_Q17_ptr[ i ] * ( (opus_int64)1 << 17 ) ) / temp ); /* multiply rather than shift: correlations can be negative and left-shifting a negative value is undefined */

+		}

+#endif

+TOC(div)

+		r_ptr         += subfr_length;

+        XXLTP_Q17_ptr += LTP_ORDER * LTP_ORDER;

+        xXLTP_Q17_ptr += LTP_ORDER;

--- a/silk/fixed/find_pred_coefs_FIX.c

+++ b/silk/fixed/find_pred_coefs_FIX.c

@@ -41,13 +41,12 @@

     opus_int         i;

-    opus_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];

+    opus_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ];

     opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];

     const opus_int16 *x_ptr;

     opus_int16       *x_pre_ptr;

     VARDECL( opus_int16, LPC_in_pre );

-    opus_int32       tmp, min_gain_Q16, minInvGain_Q30;

-    opus_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];

+    opus_int32       min_gain_Q16, minInvGain_Q30;

     SAVE_STACK;

     /* weighting for weighted least squares */

@@ -61,13 +60,11 @@

         /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */

         invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );

-        /* Ensure Wght_Q15 a minimum value 1 */

-        invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 );

+        /* Limit inverse */

+        invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 100 );

         /* Square the inverted gains */

         silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) );

-        tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );

-        Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 );

         /* Invert the inverted and normalized gains */

         local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );

@@ -77,7 +74,8 @@

            psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder

                + psEnc->sCmn.frame_length, opus_int16 );

     if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {

-        VARDECL( opus_int32, WLTP );

+        VARDECL( opus_int32, xXLTP_Q17 );

+        VARDECL( opus_int32, XXLTP_Q17 );

         /**********/

         /* VOICED */

@@ -84,17 +82,16 @@

         /**********/

         silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

-        ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );

+        ALLOC( xXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER, opus_int32 );

+        ALLOC( XXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );

         /* LTP analysis */

-        silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,

-            res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,

-            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch );

+        silk_find_LTP_FIX( XXLTP_Q17, xXLTP_Q17, &res_pitch[ psEnc->sCmn.ltp_mem_length ],

+			psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );

         /* Quantize LTP gain parameters */

         silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,

-            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr,

-            psEnc->sCmn.arch);

+            &psEncCtrl->LTPredCodGain_Q7, XXLTP_Q17, xXLTP_Q17, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );

         /* Control LTP scaling */

         silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );

@@ -119,7 +116,6 @@

         silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );

         psEncCtrl->LTPredCodGain_Q7 = 0;

-        psEnc->sCmn.sum_log_gain_Q7 = 0;

     /* Limit on total predictive coding gain */

--- a/silk/fixed/main_FIX.h

+++ b/silk/fixed/main_FIX.h

@@ -168,16 +168,12 @@

 /* LTP analysis */

 void silk_find_LTP_FIX(

-    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */

-    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */

-    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */

-    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */

+    opus_int32                      XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Correlation matrix												*/

+    opus_int32                      xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ],	/* O    Correlation vector															*/

+    const opus_int16                r_lpc[],                                /* I    Residual signal after LPC								                    */

     const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */

-    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */

-    const opus_int                  subfr_length,                           /* I    subframe length                                                             */

-    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */

-    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */

-    opus_int                        corr_rshifts[ MAX_NB_SUBFR ],           /* O    right shifts applied to correlations                                        */

+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */

+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */

     int                             arch                                    /* I    Run-time architecture                                                       */

);

@@ -231,9 +227,9 @@

     const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */

     const opus_int                  L,                                      /* I    Length of vectors                                                           */

     const opus_int                  order,                                  /* I    Max lag for correlation                                                     */

-    const opus_int                  head_room,                              /* I    Desired headroom                                                            */

     opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */

-    opus_int                        *rshifts,                               /* I/O  Right shifts of correlations                                                */

+	opus_int32                      *nrg,									/* O	Energy of x vector															*/

+    opus_int                        *rshifts,                               /* O    Right shifts of correlations                                                */

     int                              arch                                   /* I    Run-time architecture                                                       */

);

@@ -246,22 +242,6 @@

     opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */

     const opus_int                  rshifts,                                /* I    Right shifts of correlations                                                */

     int                             arch                                    /* I    Run-time architecture                                                       */

-);

-/* Add noise to matrix diagonal */

-void silk_regularize_correlations_FIX(

-    opus_int32                      *XX,                                    /* I/O  Correlation matrices                                                        */

-    opus_int32                      *xx,                                    /* I/O  Correlation values                                                          */

-    opus_int32                      noise,                                  /* I    Noise to add                                                                */

-    opus_int                        D                                       /* I    Dimension of XX                                                             */

-);

-/* Solves Ax = b, assuming A is symmetric */

-void silk_solve_LDL_FIX(

-    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */

-    opus_int                        M,                                      /* I    Size of matrix                                                              */

-    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */

-    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */

);

 #ifndef FORCE_CPP_BUILD

--- a/silk/fixed/solve_LS_FIX.c

+++ /dev/null

@@ -1,249 +1,0 @@

-/***********************************************************************

-Copyright (c) 2006-2011, Skype Limited. All rights reserved.

-Redistribution and use in source and binary forms, with or without

-modification, are permitted provided that the following conditions

-are met:

-- Redistributions of source code must retain the above copyright notice,

-this list of conditions and the following disclaimer.

-- Redistributions in binary form must reproduce the above copyright

-notice, this list of conditions and the following disclaimer in the

-documentation and/or other materials provided with the distribution.

-- Neither the name of Internet Society, IETF or IETF Trust, nor the

-names of specific contributors, may be used to endorse or promote

-products derived from this software without specific prior written

-permission.

-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

-POSSIBILITY OF SUCH DAMAGE.

-***********************************************************************/

-#ifdef HAVE_CONFIG_H

-#include "config.h"

-#endif

-#include "main_FIX.h"

-#include "stack_alloc.h"

-#include "tuning_parameters.h"

-/*****************************/

-/* Internal function headers */

-/*****************************/

-typedef struct {

-    opus_int32 Q36_part;

-    opus_int32 Q48_part;

-} inv_D_t;

-/* Factorize square matrix A into LDL form */

-static OPUS_INLINE void silk_LDL_factorize_FIX(

-    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */

-    opus_int            M,          /* I   Size of Matrix                                               */

-    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */

-    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */

-);

-/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */

-static OPUS_INLINE void silk_LS_SolveFirst_FIX(

-    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */

-    opus_int            M,          /* I    Dim of Matrix equation                                      */

-    const opus_int32    *b,         /* I    b Vector                                                    */

-    opus_int32          *x_Q16      /* O    x Vector                                                    */

-);

-/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */

-static OPUS_INLINE void silk_LS_SolveLast_FIX(

-    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */

-    const opus_int      M,          /* I    Dim of Matrix equation                                      */

-    const opus_int32    *b,         /* I    b Vector                                                    */

-    opus_int32          *x_Q16      /* O    x Vector                                                    */

-);

-static OPUS_INLINE void silk_LS_divide_Q16_FIX(

-    opus_int32          T[],        /* I/O  Numenator vector                                            */

-    inv_D_t             *inv_D,     /* I    1 / D vector                                                */

-    opus_int            M           /* I    dimension                                                   */

-);

-/* Solves Ax = b, assuming A is symmetric */

-void silk_solve_LDL_FIX(

-    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */

-    opus_int                        M,                                      /* I    Size of matrix                                                              */

-    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */

-    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */

-)

-{

-    VARDECL( opus_int32, L_Q16 );

-    opus_int32 Y[      MAX_MATRIX_SIZE ];

-    inv_D_t   inv_D[  MAX_MATRIX_SIZE ];

-    SAVE_STACK;

-    silk_assert( M <= MAX_MATRIX_SIZE );

-    ALLOC( L_Q16, M * M, opus_int32 );

-    /***************************************************

-    Factorize A by LDL such that A = L*D*L',

-    where L is lower triangular with ones on diagonal

-    ****************************************************/

-    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );

-    /****************************************************

-    * substitute D*L'*x = Y. ie:

-    L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b

-    ******************************************************/

-    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );

-    /****************************************************

-    D*L'*x = Y <=> L'*x = inv(D)*Y, because D is

-    diagonal just multiply with 1/d_i

-    ****************************************************/

-    silk_LS_divide_Q16_FIX( Y, inv_D, M );

-    /****************************************************

-    x = inv(L') * inv(D) * Y

-    *****************************************************/

-    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );

-    RESTORE_STACK;

-}

-static OPUS_INLINE void silk_LDL_factorize_FIX(

-    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */

-    opus_int            M,          /* I   Size of Matrix                                               */

-    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */

-    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */

-)

-{

-    opus_int   i, j, k, status, loop_count;

-    const opus_int32 *ptr1, *ptr2;

-    opus_int32 diag_min_value, tmp_32, err;

-    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];

-    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;

-    silk_assert( M <= MAX_MATRIX_SIZE );

-    status = 1;

-    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );

-    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {

-        status = 0;

-        for( j = 0; j < M; j++ ) {

-            ptr1 = matrix_adr( L_Q16, j, 0, M );

-            tmp_32 = 0;

-            for( i = 0; i < j; i++ ) {

-                v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */

-                tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */

-            }

-            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );

-            if( tmp_32 < diag_min_value ) {

-                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );

-                /* Matrix not positive semi-definite, or ill conditioned */

-                for( i = 0; i < M; i++ ) {

-                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );

-                }

-                status = 1;

-                break;

-            }

-            D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */

-            /* two-step division */

-            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */

-            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */

-            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */

-            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */

-            /* Save 1/Ds */

-            inv_D[ j ].Q36_part = one_div_diag_Q36;

-            inv_D[ j ].Q48_part = one_div_diag_Q48;

-            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */

-            ptr1 = matrix_adr( A, j, 0, M );

-            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );

-            for( i = j + 1; i < M; i++ ) {

-                tmp_32 = 0;

-                for( k = 0; k < j; k++ ) {

-                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */

-                }

-                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */

-                /* tmp_32 / D_Q0[j] : Divide to Q16 */

-                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),

-                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );

-                /* go to next column */

-                ptr2 += M;

-            }

-        }

-    }

-    silk_assert( status == 0 );

-}

-static OPUS_INLINE void silk_LS_divide_Q16_FIX(

-    opus_int32          T[],        /* I/O  Numenator vector                                            */

-    inv_D_t             *inv_D,     /* I    1 / D vector                                                */

-    opus_int            M           /* I    dimension                                                   */

-)

-{

-    opus_int   i;

-    opus_int32 tmp_32;

-    opus_int32 one_div_diag_Q36, one_div_diag_Q48;

-    for( i = 0; i < M; i++ ) {

-        one_div_diag_Q36 = inv_D[ i ].Q36_part;

-        one_div_diag_Q48 = inv_D[ i ].Q48_part;

-        tmp_32 = T[ i ];

-        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );

-    }

-}

-/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */

-static OPUS_INLINE void silk_LS_SolveFirst_FIX(

-    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */

-    opus_int            M,          /* I    Dim of Matrix equation                                      */

-    const opus_int32    *b,         /* I    b Vector                                                    */

-    opus_int32          *x_Q16      /* O    x Vector                                                    */

-)

-{

-    opus_int i, j;

-    const opus_int32 *ptr32;

-    opus_int32 tmp_32;

-    for( i = 0; i < M; i++ ) {

-        ptr32 = matrix_adr( L_Q16, i, 0, M );

-        tmp_32 = 0;

-        for( j = 0; j < i; j++ ) {

-            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );

-        }

-        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );

-    }

-}

-/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */

-static OPUS_INLINE void silk_LS_SolveLast_FIX(

-    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */

-    const opus_int      M,          /* I    Dim of Matrix equation                                      */

-    const opus_int32    *b,         /* I    b Vector                                                    */

-    opus_int32          *x_Q16      /* O    x Vector                                                    */

-)

-{

-    opus_int i, j;

-    const opus_int32 *ptr32;

-    opus_int32 tmp_32;

-    for( i = M - 1; i >= 0; i-- ) {

-        ptr32 = matrix_adr( L_Q16, 0, i, M );

-        tmp_32 = 0;

-        for( j = M - 1; j > i; j-- ) {

-            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );

-        }

-        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );

-    }

-}

--- a/silk/float/find_LTP_FLP.c

+++ b/silk/float/find_LTP_FLP.c

@@ -33,100 +33,32 @@

 #include "tuning_parameters.h"

 void silk_find_LTP_FLP(

-    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */

-    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */

-    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */

-    const silk_float                r_lpc[],                            /* I    LPC residual                                */

+    silk_float                      XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Correlation matrix                */

+    silk_float                      xX[ MAX_NB_SUBFR * LTP_ORDER ],		/* O    Correlation vector                */

+    const silk_float                r_ptr[],                            /* I    LPC residual                                */

     const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */

-    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */

     const opus_int                  subfr_length,                       /* I    Subframe length                             */

-    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

-    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */

+    const opus_int                  nb_subfr                            /* I    number of subframes                         */

-    opus_int   i, k;

-    silk_float *b_ptr, temp, *WLTP_ptr;

-    silk_float LPC_res_nrg, LPC_LTP_res_nrg;

-    silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ];

-    silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu;

-    silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];

-    const silk_float *r_ptr, *lag_ptr;

+    opus_int   k;

+    silk_float *xX_ptr, *XX_ptr;

+    const silk_float *lag_ptr;

+    silk_float xx, temp;

-    b_ptr    = b;

-    WLTP_ptr = WLTP;

-    r_ptr    = &r_lpc[ mem_offset ];

+    xX_ptr = xX;

+    XX_ptr = XX;

     for( k = 0; k < nb_subfr; k++ ) {

         lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );

+        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr );

+        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr );

+        xx = ( silk_float )silk_energy_FLP( r_ptr, subfr_length );

+        temp = 1.0f / silk_max( xx, LTP_CORR_INV_MAX * 0.5f * ( XX_ptr[ 0 ] + XX_ptr[ 24 ] ) + 1.0f );

+        silk_scale_vector_FLP( XX_ptr, temp, LTP_ORDER * LTP_ORDER );

+        silk_scale_vector_FLP( xX_ptr, temp, LTP_ORDER );

-        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr );

-        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr );

-        rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length );

-        regu = 1.0f + rr[ k ] +

-            matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) +

-            matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER );

-        regu *= LTP_DAMPING / 3;

-        silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER );

-        silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr );

-        /* Calculate residual energy */

-        nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER );

-        temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length );

-        silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER );

-        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER );

-        r_ptr    += subfr_length;

-        b_ptr    += LTP_ORDER;

-        WLTP_ptr += LTP_ORDER * LTP_ORDER;

-    }

-    /* Compute LTP coding gain */

-    if( LTPredCodGain != NULL ) {

-        LPC_LTP_res_nrg = 1e-6f;

-        LPC_res_nrg     = 0.0f;

-        for( k = 0; k < nb_subfr; k++ ) {

-            LPC_res_nrg     += rr[  k ] * Wght[ k ];

-            LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ];

-        }

-        silk_assert( LPC_LTP_res_nrg > 0 );

-        *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg );

-    }

-    /* Smoothing */

-    /* d = sum( B, 1 ); */

-    b_ptr = b;

-    for( k = 0; k < nb_subfr; k++ ) {

-        d[ k ] = 0;

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            d[ k ] += b_ptr[ i ];

-        }

-        b_ptr += LTP_ORDER;

-    }

-    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */

-    temp = 1e-3f;

-    for( k = 0; k < nb_subfr; k++ ) {

-        temp += w[ k ];

-    }

-    m = 0;

-    for( k = 0; k < nb_subfr; k++ ) {

-        m += d[ k ] * w[ k ];

-    }

-    m = m / temp;

-    b_ptr = b;

-    for( k = 0; k < nb_subfr; k++ ) {

-        g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] );

-        temp = 0;

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f );

-            temp += delta_b[ i ];

-        }

-        temp = g / temp;

-        for( i = 0; i < LTP_ORDER; i++ ) {

-            b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp;

-        }

-        b_ptr += LTP_ORDER;

+        r_ptr  += subfr_length;

+        XX_ptr += LTP_ORDER * LTP_ORDER;

+        xX_ptr += LTP_ORDER;

--- a/silk/float/find_pred_coefs_FLP.c

+++ b/silk/float/find_pred_coefs_FLP.c

@@ -41,7 +41,8 @@

     opus_int         i;

-    silk_float       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];

+    silk_float       XXLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];

+    silk_float       xXLTP[ MAX_NB_SUBFR * LTP_ORDER ];

     silk_float       invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ];

     opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];

     const silk_float *x_ptr;

@@ -61,14 +62,13 @@

         /**********/

         silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

-        /* LTP analysis */

-        silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch,

-            psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length );

+		/* LTP analysis */

+        silk_find_LTP_FLP( XXLTP, xXLTP, &res_pitch[ psEnc->sCmn.ltp_mem_length ],

+            psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );

         /* Quantize LTP gain parameters */

         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,

-            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr,

-            psEnc->sCmn.arch );

+			&psEncCtrl->LTPredCodGain, XXLTP, xXLTP, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );

         /* Control LTP scaling */

         silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );

@@ -91,7 +91,6 @@

         silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );

         psEncCtrl->LTPredCodGain = 0.0f;

-        psEnc->sCmn.sum_log_gain_Q7 = 0;

     /* Limit on total predictive coding gain */

--- a/silk/float/main_FLP.h

+++ b/silk/float/main_FLP.h

@@ -153,15 +153,12 @@

 /* LTP analysis */

 void silk_find_LTP_FLP(

-    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */

-    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */

-    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */

-    const silk_float                r_lpc[],                            /* I    LPC residual                                */

+    silk_float                      XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Correlation matrix                */

+    silk_float                      xX[ MAX_NB_SUBFR * LTP_ORDER ],		/* O    Correlation vector                */

+    const silk_float                r_ptr[],                            /* I    LPC residual                                */

     const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */

-    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */

     const opus_int                  subfr_length,                       /* I    Subframe length                             */

-    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

-    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */

+    const opus_int                  nb_subfr                            /* I    number of subframes                         */

);

 void silk_LTP_analysis_filter_FLP(

@@ -198,14 +195,14 @@

 /* LTP tap quantizer */

 void silk_quant_LTP_gains_FLP(

-    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */

+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    Quantized LTP gains							*/

     opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */

     opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */

-    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */

-    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */

-    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */

-    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */

-    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

+	silk_float                      *pred_gain_dB,						/* O	LTP prediction gain							*/

+    const silk_float                XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Correlation matrix					*/

+    const silk_float                xX[ MAX_NB_SUBFR * LTP_ORDER ],		/* I    Correlation vector							*/

+    const opus_int					subfr_len,							/* I    Number of samples per subframe				*/

+    const opus_int					nb_subfr,                           /* I    Number of subframes							*/

     int                             arch                                /* I    Run-time architecture                       */

);

@@ -243,22 +240,6 @@

     const opus_int                  L,                                  /* I    Length of vecors                            */

     const opus_int                  Order,                              /* I    Max lag for correlation                     */

     silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */

-);

-/* Add noise to matrix diagonal */

-void silk_regularize_correlations_FLP(

-    silk_float                      *XX,                                /* I/O  Correlation matrices                        */

-    silk_float                      *xx,                                /* I/O  Correlation values                          */

-    const silk_float                noise,                              /* I    Noise energy to add                         */

-    const opus_int                  D                                   /* I    Dimension of XX                             */

-);

-/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */

-void silk_solve_LDL_FLP(

-    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */

-    const opus_int                  M,                                  /* I    Size of matrix                              */

-    const silk_float                *b,                                 /* I    Pointer to b vector                         */

-    silk_float                      *x                                  /* O    Pointer to x solution vector                */

);

 /* Apply sine window to signal vector.  */

--- a/silk/float/solve_LS_FLP.c

+++ /dev/null

@@ -1,207 +1,0 @@

-/***********************************************************************

-Copyright (c) 2006-2011, Skype Limited. All rights reserved.

-Redistribution and use in source and binary forms, with or without

-modification, are permitted provided that the following conditions

-are met:

-- Redistributions of source code must retain the above copyright notice,

-this list of conditions and the following disclaimer.

-- Redistributions in binary form must reproduce the above copyright

-notice, this list of conditions and the following disclaimer in the

-documentation and/or other materials provided with the distribution.

-- Neither the name of Internet Society, IETF or IETF Trust, nor the

-names of specific contributors, may be used to endorse or promote

-products derived from this software without specific prior written

-permission.

-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

-POSSIBILITY OF SUCH DAMAGE.

-***********************************************************************/

-#ifdef HAVE_CONFIG_H

-#include "config.h"

-#endif

-#include "main_FLP.h"

-#include "tuning_parameters.h"

-/**********************************************************************

- * LDL Factorisation. Finds the upper triangular matrix L and the diagonal

- * Matrix D (only the diagonal elements returned in a vector)such that

- * the symmetric matric A is given by A = L*D*L'.

- **********************************************************************/

-static OPUS_INLINE void silk_LDL_FLP(

-    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */

-    opus_int            M,          /* I    Size of Matrix                                                  */

-    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */

-    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */

-);

-/**********************************************************************

- * Function to solve linear equation Ax = b, when A is a MxM lower

- * triangular matrix, with ones on the diagonal.

- **********************************************************************/

-static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(

-    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */

-    opus_int            M,          /* I    Dim of Matrix equation                                          */

-    const silk_float    *b,         /* I    b Vector                                                        */

-    silk_float          *x          /* O    x Vector                                                        */

-);

-/**********************************************************************

- * Function to solve linear equation (A^T)x = b, when A is a MxM lower

- * triangular, with ones on the diagonal. (ie then A^T is upper triangular)

- **********************************************************************/

-static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(

-    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */

-    opus_int            M,          /* I    Dim of Matrix equation                                          */

-    const silk_float    *b,         /* I    b Vector                                                        */

-    silk_float          *x          /* O    x Vector                                                        */

-);

-/**********************************************************************

- * Function to solve linear equation Ax = b, when A is a MxM

- * symmetric square matrix - using LDL factorisation

- **********************************************************************/

-void silk_solve_LDL_FLP(

-    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */

-    const opus_int                  M,                                  /* I    Size of matrix                              */

-    const silk_float                *b,                                 /* I    Pointer to b vector                         */

-    silk_float                      *x                                  /* O    Pointer to x solution vector                */

-)

-{

-    opus_int   i;

-    silk_float L[    MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ];

-    silk_float T[    MAX_MATRIX_SIZE ];

-    silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/

-    silk_assert( M <= MAX_MATRIX_SIZE );

-    /***************************************************

-    Factorize A by LDL such that A = L*D*(L^T),

-    where L is lower triangular with ones on diagonal

-    ****************************************************/

-    silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv );

-    /****************************************************

-    * substitute D*(L^T) = T. ie:

-    L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b

-    ******************************************************/

-    silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T );

-    /****************************************************

-    D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is

-    diagonal just multiply with 1/d_i

-    ****************************************************/

-    for( i = 0; i < M; i++ ) {

-        T[ i ] = T[ i ] * Dinv[ i ];

-    }

-    /****************************************************

-    x = inv(L') * inv(D) * T

-    *****************************************************/

-    silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );

-}

-static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(

-    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */

-    opus_int            M,          /* I    Dim of Matrix equation                                          */

-    const silk_float    *b,         /* I    b Vector                                                        */

-    silk_float          *x          /* O    x Vector                                                        */

-)

-{

-    opus_int   i, j;

-    silk_float temp;

-    const silk_float *ptr1;

-    for( i = M - 1; i >= 0; i-- ) {

-        ptr1 =  matrix_adr( L, 0, i, M );

-        temp = 0;

-        for( j = M - 1; j > i ; j-- ) {

-            temp += ptr1[ j * M ] * x[ j ];

-        }

-        temp = b[ i ] - temp;

-        x[ i ] = temp;

-    }

-}

-static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(

-    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */

-    opus_int            M,          /* I    Dim of Matrix equation                                          */

-    const silk_float    *b,         /* I    b Vector                                                        */

-    silk_float          *x          /* O    x Vector                                                        */

-)

-{

-    opus_int   i, j;

-    silk_float temp;

-    const silk_float *ptr1;

-    for( i = 0; i < M; i++ ) {

-        ptr1 =  matrix_adr( L, i, 0, M );

-        temp = 0;

-        for( j = 0; j < i; j++ ) {

-            temp += ptr1[ j ] * x[ j ];

-        }

-        temp = b[ i ] - temp;

-        x[ i ] = temp;

-    }

-}

-static OPUS_INLINE void silk_LDL_FLP(

-    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */

-    opus_int            M,          /* I    Size of Matrix                                                  */

-    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */

-    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */

-)

-{

-    opus_int i, j, k, loop_count, err = 1;

-    silk_float *ptr1, *ptr2;

-    double temp, diag_min_value;

-    silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/

-    silk_assert( M <= MAX_MATRIX_SIZE );

-    diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] );

-    for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) {

-        err = 0;

-        for( j = 0; j < M; j++ ) {

-            ptr1 = matrix_adr( L, j, 0, M );

-            temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/

-            for( i = 0; i < j; i++ ) {

-                v[ i ] = ptr1[ i ] * D[ i ];

-                temp  -= ptr1[ i ] * v[ i ];

-            }

-            if( temp < diag_min_value ) {

-                /* Badly conditioned matrix: add white noise and run again */

-                temp = ( loop_count + 1 ) * diag_min_value - temp;

-                for( i = 0; i < M; i++ ) {

-                    matrix_ptr( A, i, i, M ) += ( silk_float )temp;

-                }

-                err = 1;

-                break;

-            }

-            D[ j ]    = ( silk_float )temp;

-            Dinv[ j ] = ( silk_float )( 1.0f / temp );

-            matrix_ptr( L, j, j, M ) = 1.0f;

-            ptr1 = matrix_adr( A, j, 0, M );

-            ptr2 = matrix_adr( L, j + 1, 0, M);

-            for( i = j + 1; i < M; i++ ) {

-                temp = 0.0;

-                for( k = 0; k < j; k++ ) {

-                    temp += ptr2[ k ] * v[ k ];

-                }

-                matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] );

-                ptr2 += M; /* go to next column*/

-            }

-        }

-    }

-    silk_assert( err == 0 );

-}

--- a/silk/float/wrappers_FLP.c

+++ b/silk/float/wrappers_FLP.c

@@ -172,31 +172,34 @@

 /* Floating-point Silk LTP quantiation wrapper */

 /***********************************************/

 void silk_quant_LTP_gains_FLP(

-    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */

+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    Quantized LTP gains							*/

     opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */

     opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */

-    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */

-    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */

-    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */

-    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */

-    const opus_int                  nb_subfr,                           /* I    number of subframes                         */

+	silk_float                      *pred_gain_dB,						/* O	LTP prediction gain							*/

+    const silk_float                XX[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Correlation matrix					*/

+    const silk_float                xX[ MAX_NB_SUBFR * LTP_ORDER ],		/* I    Correlation vector							*/

+    const opus_int					subfr_len,							/* I    Number of samples per subframe				*/

+    const opus_int					nb_subfr,                           /* I    Number of subframes							*/

     int                             arch                                /* I    Run-time architecture                       */

-    opus_int   i;

+    opus_int   i, pred_gain_dB_Q7;

     opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ];

-    opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ];

+    opus_int32 XX_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];

+    opus_int32 xX_Q17[ MAX_NB_SUBFR * LTP_ORDER ];

-    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {

-        B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f );

-    }

     for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) {

-        W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );

+        XX_Q17[ i ] = (opus_int32)silk_float2int( XX[ i ] * 131072.0f );

+    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {

+        xX_Q17[ i ] = (opus_int32)silk_float2int( xX[ i ] * 131072.0f );

+    }

-    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch );

+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, &pred_gain_dB_Q7, XX_Q17, xX_Q17, subfr_len, nb_subfr, arch );

     for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {

         B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );

+	*pred_gain_dB = (silk_float)pred_gain_dB_Q7 * ( 1.0f / 128.0f );

--- a/silk/lin2log.c

+++ b/silk/lin2log.c

@@ -41,6 +41,6 @@

     silk_CLZ_FRAC( inLin, &lz, &frac_Q7 );

     /* Piece-wise parabolic approximation */

-    return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 );

+    return silk_ADD_LSHIFT32( silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ), 31 - lz, 7 );

--- a/silk/main.h

+++ b/silk/main.h

@@ -205,14 +205,14 @@

 /* LTP tap quantizer */

 void silk_quant_LTP_gains(

-    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */

+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* O	Quantized LTP gains				*/

     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */

     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */

-    opus_int32                  *sum_gain_dB_Q7,                            /* I/O  Cumulative max prediction gain  */

-    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */

-    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */

-    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */

-    const opus_int              nb_subfr,                                   /* I    number of subframes             */

+	opus_int	                *pred_gain_dB_Q7,							/* O	LTP prediction gain				*/

+    const opus_int32            XX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I    Correlation matrix in Q17       */

+    const opus_int32            xX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I    Correlation vector in Q17       */

+    const opus_int              subfr_len,									/* I    Number of samples per subframe  */

+    const opus_int              nb_subfr,                                   /* I    Number of subframes             */

     int                         arch                                        /* I    Run-time architecture           */

);

@@ -219,23 +219,19 @@

 /* Entropy constrained matrix-weighted VQ, for a single input data vector */

 void silk_VQ_WMat_EC_c(

     opus_int8                   *ind,                           /* O    index of best codebook vector               */

-    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */

-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */

-    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */

-    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */

+    opus_int32                  *res_nrg_Q15,					/* O    best residual energy						*/

+    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate							*/

+    const opus_int32            *XX_Q17,						/* I    correlation matrix                          */

+    const opus_int32            *xX_Q17,						/* I    correlation vector							*/

     const opus_int8             *cb_Q7,                         /* I    codebook                                    */

-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */

     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */

-    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */

-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */

-    opus_int                    L                               /* I    number of vectors in codebook               */

+    const opus_int              subfr_len,						/* I    number of samples per subframe				*/

+    const opus_int              L                               /* I    number of vectors in codebook               */

);

 #if !defined(OVERRIDE_silk_VQ_WMat_EC)

-#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

-                          mu_Q9, max_gain_Q7, L, arch) \

-    ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

-                          mu_Q9, max_gain_Q7, L))

+#define silk_VQ_WMat_EC(ind, rate_dist_Q15, gain_Q8, XX_Q17, xX_Q17, cb_Q7, cl_Q5, subfr_len, L, arch) \

+    ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q15, gain_Q8, XX_Q17, xX_Q17, cb_Q7, cl_Q5, subfr_len, L))

 #endif

 /************************************/

--- a/silk/quant_LTP_gains.c

+++ b/silk/quant_LTP_gains.c

@@ -30,17 +30,16 @@

 #endif

 #include "main.h"

-#include "tuning_parameters.h"

 void silk_quant_LTP_gains(

-    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */

+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* O	Quantized LTP gains				*/

     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */

     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */

-    opus_int32                  *sum_log_gain_Q7,                           /* I/O  Cumulative max prediction gain  */

-    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */

-    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */

-    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */

-    const opus_int              nb_subfr,                                   /* I    number of subframes             */

+	opus_int	                *pred_gain_dB_Q7,							/* O	LTP prediction gain				*/

+    const opus_int32            XX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I    Correlation matrix in Q17       */

+    const opus_int32            xX_Q17[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I    Correlation vector in Q17       */

+    const opus_int              subfr_len,									/* I    Number of samples per subframe  */

+    const opus_int              nb_subfr,                                   /* I    Number of subframes             */

     int                         arch                                        /* I    Run-time architecture           */

@@ -48,75 +47,54 @@

     opus_int8            temp_idx[ MAX_NB_SUBFR ];

     const opus_uint8     *cl_ptr_Q5;

     const opus_int8      *cbk_ptr_Q7;

-    const opus_uint8     *cbk_gain_ptr_Q7;

     const opus_int16     *b_Q14_ptr;

-    const opus_int32     *W_Q18_ptr;

-    opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;

-    opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;

+    const opus_int32     *XX_Q17_ptr, *xX_Q17_ptr;

+    opus_int32           res_nrg_Q15_subfr, res_nrg_Q15, rate_dist_Q7_subfr, rate_dist_Q7, min_rate_dist_Q7;

     /***************************************************/

     /* iterate over different codebooks with different */

     /* rates/distortions, and choose best */

     /***************************************************/

-    min_rate_dist_Q14 = silk_int32_MAX;

-    best_sum_log_gain_Q7 = 0;

+    min_rate_dist_Q7 = silk_int32_MAX;

     for( k = 0; k < 3; k++ ) {

-        /* Safety margin for pitch gain control, to take into account factors

-           such as state rescaling/rewhitening. */

-        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );

         cl_ptr_Q5  = silk_LTP_gain_BITS_Q5_ptrs[ k ];

         cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[        k ];

-        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];

         cbk_size   = silk_LTP_vq_sizes[          k ];

         /* Set up pointer to first subframe */

-        W_Q18_ptr = W_Q18;

-        b_Q14_ptr = B_Q14;

+        XX_Q17_ptr = XX_Q17;

+        xX_Q17_ptr = xX_Q17;

+        b_Q14_ptr  = B_Q14;

-        rate_dist_Q14 = 0;

-        sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;

+		res_nrg_Q15 = 0;

+        rate_dist_Q7 = 0;

         for( j = 0; j < nb_subfr; j++ ) {

-            max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )

-                                        + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;

             silk_VQ_WMat_EC(

                 &temp_idx[ j ],         /* O    index of best codebook vector                           */

-                &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */

-                &gain_Q7,               /* O    sum of absolute LTP coefficients                        */

-                b_Q14_ptr,              /* I    input vector to be quantized                            */

-                W_Q18_ptr,              /* I    weighting matrix                                        */

+				&res_nrg_Q15_subfr,		/* O	residual energy											*/

+                &rate_dist_Q7_subfr,    /* O    best total bitrate                                      */

+                XX_Q17_ptr,             /* I    correlation matrix                                      */

+                xX_Q17_ptr,             /* I    correlation vector										*/

                 cbk_ptr_Q7,             /* I    codebook                                                */

-                cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */

                 cl_ptr_Q5,              /* I    code length for each codebook vector                    */

-                mu_Q9,                  /* I    tradeoff between weighted error and rate                */

-                max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */

+                subfr_len,              /* I    number of samples per subframe			                */

                 cbk_size,               /* I    number of vectors in codebook                           */

                 arch                    /* I    Run-time architecture                                   */

);

-            rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );

-            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7

-                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));

+			res_nrg_Q15   = silk_ADD_POS_SAT32( res_nrg_Q15, res_nrg_Q15_subfr );

+            rate_dist_Q7 = silk_ADD_POS_SAT32( rate_dist_Q7, rate_dist_Q7_subfr );

-            b_Q14_ptr += LTP_ORDER;

-            W_Q18_ptr += LTP_ORDER * LTP_ORDER;

+            b_Q14_ptr  += LTP_ORDER;

+            XX_Q17_ptr += LTP_ORDER * LTP_ORDER;

+            xX_Q17_ptr += LTP_ORDER;

-        /* Avoid never finding a codebook */

-        rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );

-        if( rate_dist_Q14 < min_rate_dist_Q14 ) {

-            min_rate_dist_Q14 = rate_dist_Q14;

+        if( rate_dist_Q7 <= min_rate_dist_Q7 ) {

+            min_rate_dist_Q7 = rate_dist_Q7;

             *periodicity_index = (opus_int8)k;

             silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );

-            best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;

-        /* Break early in low-complexity mode if rate distortion is below threshold */

-        if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {

-            break;

-        }

     cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ];

@@ -125,5 +103,13 @@

             B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );

-    *sum_log_gain_Q7 = best_sum_log_gain_Q7;

+	if( nb_subfr == 2 ) {

+		res_nrg_Q15 = silk_RSHIFT32( res_nrg_Q15, 1 );

+	} else {

+		res_nrg_Q15 = silk_RSHIFT32( res_nrg_Q15, 2 );

+	}

+	*pred_gain_dB_Q7 = (opus_int)silk_SMULBB( -3, silk_lin2log( res_nrg_Q15 ) - ( 15 << 7 ) );

--- a/silk/structs.h

+++ b/silk/structs.h

@@ -171,7 +171,6 @@

     opus_int                     pitchEstimationLPCOrder;           /* Whitening filter order for pitch estimator                       */

     opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */

     opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */

-    opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */

     opus_int32                   sum_log_gain_Q7;                   /* Cumulative max prediction gain                                   */

     opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */

     opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */

--- a/silk/tables.h

+++ b/silk/tables.h

@@ -76,10 +76,7 @@

 extern const opus_uint8  silk_LTP_per_index_iCDF[ 3 ];                                              /*   3 */

 extern const opus_uint8  * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ];                            /*   3 */

 extern const opus_uint8  * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ];                         /*   3 */

-extern const opus_int16  silk_LTP_gain_middle_avg_RD_Q14;

 extern const opus_int8   * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ];                                /* 168 */

-extern const opus_uint8  * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];

 extern const opus_int8   silk_LTP_vq_sizes[ NB_LTP_CBKS ];                                          /*   3 */

 extern const opus_uint8  silk_LTPscale_iCDF[ 3 ];                                                   /*   4 */

--- a/silk/tables_LTP.c

+++ b/silk/tables_LTP.c

@@ -51,8 +51,6 @@

         24,     20,     16,     12,      9,      5,      2,      0

};

-const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304;

 static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = {

         15,    131,    138,    138,    155,    155,    173,    173

};

@@ -265,30 +263,6 @@

     (opus_int8 *)&silk_LTP_gain_vq_0[0][0],

     (opus_int8 *)&silk_LTP_gain_vq_1[0][0],

     (opus_int8 *)&silk_LTP_gain_vq_2[0][0]

-};

-/* Maximum frequency-dependent response of the pitch taps above,

-   computed as max(abs(freqz(taps))) */

-static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {

-      46,      2,     90,     87,     93,     91,     82,     98

-};

-static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {

-     109,    120,    118,     12,    113,    115,    117,    119,

-      99,     59,     87,    111,     63,    111,    112,     80

-};

-static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {

-     126,    124,    125,    124,    129,    121,    126,     23,

-     132,    127,    127,    127,    126,    127,    122,    133,

-     130,    134,    101,    118,    119,    145,    126,     86,

-     124,    120,    123,    119,    170,    173,    107,    109

-};

-const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {

-    &silk_LTP_gain_vq_0_gain[0],

-    &silk_LTP_gain_vq_1_gain[0],

-    &silk_LTP_gain_vq_2_gain[0]

};

 const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {

--- a/silk/tuning_parameters.h

+++ b/silk/tuning_parameters.h

@@ -54,17 +54,7 @@

 #define FIND_LPC_COND_FAC                               1e-5f

 /* LTP analysis defines */

-#define FIND_LTP_COND_FAC                               1e-5f

-#define LTP_DAMPING                                     0.05f

-#define LTP_SMOOTHING                                   0.1f

-/* LTP quantization settings */

-#define MU_LTP_QUANT_NB                                 0.03f

-#define MU_LTP_QUANT_MB                                 0.025f

-#define MU_LTP_QUANT_WB                                 0.02f

-/* Max cumulative LTP gain */

-#define MAX_SUM_LOG_GAIN_DB                             250.0f

+#define LTP_CORR_INV_MAX								0.02f

 /***********************/

 /* High pass filtering */

--- a/silk/x86/main_sse.h

+++ b/silk/x86/main_sse.h

@@ -34,6 +34,7 @@

 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)

+#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */

 #  define OVERRIDE_silk_VQ_WMat_EC

 void silk_VQ_WMat_EC_sse4_1(

@@ -78,6 +79,7 @@

     ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

                           mu_Q9, max_gain_Q7, L))

+#endif

 #endif

 #  define OVERRIDE_silk_NSQ

--- a/silk/x86/x86_silk_map.c

+++ b/silk/x86/x86_silk_map.c

@@ -90,6 +90,7 @@

   MAY_HAVE_SSE4_1( silk_NSQ )  /* avx */

};

+#if 0 /* FIXME: SSE disabled until silk_VQ_WMat_EC_sse4_1() gets updated. */

 void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(

     opus_int8                   *ind,                           /* O    index of best codebook vector               */

     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */

@@ -109,6 +110,7 @@

   MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */

   MAY_HAVE_SSE4_1( silk_VQ_WMat_EC )  /* avx */

};

+#endif

 void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(

     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */

--- a/silk_sources.mk

+++ b/silk_sources.mk

@@ -101,7 +101,6 @@

 silk/fixed/regularize_correlations_FIX.c \

 silk/fixed/residual_energy16_FIX.c \

 silk/fixed/residual_energy_FIX.c \

-silk/fixed/solve_LS_FIX.c \

 silk/fixed/warped_autocorrelation_FIX.c \

 silk/fixed/apply_sine_window_FIX.c \

 silk/fixed/autocorr_FIX.c \

@@ -133,7 +132,6 @@

 silk/float/process_gains_FLP.c \

 silk/float/regularize_correlations_FLP.c \

 silk/float/residual_energy_FLP.c \

-silk/float/solve_LS_FLP.c \

 silk/float/warped_autocorrelation_FLP.c \

 silk/float/wrappers_FLP.c \

 silk/float/autocorrelation_FLP.c \

--

⑨