shithub: aacenc

ref: fc2a0b223565a46e642d762e659e2c589624eb13
dir: /nok_pitch.c/

View raw version
/**************************************************************************

This software module was originally developed by
Nokia in the course of development of the MPEG-2 AAC/MPEG-4 
Audio standard ISO/IEC13818-7, 14496-1, 2 and 3.
This software module is an implementation of a part
of one or more MPEG-2 AAC/MPEG-4 Audio tools as specified by the
MPEG-2 aac/MPEG-4 Audio standard. ISO/IEC  gives users of the
MPEG-2aac/MPEG-4 Audio standards free license to this software module
or modifications thereof for use in hardware or software products
claiming conformance to the MPEG-2 aac/MPEG-4 Audio  standards. Those
intending to use this software module in hardware or software products
are advised that this use may infringe existing patents. The original
developer of this software module, the subsequent
editors and their companies, and ISO/IEC have no liability for use of
this software module or modifications thereof in an
implementation. Copyright is not released for non MPEG-2 aac/MPEG-4
Audio conforming products. The original developer retains full right to
use the code for the developer's own purpose, assign or donate the code to a
third party and to inhibit third party from using the code for non
MPEG-2 aac/MPEG-4 Audio conforming products. This copyright notice
must be included in all copies or derivative works.
Copyright (c)1997.  

***************************************************************************/
/**************************************************************************
  nok_pitch.c  - Long Term Prediction (LTP) subroutines.
 
  Author(s): Juha Ojanpera, Lin Yin
  	     Nokia Research Center, Speech and Audio Systems.
  *************************************************************************/


/**************************************************************************
  Version Control Information			Method: CVS
  Identifiers:
  $Revision: 1.6 $
  $Date: 2000/02/08 07:47:59 $ (check in)
  $Author: menno $
  *************************************************************************/


/**************************************************************************
  External Objects Needed
  *************************************************************************/
/*
  Standard library declarations.  */
#include <math.h>
#include <stdio.h>

/*
  Interface to related modules.  */
#include "tf_main.h"
#include "block.h"
#include "interface.h"
#include "nok_ltp_common.h"
#include "nok_ltp_enc.h"

/*************************************************************************
  External Objects Provided
  *************************************************************************/
#include "nok_pitch.h"



/**************************************************************************
  Internal Objects
  *************************************************************************/
#include "nok_ltp_common_internal.h"

void lnqgj (double (*a)[LPC + 1]);

void w_quantize (double *freq, int *ltp_idx);

/*
  Purpose:	Codebook for LTP weight coefficients.  */

double codebook[CODESIZE] =
{
  0.570829,
  0.696616,
  0.813004,
  0.911304,
  0.984900,
  1.067894,
  1.194601,
  1.369533
};

/**************************************************************************
  Title:	snr_pred

  Purpose:	Determines is it feasible to employ long term prediction in 
                the encoder.

  Usage:        y = snr_pred(mdct_in, mdct_pred, sfb_flag, sfb_offset, 
                             block_type, side_info, num_of_sfb)

  Input:        mdct_in     -  spectral coefficients
                mdct_pred   -  predicted spectral coefficients
                sfb_flag    -  an array of flags indicating whether LTP is 
                               switched on (1) /off (0). One bit is reseved 
                               for each sfb.
                sfb_offset  -  scalefactor boundaries
                block_type  -  window sequence type
		side_info   -  LTP side information
		num_of_sfb  -  number of scalefactor bands

  Output:	y - number of bits saved by using long term prediction     

  References:	-

  Explanation:	-

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

double
snr_pred (double *mdct_in,
	  double *mdct_pred,
	  int *sfb_flag,
	  int *sfb_offset,
	  enum WINDOW_TYPE block_type,
	  int side_info,
	  int num_of_sfb
)
{
	int i, j, flen;
	double snr_limit;
	double num_bit, snr[NSFB_LONG];
	double temp1, temp2;
	double energy[BLOCK_LEN_LONG], snr_p[BLOCK_LEN_LONG];


	if (block_type != ONLY_SHORT_WINDOW)
    {
		flen = BLOCK_LEN_LONG;
		snr_limit = 1.e-30;
    }
	else
    {
		flen = BLOCK_LEN_SHORT;
		snr_limit = 1.e-20;
    }

	for (i = 0; i < flen; i++)
    {
		energy[i] = mdct_in[i] * mdct_in[i];
		snr_p[i] = (mdct_in[i] - mdct_pred[i]) * (mdct_in[i] - mdct_pred[i]);
    }

	num_bit = 0.0;

	for (i = 0; i < num_of_sfb; i++)
    {
		temp1 = 0.0;
		temp2 = 0.0;
		for (j = sfb_offset[i]; j < sfb_offset[i + 1]; j++)
		{
			temp1 += energy[j];
			temp2 += snr_p[j];
		}

		if (temp2 < snr_limit)
			temp2 = snr_limit;

		if (temp1 > /*1.e-20*/0.0)
			snr[i] = -10. * log10 (temp2 / temp1);
		else
			snr[i] = 0.0;

		sfb_flag[i] = 1;

		if (block_type != ONLY_SHORT_WINDOW)
		{
			if (snr[i] <= 0.0)
			{
				sfb_flag[i] = 0;
				for (j = sfb_offset[i]; j < sfb_offset[i + 1]; j++)
					mdct_pred[j] = 0.0;
			} else {
				num_bit += snr[i] / 6. * (sfb_offset[i + 1] - sfb_offset[i]);
			}
		}
    }

	if (num_bit < side_info)
    {
		num_bit = 0.0;
		for (j = 0; j < flen; j++)
			mdct_pred[j] = 0.0;
		for (i = 0; i < num_of_sfb; i++)
			sfb_flag[i] = 0;
    }
	else
		num_bit -= side_info;

	return (num_bit);
}


/**************************************************************************
  Title:	prediction

  Purpose:	Predicts current frame from the past output samples.

  Usage:        prediction(buffer, predicted_samples, weight, delay, flen)

  Input:	buffer	          - past reconstructed output samples
                weight            - LTP gain (scaling factor)
		delay             - LTP lag (optimal delay)
                flen              - length of the frame

  Output:	predicted_samples - predicted samples

  References:	-

  Explanation:  -

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

void
prediction (short *buffer,
	    double *predicted_samples,
	    double *weight,
	    int delay,
	    int flen
)
{

  int i, j;
  int offset;

  for (i = 0; i < flen; i++)
    {
      offset = NOK_LT_BLEN - flen - delay + i - (LPC - 1) / 2;
      predicted_samples[i] = 0.0;
      for (j = 0; j < LPC; j++)
	predicted_samples[i] += weight[j] * buffer[offset + j];
    }

}


/**************************************************************************
  Title:	estimate_delay

  Purpose:      Estimates optimal delay between current frame and past
                reconstructed output samples. The lag between 0...DELAY-1 
                with the maximum correlation becomes the LTP lag (or delay).

  Usage:        y = estimate_delay(sb_samples, x_buffer, flen)

  Input:	sb_samples - current frame
                x_buffer   - past reconstructed output samples
                flen       - length of the frame

  Output:	y  - LTP lag

  References:	-

  Explanation:	-

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

int estimate_delay (double *sb_samples,
					short *x_buffer,
					int flen
					)
{
	int i, j;
	int delay;
	double corr[DELAY],corrtmp;
	double p_max, energy;

	p_max = 0.0;
	delay = 0;
		energy = 0.0;
		corr[0] = 0.0;
		for (j = 1; j < 2049; j++)
		{
			corr[0] += x_buffer[NOK_LT_BLEN - j] * sb_samples[2048 - j];
			energy += x_buffer[NOK_LT_BLEN - j] * x_buffer[NOK_LT_BLEN - j];
		}
		corrtmp=corr[0];
		if (energy != 0.0)
			corr[0] = corr[0] / sqrt(energy);
		else
			corr[0] = 0.0;

		if (p_max < corr[0])
		{
			p_max = corr[0];
			delay = 0;
		}

	/* Used to look like this:
	   for (i = 1; i < DELAY; i+=16)
	   Because the new code by Oxygene2000 is so fast, we can now look for the
	   delay in steps of 1, instead of 16. Thus giving a more accurate delay estimation
	*/
	for (i = 1; i < DELAY; i++)
	{
//NOK_LT_BLEN=4096
		energy -= x_buffer[NOK_LT_BLEN - i] * x_buffer[NOK_LT_BLEN - i]; 
		energy += x_buffer[NOK_LT_BLEN - i - 2048] * x_buffer[NOK_LT_BLEN - i - 2048]; //2048=j_max
		corr[i] = corrtmp;
		corr[i] -= x_buffer[NOK_LT_BLEN - i] * sb_samples[2047];
		corr[i] += x_buffer[NOK_LT_BLEN - i - 2048] * sb_samples[0];
		corrtmp=corr[i];
//flen=2048
//		for (j = 1; j < 2049; j++) //2049=flen+1
//		{
//			corr[i] += x_buffer[NOK_LT_BLEN - i - j] * sb_samples[2048 - j];
//			energy += x_buffer[NOK_LT_BLEN - i - j] * x_buffer[NOK_LT_BLEN - i - j];
//		}
		if (energy != 0.0)
			corr[i] = corr[i] / sqrt(energy);
		else
			corr[i] = 0.0;

		if (p_max < corr[i])
		{
			p_max = corr[i];
			delay = i;
		}
	}

//	if (delay < (LPC - 1) / 2)
//		delay = (LPC - 1) / 2;
	if (delay<0) delay=0; //for LPC=1

	/*
	fprintf(stdout, "delay : %d ... ", delay);
	*/

	return delay;
}


/**************************************************************************
  Title:	pitch

  Purpose:	Calculates LTP gains, quantizes them and finally scales
                the past output samples (from 'delay' to 'delay'+'flen') 
                to get the predicted frame.

  Usage:	pitch(sb_samples, sb_samples_pred, x_buffer, ltp_coef, 
                      delay, flen)

  Input:        sb_samples      - current frame
                x_buffer        - past reconstructed output samples
                delay           - LTP lag
                flen            - length of the frame

  Output:	sb_samples_pred - predicted frame
                ltp_coef        - indices of the LTP gains

  References:	1.)  lnqgj
                2.)  w_quantize
                3.)  prediction

  Explanation:	-

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

void
pitch (double *sb_samples,
       double *sb_samples_pred,
       short *x_buffer,
       int *ltp_coef,
       int delay,
       int flen
)
{
	int i, k, j;
	int offset1, offset2;
	double weight[LPC];
	double r[LPC][LPC + 1];


	for (i = 0; i < LPC; i++)
		for (j = 0; j < LPC; j++)
		{
			offset1 = NOK_LT_BLEN - flen - delay + i - (LPC - 1) / 2;
			offset2 = NOK_LT_BLEN - flen - delay + j - (LPC - 1) / 2;
			r[i][j] = 0.0;
			for (k = 0; k < flen; k++)
				r[i][j] += x_buffer[offset1 + k] * x_buffer[offset2 + k];
		}
	for (i = 0; i < LPC; i++)
		r[i][i] = 1.010 * r[i][i];

	for (i = 0; i < LPC; i++)
    {
		offset1 = NOK_LT_BLEN - flen - delay + i - (LPC - 1) / 2; 
		r[i][LPC] = 0.0;
		for (k = 0; k < flen; k++)
			r[i][LPC] += x_buffer[offset1 + k] * sb_samples[k];
    }

	for (i = 0; i < LPC; i++)
		weight[i] = 0.0;

	if (r[(LPC - 1) / 2][(LPC - 1) / 2] != 0.0)
		weight[(LPC - 1) / 2] = r[(LPC - 1) / 2][LPC] / r[(LPC - 1) / 2][(LPC - 1) / 2];

	lnqgj (r);

	for (i = 0; i < LPC; i++)
		weight[i] = r[i][LPC];

	/* 
	fprintf(stdout, "unquantized weight : %f ... ", weight[0]);
	*/

	w_quantize (weight, ltp_coef);

	/*
	fprintf(stdout, "quantized weight : %f\n", weight[0]);    
	*/

	prediction (x_buffer, sb_samples_pred, weight, delay, flen);
}


/**************************************************************************
  Title:	lnqgj

  Purpose:	Calculates LTP gains.

  Usage:	lnqgj(a)

  Input:	a - auto-correlation matrix

  Output:	a - LTP gains (in the last column)   

  References:	-

  Explanation:	-

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

void
lnqgj (double (*a)[LPC + 1])
{
	int nn, nnpls1, ip, i, j;
	double p, w;

	for (nn = 0; nn < LPC; nn++)
    {
		p = 0.0;
		nnpls1 = nn + 1;
		for (i = nn; i < LPC; i++)
			if (p - fabs (a[i][nn]) < 0)
			{
				p = fabs (a[i][nn]);
				ip = i;
			}

		if (p - 1.e-10 <= 0)
			return;

		if (p - 1.e-10 > 0)
		{
			for (j = nn; j <= LPC; j++)
			{
				w = a[nn][j];
				a[nn][j] = a[ip][j];
				a[ip][j] = w;
			}

			for (j = nnpls1; j <= LPC; j++)
				a[nn][j] = a[nn][j] / a[nn][nn];

			for (i = 0; i < LPC; i++)
				if ((i - nn) != 0)
					for (j = nnpls1; j <= LPC; j++)
						a[i][j] = a[i][j] - a[i][nn] * a[nn][j];
		}
    }
}


/**************************************************************************
  Title:	w_quantize

  Purpose:	Quantizes LTP gain(s).

  Usage:	w_quantize(freq, ltp_idx)

  Input:	freq    - original LTP gain(s)

  Output:	freq    - LTP gain(s) selected from the codebook
                ltp_idx - corresponding indices of the LTP gain(s)

  References:	-

  Explanation:	The closest value from the codebook is selected to be the
                quantized LTP gain.

  Author(s):	Juha Ojanpera, Lin Yin
  *************************************************************************/

void
w_quantize (double *freq, int *ltp_idx)
{
	int i, j;
	double dist, low;


	low = 1.0e+10;
	for (i = 0; i < LPC; i++)
    {
		dist = 0.0;
		for (j = 0; j < CODESIZE; j++)
		{
			dist = (freq[i] - codebook[j]) * (freq[i] - codebook[j]);
			if (dist < low)
			{
				low = dist;
				ltp_idx[i] = j;
			}
		}
    }

	for (j = 0; j < LPC; j++)
		freq[j] = codebook[ltp_idx[j]];
}