ref: 85ebcbef4f9beaa9075c2ae02d95a3b57dbbb067
dir: /enc_tf.c/
#include <math.h>
#include <stdlib.h>
#include <memory.h>
#include "aacenc.h"
#include "bitstream.h"
#include "interface.h"
#include "enc.h"
#include "block.h"
#include "tf_main.h"
#include "psych.h"
#include "aac_back_pred.h"
#include "mc_enc.h"
#include "ms.h"
#include "is.h"
#include "aac_qc.h"
#include "all.h"
#include "aac_se_enc.h"
/* AAC tables */
/* First attempt at supporting multiple sampling rates *
 * and bitrates correctly.                             */
/* Tables for the maximum number of scalefactor bands */
/* Needs more fine-tuning. Only the values for 44.1kHz have been changed
   at lower bitrates. */
int max_sfb_s[] = { 12, 12, 12, 13, 14, 13, 15, 15, 15, 15, 15, 15 };
int max_sfb_l[] = { 49, 49, 47, 48, 49, 51, 47, 47, 43, 43, 43, 40 };
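/* Both tables are indexed by srate_idx (set up in EncTfInit). For example,
   44.1 kHz maps to index 4, so up to max_sfb_l[4] == 49 scalefactor bands
   are coded for long blocks and max_sfb_s[4] == 14 for short blocks. */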
int block_size_samples = 1024; /* number of samples per block in one audio channel */
int short_win_in_long = 8;
int max_ch; /* number of audio channels */
double *spectral_line_vector[MAX_TIME_CHANNELS];
double *reconstructed_spectrum[MAX_TIME_CHANNELS];
double *overlap_buffer[MAX_TIME_CHANNELS];
double *DTimeSigBuf[MAX_TIME_CHANNELS];
double *DTimeSigLookAheadBuf[MAX_TIME_CHANNELS+2];
/* static variables used by the T/F mapping */
enum QC_MOD_SELECT qc_select = AAC_QC; /* later f(encPara) */
enum AAC_PROFILE profile = MAIN;
enum WINDOW_TYPE block_type[MAX_TIME_CHANNELS];
enum WINDOW_TYPE desired_block_type[MAX_TIME_CHANNELS];
enum WINDOW_TYPE next_desired_block_type[MAX_TIME_CHANNELS+2];
/* Additional variables for AAC */
int aacAllowScalefacs = 1; /* Allow AAC scalefactors to be nonconstant */
TNS_INFO tnsInfo[MAX_TIME_CHANNELS];
AACQuantInfo quantInfo[MAX_TIME_CHANNELS]; /* Info structure for AAC quantization and coding */
/* Channel information */
Ch_Info channelInfo[MAX_TIME_CHANNELS];
/* AAC shorter windows 960-480-120 */
int useShortWindows=0; /* don't use shorter windows */
// TEMPORARY HACK
int srate_idx;
int sampling_rate;
int bit_rate;
// END OF HACK
/* EncTfFree() */
/* Free memory allocated by t/f-based encoder core. */
void EncTfFree (void)
{
int chanNum;
for (chanNum=0;chanNum<MAX_TIME_CHANNELS;chanNum++) {
if (DTimeSigBuf[chanNum]) free(DTimeSigBuf[chanNum]);
if (spectral_line_vector[chanNum]) free(spectral_line_vector[chanNum]);
if (reconstructed_spectrum[chanNum]) free(reconstructed_spectrum[chanNum]);
if (overlap_buffer[chanNum]) free(overlap_buffer[chanNum]);
}
for (chanNum=0;chanNum<MAX_TIME_CHANNELS+2;chanNum++) {
if (DTimeSigLookAheadBuf[chanNum]) free(DTimeSigLookAheadBuf[chanNum]);
}
}
/*****************************************************************************************
***
*** Function: EncTfInit
***
*** Purpose: Initialize the T/F-part and the macro blocks of the T/F part of the VM
***
*** Description:   Allocates the per-channel time and spectral buffers and
***                initialises the prediction, psychoacoustic, quantization
***                and TNS modules
***
*** Parameters:    ac          - encoder configuration (sampling rate, bit
***                              rate, number of channels, profile)
***                VBR_setting - not used by this function
***
*** Return Value:  none
***
*** **** MPEG-4 VM ****
***
****************************************************************************************/
void EncTfInit (faacAACConfig *ac, int VBR_setting)
{
int chanNum, i;
int SampleRates[] = {
96000,88200,64000,48000,44100,32000,24000,22050,16000,12000,11025,8000,0
};
int BitRates[] = {
64000,80000,96000,112000,128000,160000,192000,224000,256000,0
};
sampling_rate = ac->sampling_rate;
bit_rate = ac->bit_rate;
	/* Look up the sampling rate index; stop at the 0 sentinel so an
	   unsupported rate cannot run off the end of the table. */
	for (i = 0; SampleRates[i] != 0; i++)
	{
		if (SampleRates[i] == sampling_rate) {
			srate_idx = i;
			break;
		}
	}
profile = MAIN;
qc_select = AAC_PRED; /* enable prediction */
if (ac->profile == LOW) {
profile = LOW;
qc_select = AAC_QC; /* disable prediction */
}
/* set the return values */
max_ch = ac->channels;
/* some global initializations */
for (chanNum=0;chanNum<MAX_TIME_CHANNELS;chanNum++) {
DTimeSigBuf[chanNum] = (double*)malloc(block_size_samples*sizeof(double));
spectral_line_vector[chanNum] = (double*)malloc(2*block_size_samples*sizeof(double));
reconstructed_spectrum[chanNum] = (double*)malloc(block_size_samples*sizeof(double));
memset(reconstructed_spectrum[chanNum], 0, block_size_samples*sizeof(double));
overlap_buffer[chanNum] = (double*)malloc(sizeof(double)*block_size_samples);
memset(overlap_buffer[chanNum],0,(block_size_samples)*sizeof(double));
block_type[chanNum] = ONLY_LONG_WINDOW;
}
for (chanNum=0;chanNum<MAX_TIME_CHANNELS+2;chanNum++) {
DTimeSigLookAheadBuf[chanNum] = (double*)malloc((block_size_samples)*sizeof(double));
memset(DTimeSigLookAheadBuf[chanNum],0,(block_size_samples)*sizeof(double));
}
PredInit();
/* initialize psychoacoustic module */
EncTf_psycho_acoustic_init();
/* initialize spectrum processing */
/* initialize quantization and coding */
tf_init_encode_spectrum_aac(0);
/* Init TNS */
for (chanNum=0;chanNum<MAX_TIME_CHANNELS;chanNum++) {
TnsInit(sampling_rate,profile,&tnsInfo[chanNum]);
quantInfo[chanNum].tnsInfo = &tnsInfo[chanNum]; /* Set pointer to TNS data */
}
}
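/* Illustrative call sequence (a sketch only, not taken from the encoder's
 * driver code; only the config fields actually read above are shown, and
 * more_input() is a hypothetical placeholder):
 *
 *   faacAACConfig ac;
 *   ac.sampling_rate = 44100;
 *   ac.bit_rate      = 128000;
 *   ac.channels      = 2;
 *   ac.profile       = LOW;
 *   EncTfInit(&ac, 0);
 *
 *   EncTfFrame(as, NULL);              // first call only primes the look-ahead buffer
 *   while (more_input(as))
 *       EncTfFrame(as, fixed_stream);  // encodes one frame per call
 *
 *   EncTfFree();
 */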
/*****************************************************************************************
***
*** Function: EncTfFrame
***
*** Purpose: processes a block of time signal input samples into a bitstream
*** based on T/F encoding
***
*** Description:   Runs the psychoacoustic model, block switching, the T/F
***                mapping (filterbank), stereo processing, quantization and
***                noiseless coding, and writes the coded channel elements to
***                the bitstream
***
*** Parameters:    as           - encoder state (input samples, bit budget,
***                               coding options)
***                fixed_stream - output bitstream; pass NULL on the first
***                               call to only prime the look-ahead buffer
***
*** Return Value:  FNO_ERROR on success, FERROR on failure
***
*** **** MPEG-4 VM ****
***
****************************************************************************************/
int EncTfFrame (faacAACStream *as, BsBitStream *fixed_stream)
{
int used_bits;
int error;
/* Energy array (computed before prediction for long windows) */
double energy[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS];
/* determine the function parameters used earlier: HP 21-aug-96 */
int average_bits = as->frame_bits;
int available_bitreservoir_bits = as->available_bits-as->frame_bits;
	/* actual number of bits currently in the bit reservoir */
	/* It is the job of this module to decide how many bits to use in
	   addition to average_bits; at most
	   average_bits + available_bitreservoir_bits are available. */
	int max_bitreservoir_bits = 8184;
	/* maximum allowed number of bits in the reservoir (used to avoid padding bits) */
long num_bits_available;
double *p_ratio[MAX_TIME_CHANNELS], allowed_distortion[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS];
double p_ratio_long[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS];
double p_ratio_short[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS];
int nr_of_sfb[MAX_TIME_CHANNELS], sfb_width_table[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS];
int sfb_offset_table[MAX_TIME_CHANNELS][MAX_SCFAC_BANDS+1];
int no_sub_win, sub_win_size;
/* structures holding the output of the psychoacoustic model */
CH_PSYCH_OUTPUT_LONG chpo_long[MAX_TIME_CHANNELS+2];
CH_PSYCH_OUTPUT_SHORT chpo_short[MAX_TIME_CHANNELS+2][MAX_SHORT_WINDOWS];
// memset(chpo_long,0,sizeof(CH_PSYCH_OUTPUT_LONG)*(MAX_TIME_CHANNELS+2));
// memset(chpo_short,0,sizeof(CH_PSYCH_OUTPUT_SHORT)*(MAX_TIME_CHANNELS+2)*MAX_SHORT_WINDOWS);
// memset(p_ratio_long,0,sizeof(double)*(MAX_TIME_CHANNELS)*MAX_SCFAC_BANDS);
// memset(p_ratio_short,0,sizeof(double)*(MAX_TIME_CHANNELS)*MAX_SCFAC_BANDS);
	{ /* Convert the input to double, the internal format, and store the new
	     samples in the look-ahead buffer needed for the window-switching decision. */
int i;
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
for( i=0; i<block_size_samples; i++ ) {
				/* the previous frame's input data is encoded now */
DTimeSigBuf[chanNum][i] = DTimeSigLookAheadBuf[chanNum][i];
DTimeSigLookAheadBuf[chanNum][i] = as->inputBuffer[chanNum][i];
} /* end for(i ..) */
} /* end for(chanNum ... ) */
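		/* Look-ahead channels 2 and 3 hold the mid ((L+R)/2) and side ((L-R)/2)
		   downmixes of channels 0 and 1, so the psychoacoustic stage below can
		   analyse the M/S pair as well (hence the max_ch+2 loops). */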
for (chanNum=2;chanNum<4;chanNum++) {
if (chanNum == 2) {
for(i = 0; i < block_size_samples; i++){
DTimeSigLookAheadBuf[chanNum][i] = (DTimeSigLookAheadBuf[0][i]+DTimeSigLookAheadBuf[1][i])*0.5;
}
} else {
for(i = 0; i < block_size_samples; i++){
DTimeSigLookAheadBuf[chanNum][i] = (DTimeSigLookAheadBuf[0][i]-DTimeSigLookAheadBuf[1][i])*0.5;
}
}
}
}
if (fixed_stream == NULL) {
psy_fill_lookahead(DTimeSigLookAheadBuf, max_ch+2);
return FNO_ERROR; /* quick'n'dirty fix for encoder startup HP 21-aug-96 */
}
/* Keep track of number of bits used */
used_bits = 0;
/***********************************************************************/
/* Determine channel elements */
/***********************************************************************/
DetermineChInfo(channelInfo,max_ch);
/******************************************************************************************************************************
*
* psychoacoustic
*
******************************************************************************************************************************/
{
int chanNum;
for (chanNum=0;chanNum<max_ch+2;chanNum++) {
EncTf_psycho_acoustic(
sampling_rate,
chanNum,
&DTimeSigLookAheadBuf[chanNum],
&next_desired_block_type[chanNum],
(int)qc_select,
block_size_samples,
chpo_long,
chpo_short
);
}
}
/******************************************************************************************************************************
*
* block_switch processing
*
******************************************************************************************************************************/
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
			/* A few definitions:                                                   */
			/*   block_type: on entry, the block_type used in the previous frame.   */
			/*      It will be set to the block_type to use for this frame; a type  */
			/*      is chosen that gives a meaningful window transition.             */
			/*   next_desired_block_type: block type (LONG or SHORT) that the psycho */
			/*      model wants to use for the next frame. The psycho model works    */
			/*      on a look-ahead buffer.                                          */
			/*   desired_block_type: block type (LONG or SHORT) that the psycho      */
			/*      model previously asked for. It is the desired block_type for     */
			/*      this frame.                                                      */
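			/* The resulting transitions are:                                        */
			/*   prev SHORT/LONG_SHORT, desired LONG and next LONG -> SHORT_LONG     */
			/*   prev SHORT/LONG_SHORT, otherwise                  -> ONLY_SHORT     */
			/*   prev LONG/SHORT_LONG,  next SHORT                 -> LONG_SHORT     */
			/*   prev LONG/SHORT_LONG,  next LONG                  -> ONLY_LONG      */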
if ( (block_type[chanNum]==ONLY_SHORT_WINDOW)||(block_type[chanNum]==LONG_SHORT_WINDOW) ) {
if ( (desired_block_type[chanNum]==ONLY_LONG_WINDOW)&&(next_desired_block_type[chanNum]==ONLY_LONG_WINDOW) ) {
block_type[chanNum]=SHORT_LONG_WINDOW;
} else {
block_type[chanNum]=ONLY_SHORT_WINDOW;
}
} else if (next_desired_block_type[chanNum]==ONLY_SHORT_WINDOW) {
block_type[chanNum]=LONG_SHORT_WINDOW;
} else {
block_type[chanNum]=ONLY_LONG_WINDOW;
}
desired_block_type[chanNum]=next_desired_block_type[chanNum];
}
}
// printf("%d %d\n", block_type[0], block_type[1]);
// block_type[0] = ONLY_LONG_WINDOW;
// block_type[1] = ONLY_LONG_WINDOW;
// block_type[0] = ONLY_SHORT_WINDOW;
// block_type[1] = ONLY_SHORT_WINDOW;
// if (as->use_MS)
// block_type[1] = block_type[0];
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
			/* Set window shape parameter in quantInfo */
// quantInfo[chanNum].window_shape = WS_DOLBY;
quantInfo[chanNum].window_shape = WS_FHG;
switch( block_type[chanNum] ) {
case ONLY_SHORT_WINDOW :
no_sub_win = short_win_in_long;
sub_win_size = block_size_samples/short_win_in_long;
quantInfo[chanNum].max_sfb = max_sfb_s[srate_idx];
#if 0
quantInfo[chanNum].num_window_groups = 4;
quantInfo[chanNum].window_group_length[0] = 1;
quantInfo[chanNum].window_group_length[1] = 2;
quantInfo[chanNum].window_group_length[2] = 3;
quantInfo[chanNum].window_group_length[3] = 2;
#else
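				/* All eight short windows go into a single group here; whichever
				   grouping is used, the window_group_length entries must sum to
				   short_win_in_long. */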
quantInfo[chanNum].num_window_groups = 1;
quantInfo[chanNum].window_group_length[0] = 8;
quantInfo[chanNum].window_group_length[1] = 0;
quantInfo[chanNum].window_group_length[2] = 0;
quantInfo[chanNum].window_group_length[3] = 0;
quantInfo[chanNum].window_group_length[4] = 0;
quantInfo[chanNum].window_group_length[5] = 0;
quantInfo[chanNum].window_group_length[6] = 0;
quantInfo[chanNum].window_group_length[7] = 0;
#endif
break;
default:
no_sub_win = 1;
sub_win_size = block_size_samples;
quantInfo[chanNum].max_sfb = max_sfb_l[srate_idx];
quantInfo[chanNum].num_window_groups = 1;
quantInfo[chanNum].window_group_length[0]=1;
break;
}
}
}
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
/* Count number of bits used for gain_control_data */
used_bits += WriteGainControlData(&quantInfo[chanNum], /* quantInfo contains packed gain control data */
NULL, /* NULL BsBitStream. Only counting bits, no need to write yet */
0); /* Zero write flag means don't write */
}
}
/******************************************************************************************************************************
*
* T/F mapping
*
******************************************************************************************************************************/
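	/* buffer2freq applies the analysis window selected above and performs the
	   forward transform: one block of block_size_samples coefficients for long
	   window sequences, or short_win_in_long sub-blocks of
	   block_size_samples/short_win_in_long coefficients each for
	   ONLY_SHORT_WINDOW (the filterbank itself lives outside this file). */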
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
buffer2freq(
DTimeSigBuf[chanNum],
spectral_line_vector[chanNum],
overlap_buffer[chanNum],
block_type[chanNum],
quantInfo[chanNum].window_shape,
block_size_samples,
block_size_samples/2,
block_size_samples/short_win_in_long,
MOVERLAPPED
);
}
}
/******************************************************************************************************************************
*
* adapt ratios of psychoacoustic module to codec scale factor bands
*
******************************************************************************************************************************/
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
switch( block_type[chanNum] ) {
			case ONLY_LONG_WINDOW:
			case LONG_SHORT_WINDOW:
			case SHORT_LONG_WINDOW:
				memcpy( (char*)sfb_width_table[chanNum], (char*)chpo_long[chanNum].cb_width, (NSFB_LONG+1)*sizeof(int) );
				nr_of_sfb[chanNum] = chpo_long[chanNum].no_of_cb;
				p_ratio[chanNum] = p_ratio_long[chanNum];
				break;
			case ONLY_SHORT_WINDOW:
				memcpy( (char*)sfb_width_table[chanNum], (char*)chpo_short[chanNum][0].cb_width, (NSFB_SHORT+1)*sizeof(int) );
				nr_of_sfb[chanNum] = chpo_short[chanNum][0].no_of_cb;
				p_ratio[chanNum] = p_ratio_short[chanNum];
				break;
			}
}
}
// if (as->use_MS) {
MSPreprocess(p_ratio_long, p_ratio_short, chpo_long, chpo_short,
channelInfo, block_type, quantInfo, as->use_MS, max_ch);
// } else {
// int chanNum;
// for (chanNum=0;chanNum<max_ch;chanNum++) {
//
// /* Save p_ratio from psychoacoustic model for next frame. */
// /* Psycho model is using a look-ahead window for block switching */
// if (as->use_MS) {
// memcpy( (char*)p_ratio_long[chanNum], (char*)chpo_long[chanNum+2].p_ratio, (NSFB_LONG)*sizeof(double) );
// memcpy( (char*)p_ratio_short[chanNum],(char*)chpo_short[chanNum+2][0].p_ratio,(MAX_SHORT_WINDOWS*NSFB_SHORT)*sizeof(double) );
// } else {
// memcpy( (char*)p_ratio_long[chanNum], (char*)chpo_long[chanNum].p_ratio, (NSFB_LONG)*sizeof(double) );
// memcpy( (char*)p_ratio_short[chanNum],(char*)chpo_short[chanNum][0].p_ratio,(MAX_SHORT_WINDOWS*NSFB_SHORT)*sizeof(double) );
// }
// }
// }
MSEnergy(spectral_line_vector, energy, chpo_long, chpo_short, sfb_width_table,
channelInfo, block_type, quantInfo, as->use_MS, max_ch);
{
int chanNum;
for (chanNum=0;chanNum<max_ch;chanNum++) {
/* Construct sf band offset table */
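			/* The offsets are running sums of the band widths, e.g. widths
			   {4, 4, 8, 12} give offsets {0, 4, 8, 16, 28}; the final entry is
			   the total number of spectral lines covered. */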
int offset=0;
int sfb;
for (sfb=0;sfb<nr_of_sfb[chanNum];sfb++) {
sfb_offset_table[chanNum][sfb] = offset;
offset+=sfb_width_table[chanNum][sfb];
}
sfb_offset_table[chanNum][nr_of_sfb[chanNum]]=offset;
}
}
/******************************************************************************************************************************
*
* quantization and coding
*
******************************************************************************************************************************/
{
int padding_limit = max_bitreservoir_bits;
int maxNumBitsByteAligned;
int chanNum;
int numFillBits;
int bitsLeftAfterFill;
/* bit budget */
num_bits_available = (long)(average_bits + available_bitreservoir_bits - used_bits);
		/* find the largest byte-aligned bit count that does not exceed num_bits_available */
maxNumBitsByteAligned = ((num_bits_available >> 3) << 3);
/* Compute how many reservoir bits can be used and still be able to byte */
/* align without exceeding num_bits_available, and have room for an ID_END marker */
available_bitreservoir_bits = maxNumBitsByteAligned - LEN_SE_ID - average_bits;
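		/* Illustrative numbers (assumed, not from a real run): with
		   average_bits = 3000, 1000 reservoir bits and used_bits = 10,
		   num_bits_available = 3990 and maxNumBitsByteAligned = 3984; if
		   LEN_SE_ID is 3 bits, the usable reservoir becomes
		   3984 - 3 - 3000 = 981 bits. */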
/******************************************/
/* Perform TNS analysis and filtering */
/******************************************/
for (chanNum=0;chanNum<max_ch;chanNum++) {
error = TnsEncode(nr_of_sfb[chanNum], /* Number of bands per window */
quantInfo[chanNum].max_sfb, /* max_sfb */
block_type[chanNum],
sfb_offset_table[chanNum],
spectral_line_vector[chanNum],
&tnsInfo[chanNum],
as->use_TNS);
if (error == FERROR)
return FERROR;
}
/******************************************/
/* Apply Intensity Stereo */
/******************************************/
if (as->use_IS) {
ISEncode(spectral_line_vector,
channelInfo,
sfb_offset_table,
block_type,
quantInfo,
max_ch);
}
/***********************************************************************/
/* If prediction is used, compute predictor info and residual spectrum */
/***********************************************************************/
for (chanNum=0;chanNum<max_ch;chanNum++) {
// if (qc_select == AAC_PRED) {
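			/* NOTE: prediction is currently hard-disabled by the if (0) below,
			   regardless of qc_select; only the pred_global_flag = 0 branch runs. */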
if (0) {
int numPredBands;
max_pred_sfb = 40;
numPredBands = min(max_pred_sfb,nr_of_sfb[chanNum]);
PredCalcPrediction( spectral_line_vector[chanNum],
reconstructed_spectrum[chanNum],
(int)block_type[chanNum],
numPredBands,
sfb_width_table[chanNum],
&(quantInfo[chanNum].pred_global_flag),
quantInfo[chanNum].pred_sfb_flag,
&(quantInfo[chanNum].reset_group_number),
chanNum);
} else {
quantInfo[chanNum].pred_global_flag = 0;
}
} /* for(chanNum... */
/******************************************/
/* Apply MS stereo */
/******************************************/
if (as->use_MS == 1) {
MSEncode(spectral_line_vector,
channelInfo,
sfb_offset_table,
block_type,
quantInfo,
max_ch);
} else if (as->use_MS == 0) {
MSEncodeSwitch(spectral_line_vector,
channelInfo,
sfb_offset_table,
block_type,
quantInfo,
max_ch);
}
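		/* use_MS == 1 calls MSEncode directly, use_MS == 0 lets MSEncodeSwitch
		   decide per band, and any other value skips M/S processing here. */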
/************************************************/
/* Call the AAC quantization and coding module. */
/************************************************/
for (chanNum = 0; chanNum < max_ch; chanNum++) {
int bitsToUse;
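			/* Per-channel budget: an equal share of the remaining average bits
			   for this frame plus 20% of this channel's share of the reservoir. */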
bitsToUse = (int)((average_bits - used_bits)/max_ch);
bitsToUse += (int)(0.2*available_bitreservoir_bits/max_ch);
error = tf_encode_spectrum_aac( &spectral_line_vector[chanNum],
&p_ratio[chanNum],
&allowed_distortion[chanNum],
&energy[chanNum],
&block_type[chanNum],
&sfb_width_table[chanNum],
&nr_of_sfb[chanNum],
bitsToUse,
available_bitreservoir_bits,
padding_limit,
fixed_stream,
NULL,
1, /* nr of audio channels */
&reconstructed_spectrum[chanNum],
useShortWindows,
aacAllowScalefacs,
&quantInfo[chanNum],
&(channelInfo[chanNum]),
0/*no vbr*/,
bit_rate);
if (error == FERROR)
return error;
}
		/* For short windows the reconstructed spectrum is not needed for prediction, so clear it */
for (chanNum=0;chanNum<max_ch;chanNum++) {
if ((block_type[chanNum]==ONLY_SHORT_WINDOW)) {
int sind;
				for (sind=0;sind<block_size_samples;sind++) {
reconstructed_spectrum[chanNum][sind]=0.0;
}
}
}
/**********************************/
/* Write out all encoded channels */
/**********************************/
for (chanNum=0;chanNum<max_ch;chanNum++) {
if (channelInfo[chanNum].present) {
/* Write out a single_channel_element */
if (!channelInfo[chanNum].cpe) {
					/* Write out sce */  /* BugFix by YT: '+=' should be '=' */
used_bits = WriteSCE(&quantInfo[chanNum], /* Quantization information */
channelInfo[chanNum].tag,
fixed_stream, /* Bitstream */
1); /* Write flag, 1 means write */
} else {
if (channelInfo[chanNum].ch_is_left) {
/* Write out cpe */
used_bits = WriteCPE(&quantInfo[chanNum], /* Quantization information,left */
&quantInfo[channelInfo[chanNum].paired_ch], /* Right */
channelInfo[chanNum].tag,
channelInfo[chanNum].common_window, /* common window */
&(channelInfo[chanNum].ms_info),
fixed_stream, /* Bitstream */
1); /* Write flag, 1 means write */
}
} /* if (!channelInfo[chanNum].cpe) else */
} /* if (chann...*/
} /* for (chanNum...*/
/* Compute how many fill bits are needed to avoid overflowing bit reservoir */
/* Save room for ID_END terminator */
if (used_bits < (8 - LEN_SE_ID) ) {
numFillBits = 8 - LEN_SE_ID - used_bits;
} else {
numFillBits = 0;
}
/* Write AAC fill_elements, smallest fill element is 7 bits. */
/* Function may leave up to 6 bits left after fill, so tell it to fill a few extra */
numFillBits += 6;
bitsLeftAfterFill=WriteAACFillBits(fixed_stream,numFillBits);
used_bits += (numFillBits - bitsLeftAfterFill);
/* Write ID_END terminator */
BsPutBit(fixed_stream,ID_END,LEN_SE_ID);
used_bits += LEN_SE_ID;
/* Now byte align the bitstream */
used_bits += ByteAlign(fixed_stream);
} /* Quantization and coding block */
return FNO_ERROR;
}