ref: 8299edfc0c34aaf91ef07bf2410ad15423bcaf96
parent: 693421ea238175e67211ce8a0be8d1db450a9698
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Wed Jan 10 12:41:50 EST 2018
Scaling back the pitch filter when most of the energy is above 3.2 kHz. That corresponds to the fundamental for the shortest pitch period allowed.
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -63,6 +63,7 @@
float music_prob_max;
int bandwidth;
float activity_probability;
+ float max_pitch_ratio;
/* Store as Q6 char to save space. */
unsigned char leak_boost[LEAK_BANDS];
} AnalysisInfo;
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1109,7 +1109,7 @@
static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
- int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+ int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
{
int c;
VARDECL(celt_sig, _pre);
@@ -1165,7 +1165,10 @@
gain1 = 0;
pitch_index = COMBFILTER_MINPERIOD;
}
-
+#ifndef DISABLE_FLOAT_API
+ if (analysis->valid)
+ gain1 *= analysis->max_pitch_ratio;
+#endif
/* Gain threshold for enabling the prefilter/postfilter */
pf_threshold = QCONST16(.2f,15);
@@ -1603,7 +1606,7 @@
&& st->complexity >= 5;
prefilter_tapset = st->tapset_decision;
- pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+ pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
pitch_change = 1;
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -446,6 +446,8 @@
float leakage_from[NB_TBANDS+1];
float leakage_to[NB_TBANDS+1];
float layer_out[MAX_NEURONS];
+ float below_max_pitch;
+ float above_max_pitch;
SAVE_STACK;
alpha = 1.f/IMIN(10, 1+tonal->count);
@@ -722,6 +724,8 @@
maxE = 0;
noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
noise_floor *= noise_floor;
+ below_max_pitch=0;
+ above_max_pitch=0;
for (b=0;b<NB_TBANDS;b++)
{
float E=0;
@@ -738,6 +742,12 @@
}
E = SCALE_ENER(E);
maxE = MAX32(maxE, E);
+ if (band_start < 64)
+ {
+ below_max_pitch += E;
+ } else {
+ above_max_pitch += E;
+ }
tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
Em = MAX32(E, tonal->meanE[b]);
/* Consider the band "active" only if all these conditions are met:
@@ -767,6 +777,7 @@
/* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE);
#endif
+ above_max_pitch += E;
tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
Em = MAX32(E, tonal->meanE[b]);
if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160)
@@ -774,6 +785,10 @@
/* Check if the band is masked (see below). */
is_masked[b] = E < (tonal->prev_bandwidth == 20 ? .01f : .05f)*bandwidth_mask;
}
+ if (above_max_pitch > below_max_pitch)
+ info->max_pitch_ratio = below_max_pitch/above_max_pitch;
+ else
+ info->max_pitch_ratio = 1;
/* In some cases, resampling aliasing can create a small amount of energy in the first band
being cut. So if the last band is masked, we don't include it. */
if (bandwidth == 20 && is_masked[NB_TBANDS])