ref: 89c5e06d4bd5a5e41dda53478f9f706d5196fdef
parent: ea3b30f946d0e3a8d5b88d1b71cac56fb87955fd
author: Felicia Lim <flim@google.com>
date: Mon Feb 10 09:36:35 EST 2020
Revert "Fixes to the activity flag that is passed to Silk so it represents the final activity flag used in the DTX decision" This reverts commit ea3b30f946d0e3a8d5b88d1b71cac56fb87955fd.
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -892,15 +892,34 @@
#endif
/* Decides if DTX should be turned on (=1) or off (=0) */
-static int decide_dtx_mode(opus_int activity, /* indicates if this frame contains speech/music */
- int *nb_no_activity_frames /* number of consecutive frames with no activity */
- )
-
+static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */
+ int *nb_no_activity_frames, /* number of consecutive frames with no activity */
+ opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */
+ const opus_val16 *pcm, /* input pcm signal */
+ int frame_size, /* frame size */
+ int channels,
+ int is_silence, /* only digital silence detected in this frame */
+ int arch
+ )
{
- if (!activity)
+ opus_val32 noise_energy;
+
+ if (!is_silence)
{
+ if (activity_probability < DTX_ACTIVITY_THRESHOLD) /* is noise */
+ {
+ noise_energy = compute_frame_energy(pcm, frame_size, channels, arch);
+
+ /* but is sufficiently quiet */
+ is_silence = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy);
+ }
+ }
+
+ if (is_silence)
+ {
/* The number of consecutive DTX frames should be within the allowed bounds */
(*nb_no_activity_frames)++;
+
if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
{
if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
@@ -1083,8 +1102,6 @@
int analysis_read_subframe_bak=-1;
int is_silence = 0;
#endif
- opus_int activity = VAD_NO_DECISION;
-
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -1152,17 +1169,6 @@
if (!is_silence)
st->voice_ratio = -1;
- if (analysis_info.valid) {
- activity = !is_silence && analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
- if (!activity) {
- /* Mark as active if this noise frame is sufficiently loud */
- opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
- activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
- }
- } else {
- activity = !is_silence;
- }
-
st->detected_bandwidth = 0;
if (analysis_info.valid)
{
@@ -1662,6 +1668,7 @@
if (st->mode != MODE_CELT_ONLY)
{
opus_int32 total_bitRate, celt_rate;
+ opus_int activity;
#ifdef FIXED_POINT
const opus_int16 *pcm_silk;
#else
@@ -1669,6 +1676,14 @@
ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
#endif
+ activity = VAD_NO_DECISION;
+#ifndef DISABLE_FLOAT_API
+ if( analysis_info.valid ) {
+ /* Inform SILK about the Opus VAD decision */
+ activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD );
+ }
+#endif
+
/* Distribute bits between SILK and CELT */
total_bitRate = 8 * bytes_target * frame_rate;
if( st->mode == MODE_HYBRID ) {
@@ -2129,7 +2144,8 @@
#ifndef DISABLE_FLOAT_API
if (st->use_dtx && (analysis_info.valid || is_silence))
{
- if (decide_dtx_mode(activity, &st->nb_no_activity_frames))
+ if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames,
+ st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch))
{
st->rangeFinal = 0;
data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);