ref: e38ffc2c9716d8764aef7e88ad40d0ebbb572ff2
dir: /src/silence.c/
/* Silence effect for SoX
* by Heikki Leinonen (heilei@iki.fi) 25.03.2001
* Major Modifications by Chris Bagwell 06.08.2001
* Minor addition by Donnie Smith 13.08.2003
*
* This effect can delete samples from the start of a sound file
* until it sees a specified count of samples exceed a given threshold
* (any of the channels).
* This effect can also delete samples from the end of a sound file
* when it sees a specified count of samples below a given threshold
* (all channels).
* It may also be used to delete samples anywhere in a sound file.
* Thresholds can be given as either a percentage or in decibels.
*/
#include "sox_i.h"
#include <string.h>
/* State machine modes; the current one is kept in priv_t.mode. */
#define SILENCE_TRIM 0 /* discarding input until the start periods are found */
#define SILENCE_TRIM_FLUSH 1 /* writing start_holdoff to the output, then SILENCE_COPY */
#define SILENCE_COPY 2 /* copying input to output (watching for stop silence if enabled) */
#define SILENCE_COPY_FLUSH 3 /* writing stop_holdoff to the output, then back to SILENCE_COPY */
#define SILENCE_STOP 4 /* finished: flow() consumes input and writes nothing */
/* Private data for silence effect. */
typedef struct {
/* --- Trimming at the front (above_periods duration threshold) --- */
char start; /* true when start_periods > 0, i.e. front trimming is enabled */
int start_periods; /* number of above-threshold periods to find before copying */
char *start_duration_str; /* saved duration argument; re-parsed in start() with the real rate */
size_t start_duration; /* parsed duration in samples; fill level that completes a period */
double start_threshold;
char start_unit; /* "d" for decibels or "%" for percent. */
int restart; /* set when a negative below_periods was given: go back to trimming after a stop */
sox_sample_t *start_holdoff; /* above-threshold samples held until start_duration is reached */
size_t start_holdoff_offset; /* next index to read when flushing start_holdoff */
size_t start_holdoff_end; /* number of samples currently stored in start_holdoff */
int start_found_periods; /* periods detected so far */
/* --- Trimming at the back (below_periods duration threshold) --- */
char stop; /* true when a below_periods section was given */
int stop_periods; /* absolute value of the below_periods argument */
char *stop_duration_str; /* saved duration argument; re-parsed in start() with the real rate */
size_t stop_duration; /* parsed duration in samples; fill level that completes a period */
double stop_threshold;
char stop_unit; /* 'd' or '%', as for start_unit */
sox_sample_t *stop_holdoff; /* below-threshold samples held until stop_duration is reached */
size_t stop_holdoff_offset; /* next index to read when flushing stop_holdoff */
size_t stop_holdoff_end; /* number of samples currently stored in stop_holdoff */
int stop_found_periods; /* periods detected so far */
/* --- Running RMS over a circular window of squared samples --- */
double *window; /* window_size squared sample values */
double *window_current; /* slot that the next update_rms() call overwrites */
double *window_end; /* one past the last slot of window */
size_t window_size; /* number of entries in window */
double rms_sum; /* sum of all entries currently in window */
char leave_silence; /* true when the -l option was given */
/* State Machine */
char mode; /* one of the SILENCE_* modes above */
} priv_t;
/* Reset the running-RMS state: zero every window entry, rewind the
 * write position to the first slot and clear the accumulated sum.
 * window and window_size must already be set up.
 */
static void clear_rms(sox_effect_t * effp)
{
    priv_t *state = (priv_t *) effp->priv;
    double *first = state->window;
    size_t entries = state->window_size;

    memset(first, 0, entries * sizeof(double));
    state->rms_sum = 0;
    state->window_end = first + entries;
    state->window_current = first;
}
static int sox_silence_getopts(sox_effect_t * effp, int argc, char **argv)
{
priv_t * silence = (priv_t *) effp->priv;
int parse_count;
--argc, ++argv;
/* check for option switches */
silence->leave_silence = sox_false;
if (argc > 0)
{
if (!strcmp("-l", *argv)) {
argc--; argv++;
silence->leave_silence = sox_true;
}
}
if (argc < 1)
return lsx_usage(effp);
/* Parse data related to trimming front side */
silence->start = sox_false;
if (sscanf(argv[0], "%d", &silence->start_periods) != 1)
return lsx_usage(effp);
if (silence->start_periods < 0)
{
lsx_fail("Periods must not be negative");
return(SOX_EOF);
}
argv++;
argc--;
if (silence->start_periods > 0)
{
silence->start = sox_true;
if (argc < 2)
return lsx_usage(effp);
/* We do not know the sample rate so we can not fully
* parse the duration info yet. So save argument off
* for future processing.
*/
silence->start_duration_str = lsx_malloc(strlen(argv[0])+1);
strcpy(silence->start_duration_str,argv[0]);
/* Perform a fake parse to do error checking */
if (lsx_parsesamples(0.,silence->start_duration_str,
&silence->start_duration,'s') == NULL)
return lsx_usage(effp);
parse_count = sscanf(argv[1], "%lf%c", &silence->start_threshold,
&silence->start_unit);
if (parse_count < 1)
return lsx_usage(effp);
else if (parse_count < 2)
silence->start_unit = '%';
argv++; argv++;
argc--; argc--;
}
silence->stop = sox_false;
/* Parse data needed for trimming of backside */
if (argc > 0)
{
if (argc < 3)
return lsx_usage(effp);
if (sscanf(argv[0], "%d", &silence->stop_periods) != 1)
return lsx_usage(effp);
if (silence->stop_periods < 0)
{
silence->stop_periods = -silence->stop_periods;
silence->restart = 1;
}
else
silence->restart = 0;
silence->stop = sox_true;
argv++;
argc--;
/* We do not know the sample rate so we can not fully
* parse the duration info yet. So save argument off
* for future processing.
*/
silence->stop_duration_str = lsx_malloc(strlen(argv[0])+1);
strcpy(silence->stop_duration_str,argv[0]);
/* Perform a fake parse to do error checking */
if (lsx_parsesamples(0.,silence->stop_duration_str,
&silence->stop_duration,'s') == NULL)
return lsx_usage(effp);
parse_count = sscanf(argv[1], "%lf%c", &silence->stop_threshold,
&silence->stop_unit);
if (parse_count < 1)
return lsx_usage(effp);
else if (parse_count < 2)
silence->stop_unit = '%';
argv++; argv++;
argc--; argc--;
}
/* Error checking */
if (silence->start)
{
if ((silence->start_unit != '%') && (silence->start_unit != 'd'))
{
lsx_fail("Invalid unit specified");
return lsx_usage(effp);
}
if ((silence->start_unit == '%') && ((silence->start_threshold < 0.0)
|| (silence->start_threshold > 100.0)))
{
lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
return (SOX_EOF);
}
if ((silence->start_unit == 'd') && (silence->start_threshold >= 0.0))
{
lsx_fail("silence threshold should be less than 0.0 dB");
return(SOX_EOF);
}
}
if (silence->stop)
{
if ((silence->stop_unit != '%') && (silence->stop_unit != 'd'))
{
lsx_fail("Invalid unit specified");
return(SOX_EOF);
}
if ((silence->stop_unit == '%') && ((silence->stop_threshold < 0.0) ||
(silence->stop_threshold > 100.0)))
{
lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
return (SOX_EOF);
}
if ((silence->stop_unit == 'd') && (silence->stop_threshold >= 0.0))
{
lsx_fail("silence threshold should be less than 0.0 dB");
return(SOX_EOF);
}
}
return(SOX_SUCCESS);
}
/* Round a sample count up to the next multiple of the channel count so
 * that it always describes a whole number of frames.
 */
static size_t round_up_to_channels(size_t samples, size_t channels)
{
    size_t partial;

    if (channels == 0)
        return samples;
    partial = samples % channels;
    return partial ? samples + (channels - partial) : samples;
}

/* Prepare the effect for processing: re-parse the saved durations with the
 * now-known sample rate, allocate the RMS window and both holdoff buffers,
 * and select the initial state-machine mode.
 *
 * Returns SOX_SUCCESS, or the result of lsx_usage() when a duration cannot
 * be parsed.
 */
static int sox_silence_start(sox_effect_t * effp)
{
    priv_t *silence = (priv_t *)effp->priv;
    size_t channels = effp->in_signal.channels;
    size_t start_capacity, stop_capacity;

    /* Now that we know sample rate, reparse duration.  This is done before
     * any allocation so that a parse failure does not leave buffers behind.
     *
     * NOTE(review): flow() compares these durations against counts of
     * interleaved samples; whether lsx_parsesamples() already accounts for
     * the channel count is not visible from this file - confirm.
     */
    if (silence->start)
    {
        if (lsx_parsesamples(effp->in_signal.rate, silence->start_duration_str,
                             &silence->start_duration, 's') == NULL)
            return lsx_usage(effp);
        /* flow() stores whole frames in the holdoff buffer, so the
         * duration must be a multiple of the channel count. */
        silence->start_duration =
            round_up_to_channels(silence->start_duration, channels);
    }
    if (silence->stop)
    {
        if (lsx_parsesamples(effp->in_signal.rate, silence->stop_duration_str,
                             &silence->stop_duration, 's') == NULL)
            return lsx_usage(effp);
        silence->stop_duration =
            round_up_to_channels(silence->stop_duration, channels);
    }

    /* When you want to remove silence, small window sizes are
     * better or else RMS will look like non-silence at
     * abrupt changes from loud to silence.
     */
    silence->window_size = (effp->in_signal.rate / 50) *
                           effp->in_signal.channels;
    /* update_rms() always writes one entry and compute_rms() divides by
     * the size, so never work with an empty window. */
    if (silence->window_size == 0)
        silence->window_size = 1;
    silence->window = lsx_malloc(silence->window_size * sizeof(double));
    clear_rms(effp);

    silence->mode = silence->start ? SILENCE_TRIM : SILENCE_COPY;

    /* flow() appends a complete frame before it compares the fill level
     * with the duration, so each holdoff buffer must have room for at
     * least one frame even when its duration is zero. */
    start_capacity = silence->start_duration;
    if (start_capacity < channels)
        start_capacity = channels;
    silence->start_holdoff = lsx_malloc(sizeof(sox_sample_t) * start_capacity);
    silence->start_holdoff_offset = 0;
    silence->start_holdoff_end = 0;
    silence->start_found_periods = 0;

    stop_capacity = silence->stop_duration;
    if (stop_capacity < channels)
        stop_capacity = channels;
    silence->stop_holdoff = lsx_malloc(sizeof(sox_sample_t) * stop_capacity);
    silence->stop_holdoff_offset = 0;
    silence->stop_holdoff_end = 0;
    silence->stop_found_periods = 0;

    return SOX_SUCCESS;
}
/* Decide whether a (non-negative) level exceeds the given threshold.
 *
 * value     - level to test, expected to be >= 0
 * threshold - limit, expressed in the given unit
 * unit      - '%' (percent of SOX_SAMPLE_MAX) or 'd' (decibels)
 *
 * Returns true when the scaled level is strictly greater than threshold.
 */
static sox_bool aboveThreshold(sox_effect_t const * effp,
    sox_sample_t value /* >= 0 */, double threshold, int unit)
{
    /* When scaling low bit data, noise values got scaled way up */
    /* Only consider the original bits when looking for silence */
    unsigned precision = effp->in_signal.precision;
    unsigned long mask = 0xFFFFFFFFul;
    sox_sample_t masked_value;
    double scaled_value;

    /* Keep only the top `precision` bits of the 32-bit sample.  The mask is
     * built with unsigned arithmetic: left-shifting a negative value, or
     * shifting by the full width, is undefined behaviour.  A precision
     * outside 1..32 leaves the value unmasked. */
    if (precision >= 1 && precision < 32)
        mask = (mask << (32 - precision)) & 0xFFFFFFFFul;
    masked_value = (sox_sample_t)((unsigned long)value & mask);

    scaled_value = (double)masked_value / SOX_SAMPLE_MAX;
    if (unit == '%')
        scaled_value *= 100;
    else if (unit == 'd')
        scaled_value = linear_to_dB(scaled_value);

    return scaled_value > threshold;
}
/* Return the RMS the window would have if `sample` replaced the entry at
 * the current window position.  The window state itself is not modified;
 * update_rms() performs the actual replacement.
 */
static sox_sample_t compute_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t *state = (priv_t *) effp->priv;
    double squared = (double)sample * (double)sample;
    double trial_sum = state->rms_sum;

    trial_sum -= *state->window_current;   /* drop the entry being replaced */
    trial_sum += squared;                  /* add the candidate sample */

    return (sox_sample_t)sqrt(trial_sum / state->window_size);
}
/* Store the square of `sample` in the current window slot, keep rms_sum
 * in step with the change, and advance the slot pointer, wrapping back
 * to the first slot after the last one.
 */
static void update_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t *state = (priv_t *) effp->priv;
    double *slot = state->window_current;

    state->rms_sum -= *slot;
    *slot = ((double)sample * (double)sample);
    state->rms_sum += *slot;

    if (++slot >= state->window_end)
        slot = state->window;
    state->window_current = slot;
}
/* Process samples from ibuf to obuf according to the current
 * state-machine mode (silence->mode).
 *
 * ibuf  - interleaved input samples
 * obuf  - output buffer
 * isamp - in: number of input samples available;
 *         out: number of input samples consumed
 * osamp - in: room in obuf, in samples;
 *         out: number of samples written to obuf
 *
 * Returns SOX_EOF once the requested number of stop periods has been found
 * and restart is not set; otherwise SOX_SUCCESS.
 *
 * The modes jump into one another with goto so that one call can carry on
 * in the next mode with whatever input and output space is left.
 */
static int sox_silence_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
size_t *isamp, size_t *osamp)
{
priv_t * silence = (priv_t *) effp->priv;
int threshold;
size_t i, j;
size_t nrOfTicks, nrOfInSamplesRead, nrOfOutSamplesWritten;
nrOfInSamplesRead = 0;
nrOfOutSamplesWritten = 0;
switch (silence->mode)
{
case SILENCE_TRIM:
/* Reads and discards input until enough above-threshold data has been
 * seen.  Frames that are above the threshold are collected in
 * start_holdoff; a below-threshold frame throws the collected frames
 * away.  Each time start_duration samples have been collected one
 * period is counted, and once start_periods periods are found the
 * holdoff buffer is flushed to the output (SILENCE_TRIM_FLUSH).
 * Need to make sure and copy input in groups of "channels" to
 * prevent getting buffers out of sync.
 */
silence_trim:
/* One tick is one frame (one sample for every channel). */
nrOfTicks = min((*isamp-nrOfInSamplesRead),
(*osamp-nrOfOutSamplesWritten)) /
effp->in_signal.channels;
for(i = 0; i < nrOfTicks; i++)
{
/* The frame counts as above the threshold when ANY channel is.
 * compute_rms() does not advance the window, so every channel of the
 * frame is evaluated against the same window slot.
 */
threshold = 0;
for (j = 0; j < effp->in_signal.channels; j++)
{
threshold |= aboveThreshold(effp,
compute_rms(effp, ibuf[j]),
silence->start_threshold,
silence->start_unit);
}
if (threshold)
{
/* Add to holdoff buffer */
for (j = 0; j < effp->in_signal.channels; j++)
{
update_rms(effp, *ibuf);
silence->start_holdoff[
silence->start_holdoff_end++] = *ibuf++;
nrOfInSamplesRead++;
}
/* A full holdoff buffer completes one period. */
if (silence->start_holdoff_end >=
silence->start_duration)
{
if (++silence->start_found_periods >=
silence->start_periods)
{
silence->mode = SILENCE_TRIM_FLUSH;
goto silence_trim_flush;
}
/* Trash holdoff buffer since it's not
 * needed. Start looking again.
 */
silence->start_holdoff_offset = 0;
silence->start_holdoff_end = 0;
}
}
else /* !above Threshold */
{
/* Drop whatever was held and consume this frame without output. */
silence->start_holdoff_end = 0;
for (j = 0; j < effp->in_signal.channels; j++)
{
update_rms(effp, ibuf[j]);
}
ibuf += effp->in_signal.channels;
nrOfInSamplesRead += effp->in_signal.channels;
}
} /* for nrOfTicks */
break;
case SILENCE_TRIM_FLUSH:
/* Write the held start samples to the output, as far as the remaining
 * output space allows.
 */
silence_trim_flush:
nrOfTicks = min((silence->start_holdoff_end -
silence->start_holdoff_offset),
(*osamp-nrOfOutSamplesWritten));
for(i = 0; i < nrOfTicks; i++)
{
*obuf++ = silence->start_holdoff[silence->start_holdoff_offset++];
nrOfOutSamplesWritten++;
}
/* If fully drained holdoff then switch to copy mode */
if (silence->start_holdoff_offset == silence->start_holdoff_end)
{
silence->start_holdoff_offset = 0;
silence->start_holdoff_end = 0;
silence->mode = SILENCE_COPY;
goto silence_copy;
}
break;
case SILENCE_COPY:
/* Attempts to copy samples into output buffer.
 *
 * Case B:
 * If not looking for silence to terminate copy then
 * blindly copy data into output buffer.
 *
 * Case A:
 *
 * Case 1a:
 * If previous silence was detected then see if input sample is
 * above threshold. If found then flush out hold off buffer
 * and copy over to output buffer.
 *
 * Case 1b:
 * If no previous silence was detected then see if input sample
 * is above threshold. If found then copy directly
 * to output buffer.
 *
 * Case 2:
 * If not above threshold then silence is detected so
 * store in hold off buffer and do not write to output
 * buffer. Even though it wasn't put in output
 * buffer, inform user that input was consumed.
 *
 * If hold off buffer is full after this then stop
 * copying data and discard data in hold off buffer.
 *
 * Special leave_silence logic:
 *
 * During this mode, go ahead and copy input
 * samples to output buffer instead of holdoff buffer.
 * Then also short circuit any flushes that would occur
 * when non-silence is detected since samples were already
 * copied. This has the effect of always leaving
 * holdoff[] amount of silence but deleting any
 * beyond that amount.
 *
 */
silence_copy:
/* One tick is one frame (one sample for every channel). */
nrOfTicks = min((*isamp-nrOfInSamplesRead),
(*osamp-nrOfOutSamplesWritten)) /
effp->in_signal.channels;
if (silence->stop)
{
/* Case A */
for(i = 0; i < nrOfTicks; i++)
{
/* The frame counts as above the threshold only when EVERY channel
 * is; a single channel at or below it makes the frame silent.
 */
threshold = 1;
for (j = 0; j < effp->in_signal.channels; j++)
{
threshold &= aboveThreshold(effp,
compute_rms(effp, ibuf[j]),
silence->stop_threshold,
silence->stop_unit);
}
/* Case 1a
 * If above threshold, check to see if we were holding
 * off previously. If so then flush this buffer.
 * We haven't incremented any pointers yet so nothing
 * is lost.
 *
 * If user wants to leave_silence, then we
 * were already copying the data and so no
 * need to flush the old data. Just resume
 * copying as if we were not holding off.
 */
if (threshold && silence->stop_holdoff_end
&& !silence->leave_silence)
{
silence->mode = SILENCE_COPY_FLUSH;
goto silence_copy_flush;
}
/* Case 1b */
else if (threshold)
{
/* Not holding off so copy into output buffer */
for (j = 0; j < effp->in_signal.channels; j++)
{
update_rms(effp, *ibuf);
*obuf++ = *ibuf++;
nrOfInSamplesRead++;
nrOfOutSamplesWritten++;
}
}
/* Case 2 */
else if (!threshold)
{
/* Add to holdoff buffer (and, with leave_silence, also straight
 * to the output).
 */
for (j = 0; j < effp->in_signal.channels; j++)
{
update_rms(effp, *ibuf);
if (silence->leave_silence) {
*obuf++ = *ibuf;
nrOfOutSamplesWritten++;
}
silence->stop_holdoff[
silence->stop_holdoff_end++] = *ibuf++;
nrOfInSamplesRead++;
}
/* Check whether the holdoff buffer has reached the duration */
if (silence->stop_holdoff_end >=
silence->stop_duration)
{
/* Increment found counter and see if this
 * is the last period. If so then exit.
 */
if (++silence->stop_found_periods >=
silence->stop_periods)
{
/* The held silence is discarded. */
silence->stop_holdoff_offset = 0;
silence->stop_holdoff_end = 0;
if (!silence->restart)
{
*isamp = nrOfInSamplesRead;
*osamp = nrOfOutSamplesWritten;
silence->mode = SILENCE_STOP;
/* Return SOX_EOF since no more processing */
return (SOX_EOF);
}
else
{
/* Restart requested: reset all detection state and go back to
 * trimming leading silence.
 */
silence->stop_found_periods = 0;
silence->start_found_periods = 0;
silence->start_holdoff_offset = 0;
silence->start_holdoff_end = 0;
clear_rms(effp);
silence->mode = SILENCE_TRIM;
goto silence_trim;
}
}
else
{
/* Flush this buffer and start
 * looking again.
 * NOTE(review): with leave_silence these samples were already
 * written to obuf above, so flushing the holdoff here looks like
 * it emits them a second time - confirm.
 */
silence->mode = SILENCE_COPY_FLUSH;
goto silence_copy_flush;
}
/* NOTE(review): not reached - every branch above leaves through
 * return or goto.
 */
break;
} /* Filled holdoff buffer */
} /* Detected silence */
} /* For # of samples */
} /* Trimming off backend */
else /* !(silence->stop) */
{
/* Case B */
memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks*
effp->in_signal.channels);
nrOfInSamplesRead += (nrOfTicks*effp->in_signal.channels);
nrOfOutSamplesWritten += (nrOfTicks*effp->in_signal.channels);
}
break;
case SILENCE_COPY_FLUSH:
/* Write the held stop samples to the output, as far as the remaining
 * output space allows.
 */
silence_copy_flush:
nrOfTicks = min((silence->stop_holdoff_end -
silence->stop_holdoff_offset),
(*osamp-nrOfOutSamplesWritten));
for(i = 0; i < nrOfTicks; i++)
{
*obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
nrOfOutSamplesWritten++;
}
/* If fully drained holdoff then return to copy mode */
if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
{
silence->stop_holdoff_offset = 0;
silence->stop_holdoff_end = 0;
silence->mode = SILENCE_COPY;
goto silence_copy;
}
break;
case SILENCE_STOP:
/* Finished: report all input as consumed and write nothing. */
nrOfInSamplesRead = *isamp;
break;
}
*isamp = nrOfInSamplesRead;
*osamp = nrOfOutSamplesWritten;
return (SOX_SUCCESS);
}
/* Called when no more input is available.  In SILENCE_COPY or
 * SILENCE_COPY_FLUSH mode the samples still held in stop_holdoff are
 * written to obuf (at most *osamp of them); when the buffer has been
 * emptied the mode becomes SILENCE_STOP.
 *
 * On return *osamp holds the number of samples written.
 * Returns SOX_EOF when the effect has stopped or nothing was written,
 * otherwise SOX_SUCCESS.
 */
static int sox_silence_drain(sox_effect_t * effp, sox_sample_t *obuf, size_t *osamp)
{
    priv_t *state = (priv_t *) effp->priv;
    size_t written = 0;

    if (state->mode == SILENCE_COPY || state->mode == SILENCE_COPY_FLUSH)
    {
        size_t pending = state->stop_holdoff_end - state->stop_holdoff_offset;
        size_t count = (pending <= *osamp) ? pending : *osamp;

        for (; written < count; written++)
            obuf[written] = state->stop_holdoff[state->stop_holdoff_offset++];

        /* If fully drained holdoff then stop */
        if (state->stop_holdoff_offset == state->stop_holdoff_end)
        {
            state->stop_holdoff_end = 0;
            state->stop_holdoff_offset = 0;
            state->mode = SILENCE_STOP;
        }
    }

    *osamp = written;
    return (state->mode == SILENCE_STOP || written == 0) ? SOX_EOF
                                                         : SOX_SUCCESS;
}
/* Release the buffers allocated by sox_silence_start(): both holdoff
 * buffers and the RMS window.  Always returns SOX_SUCCESS.
 */
static int sox_silence_stop(sox_effect_t * effp)
{
    priv_t *state = (priv_t *) effp->priv;

    free(state->stop_holdoff);
    free(state->start_holdoff);
    free(state->window);

    return SOX_SUCCESS;
}
/* Release the duration strings that sox_silence_getopts() saved for
 * later re-parsing.  Always returns SOX_SUCCESS.
 */
static int lsx_kill(sox_effect_t * effp)
{
    priv_t *state = (priv_t *) effp->priv;

    free(state->stop_duration_str);
    free(state->start_duration_str);

    return SOX_SUCCESS;
}
/* Handler table for the silence effect: name, usage text, flags, the
 * callbacks defined in this file and the size of the private data.
 *
 * NOTE(review): the usage text spells the percent sign as "%%".  Whether
 * that is right depends on whether the text is used as a printf format or
 * passed as an argument, which is not visible from this file - confirm.
 */
static sox_effect_handler_t sox_silence_effect = {
    "silence",
    "[ -l ] above_periods [ duration threshold[d|%%] ] [ below_periods duration threshold[d|%%]]",
    SOX_EFF_MCHAN | SOX_EFF_MODIFY | SOX_EFF_LENGTH,
    sox_silence_getopts,
    sox_silence_start,
    sox_silence_flow,
    sox_silence_drain,
    sox_silence_stop,
    lsx_kill, sizeof(priv_t)
};
/* Return the handler table that describes the silence effect. */
const sox_effect_handler_t *lsx_silence_effect_fn(void)
{
    const sox_effect_handler_t *handler = &sox_silence_effect;

    return handler;
}