ref: c4c25dbfd47a5e84b43d40fad7b1df99996e3ae6
parent: 5202c4b7a010abbada58097189a596b86c80abff
author: David Bryant <david@wavpack.com>
date: Tue Oct 4 16:48:57 EDT 2022
performance and portability improvements, update to version 0.3 - allow compilation on 16-bit integer architectures (tested with ia16-gcc) - reduce memory allocation, especially when not using stereo or "fast" mode - special thanks to hayguen for suggestions and patches
--- a/main.c
+++ b/main.c
@@ -1,7 +1,7 @@
////////////////////////////////////////////////////////////////////////////
// **** AUDIO-STRETCH **** //
// Time Domain Harmonic Scaler //
-// Copyright (c) 2019 David Bryant //
+// Copyright (c) 2022 David Bryant //
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
@@ -19,8 +19,8 @@
#include "stretch.h"
static const char *sign_on = "\n"
-" AUDIO-STRETCH Time Domain Harmonic Scaling Demo Version 0.2\n"
-" Copyright (c) 2019 David Bryant. All Rights Reserved.\n\n";
+" AUDIO-STRETCH Time Domain Harmonic Scaling Demo Version 0.3\n"
+" Copyright (c) 2022 David Bryant. All Rights Reserved.\n\n";
static const char *usage =
" Usage: AUDIO-STRETCH [-options] infile.wav outfile.wav\n\n"
@@ -66,7 +66,7 @@
#define WAVE_FORMAT_PCM 0x1
#define WAVE_FORMAT_EXTENSIBLE 0xfffe
-static int write_pcm_wav_header (FILE *outfile, size_t num_samples, int num_channels, int bytes_per_sample, int sample_rate);
+static int write_pcm_wav_header (FILE *outfile, uint32_t num_samples, int num_channels, int bytes_per_sample, uint32_t sample_rate);
#define BUFFER_SAMPLES 1024
@@ -76,7 +76,7 @@
{
int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, cycle_ratio = 0;
int upper_frequency = 333, lower_frequency = 55, min_period, max_period;
- int samples_to_process, insamples = 0, outsamples = 0;
+ uint32_t samples_to_process, insamples = 0, outsamples = 0;
char *infilename = NULL, *outfilename = NULL;
RiffChunkHeader riff_chunk_header;
WaveHeader WaveHeader = { 0 };
@@ -251,7 +251,7 @@
if (format == WAVE_FORMAT_PCM) {
if (WaveHeader.SampleRate < 8000 || WaveHeader.SampleRate > 48000) {
- fprintf (stderr, "\"%s\" sample rate is %d, must be 8000 to 48000!\n", infilename, WaveHeader.SampleRate);
+ fprintf (stderr, "\"%s\" sample rate is %lu, must be 8000 to 48000!\n", infilename, (unsigned long) WaveHeader.SampleRate);
return 1;
}
}
@@ -289,7 +289,7 @@
break;
}
else { // just ignore unknown chunks
- int bytes_to_eat = (chunk_header.ckSize + 1) & ~1L;
+ uint32_t bytes_to_eat = (chunk_header.ckSize + 1) & ~1L;
char dummy;
while (bytes_to_eat--)
@@ -334,12 +334,18 @@
return 1;
}
- int scaled_rate = scale_rate ? (int)(WaveHeader.SampleRate * ratio + 0.5) : WaveHeader.SampleRate;
+ uint32_t scaled_rate = scale_rate ? (uint32_t)(WaveHeader.SampleRate * ratio + 0.5) : WaveHeader.SampleRate;
write_pcm_wav_header (outfile, 0, WaveHeader.NumChannels, 2, scaled_rate);
- short *inbuffer = malloc (BUFFER_SAMPLES * WaveHeader.BlockAlign);
- short *outbuffer = malloc ((BUFFER_SAMPLES * 2 + max_period * 4) * WaveHeader.BlockAlign);
+ int16_t *inbuffer = malloc (BUFFER_SAMPLES * WaveHeader.BlockAlign);
+ int16_t *outbuffer = malloc ((BUFFER_SAMPLES * 2 + max_period * 4) * WaveHeader.BlockAlign);
+ if (!inbuffer || !outbuffer) {
+ fprintf (stderr, "can't allocate required memory!\n");
+ fclose (infile);
+ return 1;
+ }
+
while (1) {
int samples_read = fread (inbuffer, WaveHeader.BlockAlign,
samples_to_process >= BUFFER_SAMPLES ? BUFFER_SAMPLES : samples_to_process, infile);
@@ -376,15 +382,17 @@
fclose (outfile);
if (insamples && verbose_mode) {
- fprintf (stderr, "done, %d samples --> %d samples (ratio = %.3f)\n", insamples, outsamples, (float) outsamples / insamples);
+ fprintf (stderr, "done, %lu samples --> %lu samples (ratio = %.3f)\n",
+ (unsigned long) insamples, (unsigned long) outsamples, (float) outsamples / insamples);
if (scale_rate)
- fprintf (stderr, "sample rate changed from %d Hz to %d Hz\n", WaveHeader.SampleRate, scaled_rate);
+ fprintf (stderr, "sample rate changed from %lu Hz to %lu Hz\n",
+ (unsigned long) WaveHeader.SampleRate, (unsigned long) scaled_rate);
}
return 0;
}
-static int write_pcm_wav_header (FILE *outfile, size_t num_samples, int num_channels, int bytes_per_sample, int sample_rate)
+static int write_pcm_wav_header (FILE *outfile, uint32_t num_samples, int num_channels, int bytes_per_sample, uint32_t sample_rate)
{
RiffChunkHeader riffhdr;
ChunkHeader datahdr, fmthdr;
@@ -391,7 +399,7 @@
WaveHeader wavhdr;
int wavhdrsize = 16;
- size_t total_data_bytes = num_samples * bytes_per_sample * num_channels;
+ uint32_t total_data_bytes = num_samples * bytes_per_sample * num_channels;
memset (&wavhdr, 0, sizeof (wavhdr));
--- a/stretch.c
+++ b/stretch.c
@@ -1,7 +1,7 @@
////////////////////////////////////////////////////////////////////////////
// **** AUDIO-STRETCH **** //
// Time Domain Harmonic Scaler //
-// Copyright (c) 2019 David Bryant //
+// Copyright (c) 2022 David Bryant //
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
@@ -14,7 +14,15 @@
// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
// 1/2 to 2 times its original length. This is done without altering any of
// the tonal characteristics.
+//
+// Use stereo (num_chans = 2), when both channels are from same source
+// and should contain approximately similar content.
+// For independent channels, prefer using multiple StretchHandle-instances.
+// see https://github.com/dbry/audio-stretch/issues/6
+// Multiple instances, of course, will consume more CPU load.
+// In addition, different output amounts need to be handled.
+
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -26,16 +34,26 @@
#define MIN_PERIOD 24 /* minimum allowable pitch period */
#define MAX_PERIOD 2400 /* maximum allowable pitch period */
+#if INT_MAX == 32767
+#define MERGE_OFFSET 32768L /* promote to long before offset */
+#define abs32 labs /* use long abs to avoid UB */
+#else
+#define MERGE_OFFSET 32768
+#define abs32 abs
+#endif
+
+#define MAX_CORR UINT32_MAX /* maximum value for correlation ratios */
+
struct stretch_cnxt {
int num_chans, inbuff_samples, shortest, longest, tail, head, fast_mode;
- short *inbuff, *calcbuff;
+ int16_t *inbuff, *calcbuff;
float outsamples_error;
- unsigned int *results;
+ uint32_t *results;
};
-static void merge_blocks (short *output, short *input1, short *input2, int samples);
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
-static int find_period (struct stretch_cnxt *cnxt, short *samples);
+static void merge_blocks (int16_t *output, int16_t *input1, int16_t *input2, int samples);
+static int find_period_fast (struct stretch_cnxt *cnxt, int16_t *samples);
+static int find_period (struct stretch_cnxt *cnxt, int16_t *samples);
/*
* Initialize a context of the time stretching code. The shortest and longest periods
@@ -62,13 +80,17 @@
cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
if (cnxt) {
- cnxt->inbuff_samples = longest_period * num_channels * 8;
+ cnxt->inbuff_samples = longest_period * num_channels * 6;
cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
- cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
- cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
+
+ if (num_channels == 2 || fast_mode)
+ cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
+
+ if (fast_mode)
+ cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
}
- if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
+ if (!cnxt || !cnxt->inbuff || (num_channels == 2 && fast_mode && !cnxt->calcbuff) || (fast_mode && !cnxt->results)) {
fprintf (stderr, "stretch_init(): out of memory!\n");
return NULL;
}
@@ -102,7 +124,7 @@
* plus 3X the longest period (or 4X the longest period in "fast" mode).
*/
-int stretch_samples (StretchHandle handle, const short *samples, int num_samples, short *output, float ratio)
+int stretch_samples (StretchHandle handle, const int16_t *samples, int num_samples, int16_t *output, float ratio)
{
struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
int out_samples = 0;
@@ -216,7 +238,7 @@
/* flush any leftover samples out at normal speed */
-int stretch_flush (StretchHandle handle, short *output)
+int stretch_flush (StretchHandle handle, int16_t *output)
{
struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
@@ -254,10 +276,10 @@
* denominator need be completely recalculated.
*/
-static int find_period (struct stretch_cnxt *cnxt, short *samples)
+static int find_period (struct stretch_cnxt *cnxt, int16_t *samples)
{
- unsigned int sum, diff, factor, scaler, best_factor = 0;
- short *calcbuff = samples;
+ uint32_t sum, diff, factor, scaler, best_factor = 0;
+ int16_t *calcbuff = samples;
int period, best_period;
int i, j;
@@ -269,16 +291,16 @@
calcbuff = cnxt->calcbuff;
for (sum = i = j = 0; i < cnxt->longest * 2; i += 2)
- sum += abs (calcbuff [j++] = (samples [i] + samples [i+1]) >> 1);
+ sum += abs32 (calcbuff [j++] = ((int32_t) samples [i] + samples [i+1]) >> 1);
}
else
for (sum = i = 0; i < cnxt->longest; ++i)
- sum += abs (calcbuff [i]) + abs (calcbuff [i+cnxt->longest]);
+ sum += abs32 (calcbuff [i]) + abs32 (calcbuff [i+cnxt->longest]);
// if silence return longest period, else calculate scaler based on largest sum
if (sum)
- scaler = (UINT32_MAX - 1) / sum;
+ scaler = (MAX_CORR - 1) / sum;
else
return cnxt->longest;
@@ -285,13 +307,13 @@
/* accumulate sum for shortest period size */
for (sum = i = 0; i < period; ++i)
- sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
+ sum += abs32 (calcbuff [i]) + abs32 (calcbuff [i+period]);
/* this loop actually cycles through all period lengths */
while (1) {
- short *comp = calcbuff + period * 2;
- short *ref = calcbuff + period;
+ int16_t *comp = calcbuff + period * 2;
+ int16_t *ref = calcbuff + period;
/* compute sum of absolute differences */
@@ -298,7 +320,7 @@
diff = 0;
while (ref != calcbuff)
- diff += abs (*--ref - *--comp);
+ diff += abs32 ((int32_t) *--ref - *--comp);
/*
* Here we calculate and store the resulting correlation
@@ -307,7 +329,7 @@
* precision using integer math, we scale the sum.
*/
- factor = diff ? (sum * scaler) / diff : UINT32_MAX;
+ factor = diff ? (sum * scaler) / diff : MAX_CORR;
if (factor >= best_factor) {
best_factor = factor;
@@ -321,8 +343,7 @@
/* update accumulating sum and current period */
- sum += abs (calcbuff [period * 2]);
- sum += abs (calcbuff [period * 2 + 1]);
+ sum += abs32 (calcbuff [period * 2]) + abs32 (calcbuff [period * 2 + 1]);
period++;
}
@@ -339,9 +360,9 @@
* side of the peak are compared to calculate a more accurate center of the period.
*/
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
+static int find_period_fast (struct stretch_cnxt *cnxt, int16_t *samples)
{
- unsigned int sum, diff, scaler, best_factor = 0;
+ uint32_t sum, diff, scaler, best_factor = 0;
int period, best_period;
int i, j;
@@ -351,15 +372,15 @@
if (cnxt->num_chans == 2)
for (sum = i = j = 0; i < cnxt->longest * 2; i += 4)
- sum += abs (cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2);
+ sum += abs32 (cnxt->calcbuff [j++] = ((int32_t) samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2);
else
for (sum = i = j = 0; i < cnxt->longest * 2; i += 2)
- sum += abs (cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1);
+ sum += abs32 (cnxt->calcbuff [j++] = ((int32_t) samples [i] + samples [i+1]) >> 1);
// if silence return longest period, else calculate scaler based on largest sum
if (sum)
- scaler = (UINT32_MAX - 1) / sum;
+ scaler = (MAX_CORR - 1) / sum;
else
return cnxt->longest;
@@ -366,13 +387,13 @@
/* accumulate sum for shortest period */
for (sum = i = 0; i < period; ++i)
- sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
+ sum += abs32 (cnxt->calcbuff [i]) + abs32 (cnxt->calcbuff [i+period]);
/* this loop actually cycles through all period lengths */
while (1) {
- short *comp = cnxt->calcbuff + period * 2;
- short *ref = cnxt->calcbuff + period;
+ int16_t *comp = cnxt->calcbuff + period * 2;
+ int16_t *ref = cnxt->calcbuff + period;
/* compute sum of absolute differences */
@@ -379,7 +400,7 @@
diff = 0;
while (ref != cnxt->calcbuff)
- diff += abs (*--ref - *--comp);
+ diff += abs32 ((int32_t) *--ref - *--comp);
/*
* Here we calculate and store the resulting correlation
@@ -388,7 +409,7 @@
* precision using integer math, we scale the sum.
*/
- cnxt->results [period] = diff ? (sum * scaler) / diff : UINT32_MAX;
+ cnxt->results [period] = diff ? (sum * scaler) / diff : MAX_CORR;
if (cnxt->results [period] >= best_factor) { /* check if best yet */
best_factor = cnxt->results [period];
@@ -402,18 +423,17 @@
/* update accumulating sum and current period */
- sum += abs (cnxt->calcbuff [period * 2]);
- sum += abs (cnxt->calcbuff [period * 2 + 1]);
+ sum += abs32 (cnxt->calcbuff [period * 2]) + abs32 (cnxt->calcbuff [period * 2 + 1]);
period++;
}
if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
- unsigned int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
- unsigned int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
+ uint32_t high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
+ uint32_t low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
- if (low_side_diff / 2 > high_side_diff)
+ if ((low_side_diff + 1) / 2 > high_side_diff)
best_period = best_period * 2 + 1;
- else if (high_side_diff / 2 > low_side_diff)
+ else if ((high_side_diff + 1) / 2 > low_side_diff)
best_period = best_period * 2 - 1;
else
best_period *= 2;
@@ -441,11 +461,11 @@
* calculated period is currently set for 2400 samples, we have plenty of margin.
*/
-static void merge_blocks (short *output, short *input1, short *input2, int samples)
+static void merge_blocks (int16_t *output, int16_t *input1, int16_t *input2, int samples)
{
int i;
for (i = 0; i < samples; ++i)
- output [i] = (int)(((unsigned int)(input1 [i] + 32768) * (samples - i) +
- (unsigned int)(input2 [i] + 32768) * i) / samples) - 32768;
+ output [i] = (int32_t)(((uint32_t)(input1 [i] + MERGE_OFFSET) * (samples - i) +
+ (uint32_t)(input2 [i] + MERGE_OFFSET) * i) / samples) - MERGE_OFFSET;
}
--- a/stretch.h
+++ b/stretch.h
@@ -19,12 +19,14 @@
// and should contain approximately similar content.
// For independent channels, prefer using multiple StretchHandle-instances.
// see https://github.com/dbry/audio-stretch/issues/6
-// Multiple instances, of course, wil consume more CPU load.
+// Multiple instances, of course, will consume more CPU load.
// In addition, different output amounts need to be handled.
#ifndef STRETCH_H
#define STRETCH_H
+#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -32,8 +34,8 @@
typedef void *StretchHandle;
StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
-int stretch_samples (StretchHandle handle, const short *samples, int num_samples, short *output, float ratio);
-int stretch_flush (StretchHandle handle, short *output);
+int stretch_samples (StretchHandle handle, const int16_t *samples, int num_samples, int16_t *output, float ratio);
+int stretch_flush (StretchHandle handle, int16_t *output);
void stretch_reset (StretchHandle handle);
void stretch_deinit (StretchHandle handle);