shithub: audio-stretch

Download patch

ref: c0576b103d2a13975e7e2644fa9b0a76020603ec
parent: b56508b18124746746909f74dfad5b626580f55f
author: David Bryant <david@wavpack.com>
date: Sun Oct 16 16:57:51 EDT 2022

issue #5: refinement and bug fixes on cascaded instances

--- a/main.c
+++ b/main.c
@@ -24,11 +24,12 @@
 
 static const char *usage =
 " Usage:     AUDIO-STRETCH [-options] infile.wav outfile.wav\n\n"
-" Options:  -r<n.n> = stretch ratio (0.5 to 2.0, default = 1.0)\n"
+" Options:  -r<n.n> = stretch ratio (0.25 to 4.0, default = 1.0)\n"
 "           -u<n>   = upper freq period limit (default = 333 Hz)\n"
 "           -l<n>   = lower freq period limit (default = 55 Hz)\n"
 "           -c      = cycle through all ratios, starting higher\n"
 "           -cc     = cycle through all ratios, starting lower\n"
+"           -d      = force dual instance even for shallow ratios\n"
 "           -s      = scale rate to preserve duration (not pitch)\n"
 "           -f      = fast pitch detection (default >= 32 kHz)\n"
 "           -n      = normal pitch detection (default < 32 kHz)\n"
@@ -74,8 +75,8 @@
 
 int main (argc, argv) int argc; char **argv;
 {
-    int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, cycle_ratio = 0;
-    int upper_frequency = 333, lower_frequency = 55, min_period, max_period;
+    int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, force_dual = 0, cycle_ratio = 0;
+    int buffer_samples = BUFFER_SAMPLES, upper_frequency = 333, lower_frequency = 55, min_period, max_period;
     uint32_t samples_to_process, insamples = 0, outsamples = 0;
     char *infilename = NULL, *outfilename = NULL;
     RiffChunkHeader riff_chunk_header;
@@ -137,6 +138,10 @@
                         cycle_ratio++;
                         break;
 
+                    case 'D': case 'd':
+                        force_dual = 1;
+                        break;
+
                     case 'F': case 'f':
                         force_fast = 1;
                         break;
@@ -310,7 +315,7 @@
     max_period = WaveHeader.SampleRate / lower_frequency;
     int flags = 0;
 
-    if (ratio < 0.5 || ratio > 2.0)
+    if (force_dual || ratio < 0.5 || ratio > 2.0)
         flags |= STRETCH_DUAL_FLAG;
 
     if ((force_fast || WaveHeader.SampleRate >= 32000) && !force_normal)
@@ -317,8 +322,9 @@
         flags |= STRETCH_FAST_FLAG;
 
     if (verbose_mode)
-        fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s\n",
-            min_period, max_period, WaveHeader.NumChannels, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode");
+        fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s, %s\n",
+            min_period, max_period, WaveHeader.NumChannels, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode",
+            (flags & STRETCH_DUAL_FLAG) ? "dual instance" : "single instance");
 
     if (!quiet_mode && ratio == 1.0 && !cycle_ratio)
         fprintf (stderr, "warning: a ratio of 1.0 will do nothing but copy the WAV file!\n");
@@ -343,9 +349,14 @@
     uint32_t scaled_rate = scale_rate ? (uint32_t)(WaveHeader.SampleRate * ratio + 0.5) : WaveHeader.SampleRate;
     write_pcm_wav_header (outfile, 0, WaveHeader.NumChannels, 2, scaled_rate);
 
-    int16_t *inbuffer = malloc (BUFFER_SAMPLES * WaveHeader.BlockAlign);
-    int16_t *outbuffer = malloc ((BUFFER_SAMPLES * 4 + max_period * 8) * WaveHeader.BlockAlign);
+    if (cycle_ratio)
+        ratio = (flags & STRETCH_DUAL_FLAG) ? 4.0 : 2.0;
 
+    int max_expected_samples = stretch_output_capacity (stretcher, buffer_samples, ratio);
+    int16_t *outbuffer = malloc (max_expected_samples * WaveHeader.BlockAlign);
+    int16_t *inbuffer = malloc (buffer_samples * WaveHeader.BlockAlign);
+    int max_generated_stretch = 0, max_generated_flush = 0;
+
     if (!inbuffer || !outbuffer) {
         fprintf (stderr, "can't allocate required memory!\n");
         fclose (infile);
@@ -354,23 +365,42 @@
 
     while (1) {
         int samples_read = fread (inbuffer, WaveHeader.BlockAlign,
-            samples_to_process >= BUFFER_SAMPLES ? BUFFER_SAMPLES : samples_to_process, infile);
+            samples_to_process >= buffer_samples ? buffer_samples : samples_to_process, infile);
         int samples_generated;
 
         insamples += samples_read;
         samples_to_process -= samples_read;
 
-        if (cycle_ratio)
-            ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 0.75 : -0.75)) + 1.25;
+        if (cycle_ratio) {
+            if (flags & STRETCH_DUAL_FLAG)
+                ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 1.875 : -1.875)) + 2.125;
+            else
+                ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 0.75 : -0.75)) + 1.25;
+        }
 
-        if (samples_read)
+        if (samples_read) {
             samples_generated = stretch_samples (stretcher, inbuffer, samples_read, outbuffer, ratio);
-        else
+
+            if (samples_generated > max_generated_stretch)
+                max_generated_stretch = samples_generated;
+        }
+        else {
             samples_generated = stretch_flush (stretcher, outbuffer);
 
+            if (samples_generated > max_generated_flush)
+                max_generated_flush = samples_generated;
+        }
+
         if (samples_generated) {
             fwrite (outbuffer, WaveHeader.BlockAlign, samples_generated, outfile);
             outsamples += samples_generated;
+
+            if (samples_generated > max_expected_samples) {
+                fprintf (stderr, "%s: generated samples (%d) exceeded expected (%d)!\n", samples_read ? "stretch" : "flush",
+                    samples_generated, max_expected_samples);
+                fclose (infile);
+                return 1;
+            }
         }
 
         if (!samples_read && !samples_generated)
@@ -393,6 +423,8 @@
         if (scale_rate)
             fprintf (stderr, "sample rate changed from %lu Hz to %lu Hz\n",
                 (unsigned long) WaveHeader.SampleRate, (unsigned long) scaled_rate);
+        fprintf (stderr, "max expected samples = %d, actually seen = %d stretch, %d flush\n",
+            max_expected_samples, max_generated_stretch, max_generated_flush);
     }
 
     return 0;
--- a/stretch.c
+++ b/stretch.c
@@ -69,10 +69,12 @@
 StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int flags)
 {
     struct stretch_cnxt *cnxt;
+    int max_periods = 3;
 
     if (flags & STRETCH_FAST_FLAG) {
         longest_period = (longest_period + 1) & ~1;
         shortest_period &= ~1;
+        max_periods = 4;
     }
 
     if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
@@ -83,7 +85,7 @@
     cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
 
     if (cnxt) {
-        cnxt->inbuff_samples = longest_period * num_channels * 6;
+        cnxt->inbuff_samples = longest_period * num_channels * max_periods;
         cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
 
         if (num_channels == 2 || (flags & STRETCH_FAST_FLAG))
@@ -105,7 +107,7 @@
 
     if (flags & STRETCH_DUAL_FLAG) {
         cnxt->next = stretch_init (shortest_period, longest_period, num_channels, flags & ~STRETCH_DUAL_FLAG);
-        cnxt->intermediate = calloc (longest_period * num_channels * 4, sizeof (*cnxt->intermediate));
+        cnxt->intermediate = calloc (longest_period * num_channels * max_periods, sizeof (*cnxt->intermediate));
     }
 
     return (StretchHandle) cnxt;
@@ -119,13 +121,49 @@
 void stretch_reset (StretchHandle handle)
 {
     struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+
     cnxt->head = cnxt->tail = cnxt->longest;
+    memset (cnxt->inbuff, 0, cnxt->tail * sizeof (*cnxt->inbuff));    // zero the buffer entries below the reset tail
 
     if (cnxt->next)
-        cnxt->next->head = cnxt->next->tail = cnxt->next->longest;
+        stretch_reset (cnxt->next);    // recurse so the cascaded instance is reset (and cleared) the same way
 }
 
+/*
+ * Return how many samples (per channel) the caller should reserve in the 'output' array passed to
+ * stretch_samples() and stretch_flush(). max_num_samples is the largest 'num_samples' that will be
+ * passed to stretch_samples(), and max_ratio is the largest stretch ratio that will be requested.
+ */
 
+int stretch_output_capacity (StretchHandle handle, int max_num_samples, float max_ratio)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+    int max_period = cnxt->longest / cnxt->num_chans;    // NOTE(review): presumably cnxt->longest counts interleaved samples - confirm
+    int max_expected_samples;
+    float next_ratio;    // only assigned and only read when cnxt->next is set
+
+    if (cnxt->next) {    // cascaded: this instance is limited to 0.5 - 2.0, the remainder goes to the next one
+        if (max_ratio < 0.5) {
+            next_ratio = max_ratio / 0.5;
+            max_ratio = 0.5;
+        }
+        else if (max_ratio > 2.0) {
+            next_ratio = max_ratio / 2.0;
+            max_ratio = 2.0;
+        }
+        else
+            next_ratio = 1.0;
+    }
+
+    max_expected_samples = (int) ceil (max_num_samples * ceil (max_ratio * 2.0) / 2.0) +    // ratio rounded up to a multiple of 0.5
+        max_period * (cnxt->fast_mode ? 4 : 3);    // plus a margin of 4 (fast mode) or 3 longest periods
+
+    if (cnxt->next)    // this instance's worst-case output is the next instance's worst-case input
+        max_expected_samples = stretch_output_capacity (cnxt->next, max_expected_samples, next_ratio);
+
+    return max_expected_samples;
+}
+
 /*
  * Process the specified samples with the given ratio (which is clipped to the
  * range 0.5 to 2.0). Note that the number of samples refers to total samples for
@@ -140,10 +178,21 @@
     struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
     int out_samples = 0, next_samples = 0;
     int16_t *outbuf = output;
+    float next_ratio;
 
     if (cnxt->next) {
         outbuf = cnxt->intermediate;
-        ratio = sqrt (ratio);
+
+        if (ratio < 0.5) {
+            next_ratio = ratio / 0.5;
+            ratio = 0.5;
+        }
+        else if (ratio > 2.0) {
+            next_ratio = ratio / 2.0;
+            ratio = 2.0;
+        }
+        else
+            next_ratio = 1.0;
     }
 
     num_samples *= cnxt->num_chans;
@@ -173,10 +222,17 @@
         /* while there are enough samples to process, do so */
 
         while (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * (cnxt->fast_mode ? 3 : 2)) {
-            int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) :
-                find_period (cnxt, cnxt->inbuff + cnxt->tail);
             float process_ratio;
+            int period;
 
+            if (ratio != 1.0 || cnxt->outsamples_error)
+                period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) :
+                    find_period (cnxt, cnxt->inbuff + cnxt->tail);
+            else
+                period = cnxt->longest;
+
+            // printf ("%d\n", period / cnxt->num_chans);
+
             /*
              * Once we have calculated the best-match period, there are 4 possible transformations
              * available to convert the input samples to output samples. Obviously we can simply
@@ -202,7 +258,12 @@
             }
             else if (process_ratio == 1.0) {
                 memcpy (outbuf + out_samples, cnxt->inbuff + cnxt->tail, period * 2 * sizeof (cnxt->inbuff [0]));
-                cnxt->outsamples_error += (period * 2.0) - (period * 2.0 * ratio);
+
+                if (ratio != 1.0)
+                    cnxt->outsamples_error += (period * 2.0) - (period * 2.0 * ratio);
+                else
+                    cnxt->outsamples_error = 0;
+
                 out_samples += period * 2;
                 cnxt->tail += period * 2;
             }
@@ -236,7 +297,7 @@
                 fprintf (stderr, "stretch_samples: fatal programming error: process_ratio == %g\n", process_ratio);
 
             if (cnxt->next) {
-                next_samples += stretch_samples (cnxt->next, outbuf, out_samples / cnxt->num_chans, output + next_samples * cnxt->num_chans, ratio);
+                next_samples += stretch_samples (cnxt->next, outbuf, out_samples / cnxt->num_chans, output + next_samples * cnxt->num_chans, next_ratio);
                 out_samples = 0;
             }
         }
@@ -258,17 +319,28 @@
     return cnxt->next ? next_samples : out_samples / cnxt->num_chans;
 }  
 
-/* flush any leftover samples out at normal speed */
+/*
+ * Flush any leftover samples out at normal speed. For cascaded (dual) instances this must be called
+ * twice to flush completely; alternatively, simply keep calling it until it returns zero samples.
+ */
 
 int stretch_flush (StretchHandle handle, int16_t *output)
 {
     struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
-    int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
+    int samples_leftover = (cnxt->head - cnxt->tail) / cnxt->num_chans;    // per-channel count still buffered here
+    int samples_flushed;
 
-    memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
-    cnxt->tail = cnxt->head;
+    if (cnxt->next && samples_leftover)    // push our leftovers through the next instance at ratio 1.0
+        samples_flushed = stretch_samples (cnxt->next, cnxt->inbuff + cnxt->tail, samples_leftover, output, 1.0);
+    else if (cnxt->next)    // nothing left in this instance: flush the next one instead
+        samples_flushed = stretch_flush (cnxt->next, output);
+    else {    // single instance: copy the leftovers straight to the output
+        memcpy (output, cnxt->inbuff + cnxt->tail, samples_leftover * cnxt->num_chans * sizeof (*output));
+        samples_flushed = samples_leftover;
+    }
 
-    return samples_to_copy;
+    cnxt->tail = cnxt->head;    // everything buffered in this instance has now been consumed
+    return samples_flushed;
 }
 
 /* free handle */
@@ -281,8 +353,10 @@
     free (cnxt->results);
     free (cnxt->inbuff);
 
-    if (cnxt->next)
+    if (cnxt->next) {
         stretch_deinit (cnxt->next);
+        free (cnxt->intermediate);
+    }
 
     free (cnxt);
 }
--- a/stretch.h
+++ b/stretch.h
@@ -37,6 +37,7 @@
 typedef void *StretchHandle;
 
 StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int flags);
+int stretch_output_capacity (StretchHandle handle, int max_num_samples, float max_ratio);
 int stretch_samples (StretchHandle handle, const int16_t *samples, int num_samples, int16_t *output, float ratio);
 int stretch_flush (StretchHandle handle, int16_t *output);
 void stretch_reset (StretchHandle handle);