ref: d9d2b5e927bd607d98b2b76d575f3ff1cd33531a
parent: 86ce8b4238e0c88562f4f32d65b84e373e6c530a
author: David Bryant <david@wavpack.com>
date: Tue Nov 26 14:05:52 EST 2019
dos2unix stretch.[ch]
--- a/stretch.c
+++ b/stretch.c
@@ -1,401 +1,401 @@
-////////////////////////////////////////////////////////////////////////////
-// **** AUDIO-STRETCH **** //
-// Time Domain Harmonic Scaler //
-// Copyright (c) 2019 David Bryant //
-// All Rights Reserved. //
-// Distributed under the BSD Software License (see license.txt) //
-////////////////////////////////////////////////////////////////////////////
-
-// stretch.c
-
-// Time Domain Harmonic Compression and Expansion
-//
-// This library performs time domain harmonic scaling with pitch detection
-// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
-// 1/2 to 2 times its original length. This is done without altering any of
-// the tonal characteristics.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-
-#include "stretch.h"
-
-#define MIN_PERIOD 24 /* minimum allowable pitch period */
-#define MAX_PERIOD 1024 /* maximum allowable pitch period */
-
-struct stretch_cnxt {
- int num_chans, inbuff_samples, shortest, longest, tail, head, verbatim, fast_mode;
- short *inbuff, *calcbuff;
- unsigned int *results;
-};
-
-static void merge_blocks (short *output, short *input1, short *input2, int samples);
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
-static int find_period (struct stretch_cnxt *cnxt, short *samples);
-
-/*
- * Initialize a context of the time stretching code. The shortest and longest periods
- * are specified here. The longest period determines the lowest fundamental frequency
- * that can be handled correctly. Note that higher frequencies can be handled than the
- * shortest period would suggest because multiple periods can be combined, and the
- * worst-case performance will suffer if too short a period is selected.
- */
-
-StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int fast_mode)
-{
- struct stretch_cnxt *cnxt;
-
- if (fast_mode) {
- longest_period = (longest_period + 1) & ~1;
- shortest_period &= ~1;
- }
-
- if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
- printf ("stretch_init(): invalid periods!\n");
- return NULL;
- }
-
- cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
-
- if (cnxt) {
- cnxt->inbuff_samples = longest_period * num_channels * 8;
- cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
- cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
- cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
- }
-
- if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
- fprintf (stderr, "stretch_init(): out of memory!\n");
- return NULL;
- }
-
- cnxt->head = cnxt->tail = cnxt->longest = longest_period * num_channels;
- cnxt->shortest = shortest_period * num_channels;
- cnxt->num_chans = num_channels;
- cnxt->fast_mode = fast_mode;
-
- return (StretchHandle) cnxt;
-}
-
-/*
- * Process the specified samples with the given ratio (which is clipped to the
- * range 0.5 to 2.0). Note that the number of samples refers to total samples for
- * both channels in stereo and can be as large as desired (samples are buffered
- * here). The exact number of samples output is not possible to determine in
- * advance, but it will be close to the number of input samples times the ratio
- * plus or minus a couple of the longest periods.
- */
-
-int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio)
-{
- struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
- int out_samples = 0;
-
- num_samples *= cnxt->num_chans;
-
- if (ratio < 0.5)
- ratio = 0.5;
- else if (ratio > 2.0)
- ratio = 2.0;
-
- /* while we have samples to do... */
-
- do {
- /* if there are more samples and room for them, copy in */
-
- if (num_samples && cnxt->head < cnxt->inbuff_samples) {
- int samples_to_copy = num_samples;
-
- if (samples_to_copy > cnxt->inbuff_samples - cnxt->head)
- samples_to_copy = cnxt->inbuff_samples - cnxt->head;
-
- memcpy (cnxt->inbuff + cnxt->head, samples, samples_to_copy * sizeof (cnxt->inbuff [0]));
- num_samples -= samples_to_copy;
- samples += samples_to_copy;
- cnxt->head += samples_to_copy;
- }
-
- /* while there are enough samples to process, do so */
-
- while (1) {
- if (cnxt->verbatim && cnxt->head > cnxt->tail) {
- int samples_to_copy = cnxt->verbatim;
- if (samples_to_copy > cnxt->head - cnxt->tail)
- samples_to_copy = cnxt->head - cnxt->tail;
-
- memcpy (output + out_samples, cnxt->inbuff + cnxt->tail, samples_to_copy * sizeof (cnxt->inbuff [0]));
- out_samples += samples_to_copy;
- cnxt->verbatim -= samples_to_copy;
- cnxt->tail += samples_to_copy;
- }
- else if (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * 2) {
- if (ratio == 1.0) {
- memcpy (output + out_samples, cnxt->inbuff + cnxt->tail,
- cnxt->longest * 2 * sizeof (cnxt->inbuff [0]));
-
- out_samples += cnxt->longest * 2;
- cnxt->tail += cnxt->longest * 2;
- }
- else {
- int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) :
- find_period (cnxt, cnxt->inbuff + cnxt->tail), incnt, outcnt;
-
- if (ratio < 1.0) {
- merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
- cnxt->inbuff + cnxt->tail + period, period);
-
- out_samples += period;
- cnxt->tail += period * 2;
- incnt = (outcnt = period) * 2;
- }
- else {
- merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
- cnxt->inbuff + cnxt->tail - period, period * 2);
-
- out_samples += period * 2;
- cnxt->tail += period;
- outcnt = (incnt = period) * 2;
- }
-
- cnxt->verbatim = (int) floor (((ratio * incnt) - outcnt) / (1.0 - ratio) / cnxt->num_chans + 0.5) * cnxt->num_chans;
-
- if (cnxt->verbatim < 0) // this should not happen...
- cnxt->verbatim = 0;
- }
- }
- else
- break;
- }
-
- /* if we're almost done with buffer, copy the rest back to beginning */
-
- if (cnxt->head == cnxt->inbuff_samples) {
- int samples_to_move = cnxt->inbuff_samples - cnxt->tail + cnxt->longest;
-
- memmove (cnxt->inbuff, cnxt->inbuff + cnxt->tail - cnxt->longest,
- samples_to_move * sizeof (cnxt->inbuff [0]));
-
- cnxt->head -= cnxt->tail - cnxt->longest;
- cnxt->tail = cnxt->longest;
- }
-
- } while (num_samples);
-
- return out_samples / cnxt->num_chans;
-}
-
-/* flush any leftover samples out at normal speed */
-
-int stretch_flush (StretchHandle handle, short *output)
-{
- struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
- int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
-
- memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
- cnxt->tail = cnxt->head;
-
- return samples_to_copy;
-}
-
-/* free handle */
-
-void stretch_deinit (StretchHandle handle)
-{
- struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
-
- free (cnxt->calcbuff);
- free (cnxt->results);
- free (cnxt->inbuff);
- free (cnxt);
-}
-
-/*
- * The pitch detection is done by finding the period that produces the
- * maximum value for the following correlation formula applied to two
- * consecutive blocks of the given period length:
- *
- * sum of the absolute values of each sample in both blocks
- * ---------------------------------------------------------------------
- * sum of the absolute differences of each corresponding pair of samples
- *
- * This formula was chosen for two reasons. First, it produces output values
- * that can directly compared regardless of the pitch period. Second, the
- * numerator can be accumulated for successive periods, and only the
- * denominator need be completely recalculated.
- */
-
-static int find_period (struct stretch_cnxt *cnxt, short *samples)
-{
- unsigned int sum, diff, factor, best_factor = 0;
- short *calcbuff = samples;
- int period, best_period;
- int i, j;
-
- period = best_period = cnxt->shortest / cnxt->num_chans;
-
- if (cnxt->num_chans == 2) {
- calcbuff = cnxt->calcbuff;
-
- for (i = j = 0; i < cnxt->longest * 2; i += 2)
- calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
- }
-
- /* accumulate sum for shortest period size */
-
- for (sum = i = 0; i < period; ++i)
- sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
-
- /* this loop actually cycles through all period lengths */
-
- while (1) {
- short *comp = calcbuff + period * 2;
- short *ref = calcbuff + period;
-
- /* compute sum of absolute differences */
-
- diff = 0;
-
- while (ref != calcbuff)
- diff += abs (*--ref - *--comp);
-
- /*
- * Here we calculate and store the resulting correlation
- * factor. Note that we must watch for a difference of
- * zero, meaning a perfect match. Also, for increased
- * precision using integer math, we scale the sum. Care
- * must be taken here to avoid possibility of overflow.
- */
-
- factor = diff ? (sum * 128) / diff : UINT32_MAX;
-
- if (factor >= best_factor) {
- best_factor = factor;
- best_period = period;
- }
-
- /* see if we're done */
-
- if (period * cnxt->num_chans == cnxt->longest)
- break;
-
- /* update accumulating sum and current period */
-
- sum += abs (calcbuff [period * 2]);
- sum += abs (calcbuff [period++ * 2 + 1]);
- }
-
- return best_period * cnxt->num_chans;
-}
-
-/*
- * This pitch detection function is similar to find_period() above, except that it
- * is optimized for speed. The audio data corresponding to two maximum periods is
- * averaged 2:1 into the calculation buffer, and then the calulations are done
- * for every other period length. Because the time is essentially proportional to
- * both the number of samples and the number of period lengths to try, this scheme
- * can reduce the time by a factor approaching 4x. The correlation results on either
- * side of the peak are compared to calculate a more accurate center of the period.
- */
-
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
-{
- unsigned int sum, diff, best_factor = 0;
- int period, best_period;
- int i, j;
-
- best_period = period = cnxt->shortest / (cnxt->num_chans * 2);
-
- /* first step is compressing data 2:1 into calcbuff */
-
- if (cnxt->num_chans == 2)
- for (i = j = 0; i < cnxt->longest * 2; i += 4)
- cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2;
- else
- for (i = j = 0; i < cnxt->longest * 2; i += 2)
- cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
-
- /* accumulate sum for shortest period */
-
- for (sum = i = 0; i < period; ++i)
- sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
-
- /* this loop actually cycles through all period lengths */
-
- while (1) {
- short *comp = cnxt->calcbuff + period * 2;
- short *ref = cnxt->calcbuff + period;
-
- /* compute sum of absolute differences */
-
- diff = 0;
-
- while (ref != cnxt->calcbuff)
- diff += abs (*--ref - *--comp);
-
- /*
- * Here we calculate and store the resulting correlation
- * factor. Note that we must watch for a difference of
- * zero, meaning a perfect match. Also, for increased
- * precision using integer math, we scale the sum. Care
- * must be taken here to avoid possibility of overflow.
- */
-
- cnxt->results [period] = diff ? (sum * 128) / diff : UINT32_MAX;
-
- if (cnxt->results [period] >= best_factor) { /* check if best yet */
- best_factor = cnxt->results [period];
- best_period = period;
- }
-
- /* see if we're done */
-
- if (period * cnxt->num_chans * 2 == cnxt->longest)
- break;
-
- /* update accumulating sum and current period */
-
- sum += abs (cnxt->calcbuff [period * 2]);
- sum += abs (cnxt->calcbuff [period++ * 2 + 1]);
- }
-
- if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
- int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
- int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
-
- if (low_side_diff > high_side_diff * 2)
- best_period = best_period * 2 + 1;
- else if (high_side_diff > low_side_diff * 2)
- best_period = best_period * 2 - 1;
- else
- best_period *= 2;
- }
- else
- best_period *= 2; /* shortest or longest use as is */
-
- return best_period * cnxt->num_chans;
-}
-
-/*
- * To combine the two periods into one, each corresponding pair of samples
- * are averaged with a linearly sliding scale. At the beginning of the period
- * the first sample dominates, and at the end the second sample dominates. In
- * this way the resulting block blends with the previous and next blocks.
- *
- * The signed values are offset to unsigned for the calculation and then offset
- * back to signed. This is done to avoid the compression around zero that occurs
- * with calculations of this type on C implementations that round division toward
- * zero.
- */
-
-static void merge_blocks (short *output, short *input1, short *input2, int samples)
-{
- int i;
-
- for (i = 0; i < samples; ++i)
- output [i] = (((input1 [i] + 32768) * (samples - i) +
- (input2 [i] + 32768) * i) / samples) - 32768;
-}
-
+////////////////////////////////////////////////////////////////////////////
+// **** AUDIO-STRETCH **** //
+// Time Domain Harmonic Scaler //
+// Copyright (c) 2019 David Bryant //
+// All Rights Reserved. //
+// Distributed under the BSD Software License (see license.txt) //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.c
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// the tonal characteristics.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+#include "stretch.h"
+
+#define MIN_PERIOD 24 /* minimum allowable pitch period */
+#define MAX_PERIOD 1024 /* maximum allowable pitch period */
+
+struct stretch_cnxt {
+ int num_chans, inbuff_samples, shortest, longest, tail, head, verbatim, fast_mode;
+ short *inbuff, *calcbuff;
+ unsigned int *results;
+};
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples);
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
+static int find_period (struct stretch_cnxt *cnxt, short *samples);
+
+/*
+ * Initialize a context of the time stretching code. The shortest and longest periods
+ * are specified here. The longest period determines the lowest fundamental frequency
+ * that can be handled correctly. Note that higher frequencies can be handled than the
+ * shortest period would suggest because multiple periods can be combined, and the
+ * worst-case performance will suffer if too short a period is selected.
+ */
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int fast_mode)
+{
+ struct stretch_cnxt *cnxt;
+
+ if (fast_mode) {
+ longest_period = (longest_period + 1) & ~1;
+ shortest_period &= ~1;
+ }
+
+ if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
+ printf ("stretch_init(): invalid periods!\n");
+ return NULL;
+ }
+
+ cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
+
+ if (cnxt) {
+ cnxt->inbuff_samples = longest_period * num_channels * 8;
+ cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
+ cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
+ cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
+ }
+
+ if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
+ fprintf (stderr, "stretch_init(): out of memory!\n");
+ return NULL;
+ }
+
+ cnxt->head = cnxt->tail = cnxt->longest = longest_period * num_channels;
+ cnxt->shortest = shortest_period * num_channels;
+ cnxt->num_chans = num_channels;
+ cnxt->fast_mode = fast_mode;
+
+ return (StretchHandle) cnxt;
+}
+
+/*
+ * Process the specified samples with the given ratio (which is clipped to the
+ * range 0.5 to 2.0). Note that the number of samples refers to total samples for
+ * both channels in stereo and can be as large as desired (samples are buffered
+ * here). The exact number of samples output is not possible to determine in
+ * advance, but it will be close to the number of input samples times the ratio
+ * plus or minus a couple of the longest periods.
+ */
+
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio)
+{
+ struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+ int out_samples = 0;
+
+ num_samples *= cnxt->num_chans;
+
+ if (ratio < 0.5)
+ ratio = 0.5;
+ else if (ratio > 2.0)
+ ratio = 2.0;
+
+ /* while we have samples to do... */
+
+ do {
+ /* if there are more samples and room for them, copy in */
+
+ if (num_samples && cnxt->head < cnxt->inbuff_samples) {
+ int samples_to_copy = num_samples;
+
+ if (samples_to_copy > cnxt->inbuff_samples - cnxt->head)
+ samples_to_copy = cnxt->inbuff_samples - cnxt->head;
+
+ memcpy (cnxt->inbuff + cnxt->head, samples, samples_to_copy * sizeof (cnxt->inbuff [0]));
+ num_samples -= samples_to_copy;
+ samples += samples_to_copy;
+ cnxt->head += samples_to_copy;
+ }
+
+ /* while there are enough samples to process, do so */
+
+ while (1) {
+ if (cnxt->verbatim && cnxt->head > cnxt->tail) {
+ int samples_to_copy = cnxt->verbatim;
+ if (samples_to_copy > cnxt->head - cnxt->tail)
+ samples_to_copy = cnxt->head - cnxt->tail;
+
+ memcpy (output + out_samples, cnxt->inbuff + cnxt->tail, samples_to_copy * sizeof (cnxt->inbuff [0]));
+ out_samples += samples_to_copy;
+ cnxt->verbatim -= samples_to_copy;
+ cnxt->tail += samples_to_copy;
+ }
+ else if (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * 2) {
+ if (ratio == 1.0) {
+ memcpy (output + out_samples, cnxt->inbuff + cnxt->tail,
+ cnxt->longest * 2 * sizeof (cnxt->inbuff [0]));
+
+ out_samples += cnxt->longest * 2;
+ cnxt->tail += cnxt->longest * 2;
+ }
+ else {
+ int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) :
+ find_period (cnxt, cnxt->inbuff + cnxt->tail), incnt, outcnt;
+
+ if (ratio < 1.0) {
+ merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+ cnxt->inbuff + cnxt->tail + period, period);
+
+ out_samples += period;
+ cnxt->tail += period * 2;
+ incnt = (outcnt = period) * 2;
+ }
+ else {
+ merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+ cnxt->inbuff + cnxt->tail - period, period * 2);
+
+ out_samples += period * 2;
+ cnxt->tail += period;
+ outcnt = (incnt = period) * 2;
+ }
+
+ cnxt->verbatim = (int) floor (((ratio * incnt) - outcnt) / (1.0 - ratio) / cnxt->num_chans + 0.5) * cnxt->num_chans;
+
+ if (cnxt->verbatim < 0) // this should not happen...
+ cnxt->verbatim = 0;
+ }
+ }
+ else
+ break;
+ }
+
+ /* if we're almost done with buffer, copy the rest back to beginning */
+
+ if (cnxt->head == cnxt->inbuff_samples) {
+ int samples_to_move = cnxt->inbuff_samples - cnxt->tail + cnxt->longest;
+
+ memmove (cnxt->inbuff, cnxt->inbuff + cnxt->tail - cnxt->longest,
+ samples_to_move * sizeof (cnxt->inbuff [0]));
+
+ cnxt->head -= cnxt->tail - cnxt->longest;
+ cnxt->tail = cnxt->longest;
+ }
+
+ } while (num_samples);
+
+ return out_samples / cnxt->num_chans;
+}
+
+/* flush any leftover samples out at normal speed */
+
+int stretch_flush (StretchHandle handle, short *output)
+{
+ struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+ int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
+
+ memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
+ cnxt->tail = cnxt->head;
+
+ return samples_to_copy;
+}
+
+/* free handle */
+
+void stretch_deinit (StretchHandle handle)
+{
+ struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+
+ free (cnxt->calcbuff);
+ free (cnxt->results);
+ free (cnxt->inbuff);
+ free (cnxt);
+}
+
+/*
+ * The pitch detection is done by finding the period that produces the
+ * maximum value for the following correlation formula applied to two
+ * consecutive blocks of the given period length:
+ *
+ * sum of the absolute values of each sample in both blocks
+ * ---------------------------------------------------------------------
+ * sum of the absolute differences of each corresponding pair of samples
+ *
+ * This formula was chosen for two reasons. First, it produces output values
+ * that can directly compared regardless of the pitch period. Second, the
+ * numerator can be accumulated for successive periods, and only the
+ * denominator need be completely recalculated.
+ */
+
+static int find_period (struct stretch_cnxt *cnxt, short *samples)
+{
+ unsigned int sum, diff, factor, best_factor = 0;
+ short *calcbuff = samples;
+ int period, best_period;
+ int i, j;
+
+ period = best_period = cnxt->shortest / cnxt->num_chans;
+
+ if (cnxt->num_chans == 2) {
+ calcbuff = cnxt->calcbuff;
+
+ for (i = j = 0; i < cnxt->longest * 2; i += 2)
+ calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+ }
+
+ /* accumulate sum for shortest period size */
+
+ for (sum = i = 0; i < period; ++i)
+ sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
+
+ /* this loop actually cycles through all period lengths */
+
+ while (1) {
+ short *comp = calcbuff + period * 2;
+ short *ref = calcbuff + period;
+
+ /* compute sum of absolute differences */
+
+ diff = 0;
+
+ while (ref != calcbuff)
+ diff += abs (*--ref - *--comp);
+
+ /*
+ * Here we calculate and store the resulting correlation
+ * factor. Note that we must watch for a difference of
+ * zero, meaning a perfect match. Also, for increased
+ * precision using integer math, we scale the sum. Care
+ * must be taken here to avoid possibility of overflow.
+ */
+
+ factor = diff ? (sum * 128) / diff : UINT32_MAX;
+
+ if (factor >= best_factor) {
+ best_factor = factor;
+ best_period = period;
+ }
+
+ /* see if we're done */
+
+ if (period * cnxt->num_chans == cnxt->longest)
+ break;
+
+ /* update accumulating sum and current period */
+
+ sum += abs (calcbuff [period * 2]);
+ sum += abs (calcbuff [period++ * 2 + 1]);
+ }
+
+ return best_period * cnxt->num_chans;
+}
+
+/*
+ * This pitch detection function is similar to find_period() above, except that it
+ * is optimized for speed. The audio data corresponding to two maximum periods is
+ * averaged 2:1 into the calculation buffer, and then the calulations are done
+ * for every other period length. Because the time is essentially proportional to
+ * both the number of samples and the number of period lengths to try, this scheme
+ * can reduce the time by a factor approaching 4x. The correlation results on either
+ * side of the peak are compared to calculate a more accurate center of the period.
+ */
+
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
+{
+ unsigned int sum, diff, best_factor = 0;
+ int period, best_period;
+ int i, j;
+
+ best_period = period = cnxt->shortest / (cnxt->num_chans * 2);
+
+ /* first step is compressing data 2:1 into calcbuff */
+
+ if (cnxt->num_chans == 2)
+ for (i = j = 0; i < cnxt->longest * 2; i += 4)
+ cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2;
+ else
+ for (i = j = 0; i < cnxt->longest * 2; i += 2)
+ cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+
+ /* accumulate sum for shortest period */
+
+ for (sum = i = 0; i < period; ++i)
+ sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
+
+ /* this loop actually cycles through all period lengths */
+
+ while (1) {
+ short *comp = cnxt->calcbuff + period * 2;
+ short *ref = cnxt->calcbuff + period;
+
+ /* compute sum of absolute differences */
+
+ diff = 0;
+
+ while (ref != cnxt->calcbuff)
+ diff += abs (*--ref - *--comp);
+
+ /*
+ * Here we calculate and store the resulting correlation
+ * factor. Note that we must watch for a difference of
+ * zero, meaning a perfect match. Also, for increased
+ * precision using integer math, we scale the sum. Care
+ * must be taken here to avoid possibility of overflow.
+ */
+
+ cnxt->results [period] = diff ? (sum * 128) / diff : UINT32_MAX;
+
+ if (cnxt->results [period] >= best_factor) { /* check if best yet */
+ best_factor = cnxt->results [period];
+ best_period = period;
+ }
+
+ /* see if we're done */
+
+ if (period * cnxt->num_chans * 2 == cnxt->longest)
+ break;
+
+ /* update accumulating sum and current period */
+
+ sum += abs (cnxt->calcbuff [period * 2]);
+ sum += abs (cnxt->calcbuff [period++ * 2 + 1]);
+ }
+
+ if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
+ int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
+ int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
+
+ if (low_side_diff > high_side_diff * 2)
+ best_period = best_period * 2 + 1;
+ else if (high_side_diff > low_side_diff * 2)
+ best_period = best_period * 2 - 1;
+ else
+ best_period *= 2;
+ }
+ else
+ best_period *= 2; /* shortest or longest use as is */
+
+ return best_period * cnxt->num_chans;
+}
+
+/*
+ * To combine the two periods into one, each corresponding pair of samples
+ * are averaged with a linearly sliding scale. At the beginning of the period
+ * the first sample dominates, and at the end the second sample dominates. In
+ * this way the resulting block blends with the previous and next blocks.
+ *
+ * The signed values are offset to unsigned for the calculation and then offset
+ * back to signed. This is done to avoid the compression around zero that occurs
+ * with calculations of this type on C implementations that round division toward
+ * zero.
+ */
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples)
+{
+ int i;
+
+ for (i = 0; i < samples; ++i)
+ output [i] = (((input1 [i] + 32768) * (samples - i) +
+ (input2 [i] + 32768) * i) / samples) - 32768;
+}
+
--- a/stretch.h
+++ b/stretch.h
@@ -1,37 +1,37 @@
-////////////////////////////////////////////////////////////////////////////
-// **** AUDIO-STRETCH **** //
-// Time Domain Harmonic Scaler //
-// Copyright (c) 2019 David Bryant //
-// All Rights Reserved. //
-// Distributed under the BSD Software License (see license.txt) //
-////////////////////////////////////////////////////////////////////////////
-
-// stretch.h
-
-// Time Domain Harmonic Compression and Expansion
-//
-// This library performs time domain harmonic scaling with pitch detection
-// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
-// 1/2 to 2 times its original length. This is done without altering any of
-// its tonal characteristics.
-
-#ifndef STRETCH_H
-#define STRETCH_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void *StretchHandle;
-
-StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
-int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
-int stretch_flush (StretchHandle handle, short *output);
-void stretch_deinit (StretchHandle handle);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
+////////////////////////////////////////////////////////////////////////////
+// **** AUDIO-STRETCH **** //
+// Time Domain Harmonic Scaler //
+// Copyright (c) 2019 David Bryant //
+// All Rights Reserved. //
+// Distributed under the BSD Software License (see license.txt) //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.h
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// its tonal characteristics.
+
+#ifndef STRETCH_H
+#define STRETCH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *StretchHandle;
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
+int stretch_flush (StretchHandle handle, short *output);
+void stretch_deinit (StretchHandle handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+