ref: b6622037961ff521b215cbf43d680b1b37438d99
parent: 6e79c8982f988d5caa0984d438e979078389f868
author: robs <robs>
date: Wed Aug 26 12:50:04 EDT 2009
cvsd speedup
--- a/ChangeLog
+++ b/ChangeLog
@@ -35,6 +35,8 @@
File formats:
+ o CVSD encode/decode speed-ups. (Kimberly Rockwell, P. Chaintreuil)
+
Audio device drivers:
o Add native windows audio output driver. (Pavel Karneliuk)
--- a/src/cvsd.c
+++ b/src/cvsd.c
@@ -27,6 +27,18 @@
* June 1, 1998 - Chris Bagwell (cbagwell@sprynet.com)
* Fixed compile warnings reported by Kjetil Torgrim Homme
* <kjetilho@ifi.uio.no>
+ *
+ * June 20, 2006 - Kimberly Rockwell (pyxis13317 (at) yahoo.com)
+ * Speed optimization: Unrolled float_conv() loop in separate
+ * functions for encoding and decoding. 15% speed up decoding.
+ *
+ * Aug. 24, 2009 - P. Chaintreuil (sox-cvsd-peep (at) parallaxshift.com)
+ * Speed optimization: Replaced calls to memmove() with a
+ * mirrored circular buffer. This doubles the size of the
+ * dec.output_filter (48 -> 96 floats) and enc.input_filter
+ * (16 -> 32 floats), but keeps the memory from having to
+ * be copied so many times. 56% speed increase decoding;
+ * less than 5% encoding speed increase.
*/
#include "sox_i.h"
@@ -47,6 +59,12 @@
/* ---------------------------------------------------------------------- */
+/* This float_conv() function is not used as more specialized/optimized
+ * versions exist below. However, those new versions are tied to
+ * very precise filters defined in cvsdfilt.h. If those are modified
+ * or different filters are found to be required, this function may
+ * be needed. Thus I leave it here for possible future use, but commented
+ * out to avoid compiler warnings about it not being used.
static float float_conv(float const *fp1, float const *fp2,int n)
{
float res = 0;
@@ -54,7 +72,91 @@
res += (*fp1++) * (*fp2++);
return res;
}
+*/
+static float float_conv_enc(float const *fp1, float const *fp2)
+{
+ /* This is a specialized version of float_conv() for encoding
+ * which simply assumes a CVSD_ENC_FILTERLEN (16) length of
+ * the two arrays and unrolls that loop.
+ *
+ * fp1 should point at the current window in enc.input_filter and
+ * must have CVSD_ENC_FILTERLEN (16) readable entries.
+ *
+ * fp2 should be one of the enc_filter_xx_y() tables listed
+ * in cvsdfilt.h. At minimum, fp2 must be CVSD_ENC_FILTERLEN
+ * (16) entries long.
+ */
+ float res = 0;
+
+ /* unrolling loop */
+ res += fp1[0] * fp2[0];
+ res += fp1[1] * fp2[1];
+ res += fp1[2] * fp2[2];
+ res += fp1[3] * fp2[3];
+ res += fp1[4] * fp2[4];
+ res += fp1[5] * fp2[5];
+ res += fp1[6] * fp2[6];
+ res += fp1[7] * fp2[7];
+ res += fp1[8] * fp2[8];
+ res += fp1[9] * fp2[9];
+ res += fp1[10] * fp2[10];
+ res += fp1[11] * fp2[11];
+ res += fp1[12] * fp2[12];
+ res += fp1[13] * fp2[13];
+ res += fp1[14] * fp2[14];
+ res += fp1[15] * fp2[15];
+
+ return res;
+}
+
+static float float_conv_dec(float const *fp1, float const *fp2)
+{
+ /* This is a specialized version of float_conv() for decoding
+ * which assumes a specific length and structure to the data
+ * in fp2.
+ *
+ * fp1 should point at the current window in dec.output_filter and
+ * must have CVSD_DEC_FILTERLEN (48) readable entries.
+ *
+ * fp2 should be one of the dec_filter_xx() tables listed
+ * in cvsdfilt.h. fp2 is assumed to be CVSD_DEC_FILTERLEN
+ * (48) entries long, is assumed to have 0.0 in the last
+ * entry, and is a symmetrical mirror around fp2[23] (ie,
+ * fp2[22] == fp2[24], fp2[0] == fp2[46], etc).
+ */
+ float res = 0;
+
+ /* unrolling loop, also taking advantage of the symmetry
+ * of the sampling rate array*/
+ res += (fp1[0] + fp1[46]) * fp2[0];
+ res += (fp1[1] + fp1[45]) * fp2[1];
+ res += (fp1[2] + fp1[44]) * fp2[2];
+ res += (fp1[3] + fp1[43]) * fp2[3];
+ res += (fp1[4] + fp1[42]) * fp2[4];
+ res += (fp1[5] + fp1[41]) * fp2[5];
+ res += (fp1[6] + fp1[40]) * fp2[6];
+ res += (fp1[7] + fp1[39]) * fp2[7];
+ res += (fp1[8] + fp1[38]) * fp2[8];
+ res += (fp1[9] + fp1[37]) * fp2[9];
+ res += (fp1[10] + fp1[36]) * fp2[10];
+ res += (fp1[11] + fp1[35]) * fp2[11];
+ res += (fp1[12] + fp1[34]) * fp2[12];
+ res += (fp1[13] + fp1[33]) * fp2[13];
+ res += (fp1[14] + fp1[32]) * fp2[14];
+ res += (fp1[15] + fp1[31]) * fp2[15];
+ res += (fp1[16] + fp1[30]) * fp2[16];
+ res += (fp1[17] + fp1[29]) * fp2[17];
+ res += (fp1[18] + fp1[28]) * fp2[18];
+ res += (fp1[19] + fp1[27]) * fp2[19];
+ res += (fp1[20] + fp1[26]) * fp2[20];
+ res += (fp1[21] + fp1[25]) * fp2[21];
+ res += (fp1[22] + fp1[24]) * fp2[22];
+ res += (fp1[23]) * fp2[23];
+
+ return res;
+}
+
/* ---------------------------------------------------------------------- */
/*
* some remarks about the implementation of the CVSD decoder
@@ -124,8 +226,10 @@
/*
* zero the filter
*/
- for(fp1 = p->c.dec.output_filter, i = CVSD_DEC_FILTERLEN; i > 0; i--)
+ for(fp1 = p->c.dec.output_filter, i = CVSD_DEC_FILTERLEN*2; i > 0; i--)
*fp1++ = 0;
+ /* initialize mirror circular buffer offset to anything sane. */
+ p->c.dec.offset = CVSD_DEC_FILTERLEN - 1;
return (SOX_SUCCESS);
}
@@ -145,9 +249,11 @@
/*
* zero the filter
*/
- for(fp1 = p->c.enc.input_filter, i = CVSD_ENC_FILTERLEN; i > 0; i--)
+ for(fp1 = p->c.enc.input_filter, i = CVSD_ENC_FILTERLEN*2; i > 0; i--)
*fp1++ = 0;
p->c.enc.recon_int = 0;
+ /* initialize mirror circular buffer offset to anything sane. */
+ p->c.enc.offset = CVSD_ENC_FILTERLEN - 1;
return(SOX_SUCCESS);
}
@@ -205,21 +311,32 @@
p->com.mla_int *= p->com.mla_tc0;
if ((p->com.overload == 0) || (p->com.overload == 7))
p->com.mla_int += p->com.mla_tc1;
- memmove(p->c.dec.output_filter+1, p->c.dec.output_filter,
- sizeof(p->c.dec.output_filter)-sizeof(float));
+
+ /* shift output filter window in mirror circular buffer. */
+ if (p->c.dec.offset != 0)
+ --p->c.dec.offset;
+ else p->c.dec.offset = CVSD_DEC_FILTERLEN - 1;
+ /* write into both halves of the mirror circular buffer */
if (p->com.overload & 1)
- p->c.dec.output_filter[0] = p->com.mla_int;
+ {
+ p->c.dec.output_filter[p->c.dec.offset] = p->com.mla_int;
+ p->c.dec.output_filter[p->c.dec.offset + CVSD_DEC_FILTERLEN] = p->com.mla_int;
+ }
else
- p->c.dec.output_filter[0] = -p->com.mla_int;
+ {
+ p->c.dec.output_filter[p->c.dec.offset] = -p->com.mla_int;
+ p->c.dec.output_filter[p->c.dec.offset + CVSD_DEC_FILTERLEN] = -p->com.mla_int;
+ }
+
/*
* check if the next output is due
*/
p->com.phase += p->com.phase_inc;
if (p->com.phase >= 4) {
- oval = float_conv(p->c.dec.output_filter,
- (p->cvsd_rate < 24000) ?
- dec_filter_16 : dec_filter_32,
- CVSD_DEC_FILTERLEN);
+ oval = float_conv_dec(
+ p->c.dec.output_filter + p->c.dec.offset,
+ (p->cvsd_rate < 24000) ?
+ dec_filter_16 : dec_filter_32);
lsx_debug_more("input %d %f\n", debug_count, p->com.mla_int);
lsx_debug_more("recon %d %f\n", debug_count, oval);
debug_count++;
@@ -251,19 +368,27 @@
if (p->com.phase >= 4) {
if (done >= nsamp)
return done;
- memmove(p->c.enc.input_filter+1, p->c.enc.input_filter,
- sizeof(p->c.enc.input_filter)-sizeof(float));
- p->c.enc.input_filter[0] = (*buf++) /
- ((float)SOX_SAMPLE_MAX);
+
+ /* shift input filter window in mirror circular buffer. */
+ if (p->c.enc.offset != 0)
+ --p->c.enc.offset;
+ else p->c.enc.offset = CVSD_ENC_FILTERLEN - 1;
+
+ /* write into both halves of the mirror circular buffer */
+ p->c.enc.input_filter[p->c.enc.offset] =
+ p->c.enc.input_filter[p->c.enc.offset
+ + CVSD_ENC_FILTERLEN] =
+ (*buf++) /
+ ((float)SOX_SAMPLE_MAX);
done++;
}
p->com.phase &= 3;
/* insert input filter here! */
- inval = float_conv(p->c.enc.input_filter,
+ inval = float_conv_enc(
+ p->c.enc.input_filter + p->c.enc.offset,
(p->cvsd_rate < 24000) ?
(enc_filter_16[(p->com.phase >= 2)]) :
- (enc_filter_32[p->com.phase]),
- CVSD_ENC_FILTERLEN);
+ (enc_filter_32[p->com.phase]));
/*
* encode one bit
*/
--- a/src/cvsd.h
+++ b/src/cvsd.h
@@ -36,11 +36,15 @@
} com;
union {
struct {
- float output_filter[CVSD_DEC_FILTERLEN];
+ /* mirror circular buffer */
+ float output_filter[CVSD_DEC_FILTERLEN*2];
+ unsigned offset; /* into output_filter; always in first half */
} dec;
struct {
float recon_int;
- float input_filter[CVSD_ENC_FILTERLEN];
+ /* mirror circular buffer */
+ float input_filter[CVSD_ENC_FILTERLEN*2];
+ unsigned offset; /* into input_filter; always in first half */
} enc;
} c;
struct {