ref: 11dba8d902983e271da4a7499ef56e4d7bd7111f
parent: cd159fd1ec8ae64e6cd1b69854034560b5f1c419
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sun Aug 14 13:56:45 EDT 2016
Apply deemphasis to both channels in the same loop for the simple case. This makes the decoder ~2.5% faster on x86 because the stereo loop takes the same processing time as one mono loop due to the dependency chain.
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -177,6 +177,36 @@
}
#endif /* CUSTOM_MODES */
+#ifndef CUSTOM_MODES
+/* Special case for stereo with no downsampling and no accumulation. This is quite common and we can make it faster by
+ processing both channels in the same loop, reducing overhead due to the dependency loop in the IIR filter.
+ in[0]/in[1]: N input samples per channel; pcm: 2*N interleaved outputs (channel 0 at even indices, channel 1 at odd); coef0: feedback coefficient; mem[0]/mem[1]: per-channel filter state, read on entry and overwritten on exit. */
+static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
+ celt_sig *mem)
+{
+ celt_sig * OPUS_RESTRICT x0;
+ celt_sig * OPUS_RESTRICT x1;
+ celt_sig m0, m1;
+ int j;
+ x0=in[0];
+ x1=in[1];
+ m0 = mem[0]; /* Start from the filter state supplied in mem[]. */
+ m1 = mem[1];
+ for (j=0;j<N;j++)
+ {
+ celt_sig tmp0, tmp1;
+ /* Add VERY_SMALL to x[] first to reduce dependency chain: that sum does not involve m0/m1, so only the last addition depends on the previous iteration. */
+ tmp0 = x0[j] + VERY_SMALL + m0;
+ tmp1 = x1[j] + VERY_SMALL + m1;
+ m0 = MULT16_32_Q15(coef0, tmp0); /* Feedback term that is added to the next sample. */
+ m1 = MULT16_32_Q15(coef0, tmp1);
+ pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
+ pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
+ }
+ mem[0] = m0; /* Write the final filter state back to mem[]. */
+ mem[1] = m1;
+}
+#endif /* !CUSTOM_MODES */
#ifndef RESYNTH
static
@@ -190,6 +220,14 @@
opus_val16 coef0;
VARDECL(celt_sig, scratch);
SAVE_STACK;
+#ifndef CUSTOM_MODES
+ /* Short version for common case. */
+ if (downsample == 1 && C == 2 && !accum)
+ {
+ deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
+ return;
+ }
+#endif
#ifndef FIXED_POINT
(void)accum;
celt_assert(accum==0);