ref: 9f7e502e0683bfbf8bfe9ba48220df27c258bf9e
parent: 7f3fb20185e4f23392b6a9674c274738a58e7283
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Aug 12 19:49:35 EDT 2016
Reducing dependencies in deemphasis(). Reordering the add with VERY_SMALL shortens the loop-carried dependency chain from 2 add + 1 mul (11 cycles on Haswell) to 1 add + 1 mul (8 cycles). This makes the entire decoder about 1.5% faster.
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -225,7 +225,7 @@
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
{
- celt_sig tmp = x[j] + m + VERY_SMALL;
+ celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
scratch[j] = tmp;
}
@@ -246,7 +246,7 @@
{
for (j=0;j<N;j++)
{
- celt_sig tmp = x[j] + m + VERY_SMALL;
+ celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
}