shithub: opus

Download patch

ref: 9f7e502e0683bfbf8bfe9ba48220df27c258bf9e
parent: 7f3fb20185e4f23392b6a9674c274738a58e7283
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Aug 12 19:49:35 EDT 2016

Reducing dependencies in deemphasis()

Reordering the add with VERY_SMALL shortens the loop-carried dependency chain from 2 adds + 1 mul
(11 cycles on Haswell) to 1 add + 1 mul (8 cycles). This makes the entire decoder about
1.5% faster.

--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -225,7 +225,7 @@
          /* Shortcut for the standard (non-custom modes) case */
          for (j=0;j<N;j++)
          {
-            celt_sig tmp = x[j] + m + VERY_SMALL;
+            celt_sig tmp = x[j] + VERY_SMALL + m;
             m = MULT16_32_Q15(coef0, tmp);
             scratch[j] = tmp;
          }
@@ -246,7 +246,7 @@
          {
             for (j=0;j<N;j++)
             {
-               celt_sig tmp = x[j] + m + VERY_SMALL;
+               celt_sig tmp = x[j] + VERY_SMALL + m;
                m = MULT16_32_Q15(coef0, tmp);
                y[j*C] = SCALEOUT(SIG2WORD16(tmp));
             }