ref: 51644d58847510189688dea64903c9e1e965fc27
parent: ab9082c00bf02763d47bab09698b37e4ef32d12f
author: Siarhei Volkau <lis8215@gmail.com>
date: Sat Aug 23 13:40:32 EDT 2025
refactor: _celt_lpc performance improvement The code is faster because it: - avoids a 64-bit shift on each iteration - matches the multiply-accumulate pattern - might be autovectorized (not verified) Signed-off-by: Siarhei Volkau <lis8215@gmail.com> Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -59,8 +59,15 @@
for (i = 0; i < p; i++) {
/* Sum up this iteration's reflection coefficient */
opus_val32 rr = 0;
+#if defined (FIXED_POINT) && OPUS_FAST_INT64
+ opus_int64 acc = 0;
for (j = 0; j < i; j++)
+ acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]);
+ rr = (opus_val32)SHR(acc, 31);
+#else
+ for (j = 0; j < i; j++)
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+#endif
rr += SHR32(ac[i + 1],6);
r = -frac_div32(SHL32(rr,6), error);
/* Update LPC coefficients and total error */
--
⑨