ref: ce06772d7c0c616825b745e6ae9aadc7d9258af9
parent: c8eb062332e89a541e54c725d27022c184e4a457
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Fri May 23 13:01:21 EDT 2025
Better MDCT/combfilter accuracy with MULT32_32_P31
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -55,8 +55,8 @@
#define SAMP_MIN -SAMP_MAX
#ifdef ENABLE_QEXT
-# define S_MUL(a,b) MULT32_32_Q31(b, a)
-# define S_MUL2(a,b) MULT32_32_Q31(b, a)
+# define S_MUL(a,b) MULT32_32_P31(b, a)
+# define S_MUL2(a,b) MULT32_32_P31(b, a)
#else
# define S_MUL(a,b) MULT16_32_Q15(b, a)
# define S_MUL2(a,b) MULT16_32_Q16(b, a)
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -184,7 +184,7 @@
#ifdef ENABLE_QEXT
typedef opus_val32 celt_coef;
#define COEF_ONE Q31ONE
-#define MULT_COEF_32(a, b) MULT32_32_Q31(a,b)
+#define MULT_COEF_32(a, b) MULT32_32_P31(a,b)
#define MAC_COEF_32_ARM(c, a, b) ADD32((c), MULT32_32_Q32(a,b))
#define MULT_COEF(a, b) MULT32_32_Q31(a,b)
#define MULT_COEF_TAPS(a, b) SHL32(MULT16_16(a,b), 1)
@@ -346,6 +346,7 @@
#define MULT32_32_Q16(a,b) ((a)*(b))
#define MULT32_32_Q31(a,b) ((a)*(b))
+#define MULT32_32_P31(a,b) ((a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -176,6 +176,10 @@
+ MULT_COEF_32(g10,x2)
+ MULT_COEF_32(g11,ADD32(x1,x3))
+ MULT_COEF_32(g12,ADD32(x0,x4));
+#ifdef FIXED_POINT
+ /* A bit of bias seems to help here. */
+ y[i] = SUB32(y[i], 1);
+#endif
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
@@ -237,6 +241,10 @@
+ MULT_COEF_32(MULT_COEF(f,g10),x2)
+ MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
+ MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
+#ifdef FIXED_POINT
+ /* A bit of bias seems to help here. */
+ y[i] = SUB32(y[i], 3);
+#endif
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
--- a/celt/fixed_debug.h
+++ b/celt/fixed_debug.h
@@ -41,8 +41,10 @@
extern opus_int64 celt_mips;
#endif
+#define MULT16_16U(a,b) ((opus_uint32)(a)*(opus_uint32)(b))
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_P31(a,b) ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff))>>(16+7)) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -71,6 +71,14 @@
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#endif
+/** 32x32 multiplication, followed by a 31-bit shift right (with rounding). Result fits in 32 bits.
+ *  With OPUS_FAST_INT64, the product is formed in 64 bits and 1073741824 (2^30, i.e. half
+ *  of 2^31) is added before the shift, so the result is rounded instead of truncated.
+ *  Otherwise each operand is split into a high part (shift right by 16) and a low part
+ *  (& 0x0000ffff): the high*high product is shifted left by 1, while the unsigned low*low
+ *  product (shifted right by 16+7) and the two high*low cross products (each shifted right
+ *  by 7) are summed together with a rounding offset of 128 and then shifted right by 8. */
+#if OPUS_FAST_INT64
+#define MULT32_32_P31(a,b) ((opus_val32)SHR(1073741824+(opus_int64)(a)*(opus_int64)(b),31))
+#else
+#define MULT16_16U(a,b) ((opus_uint32)(a)*(opus_uint32)(b))
+#define MULT32_32_P31(a,b) ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)SHR(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff)),16+7) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
+#endif
+
/** 32x32 multiplication, followed by a 32-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q32(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),32))
--
⑨