shithub: opus

Download patch

ref: ce06772d7c0c616825b745e6ae9aadc7d9258af9
parent: c8eb062332e89a541e54c725d27022c184e4a457
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Fri May 23 13:01:21 EDT 2025

Better MDCT/combfilter accuracy with MULT32_32_P31

--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -55,8 +55,8 @@
 #define SAMP_MIN -SAMP_MAX
 
 #ifdef ENABLE_QEXT
-#   define S_MUL(a,b) MULT32_32_Q31(b, a)
-#   define S_MUL2(a,b) MULT32_32_Q31(b, a)
+#   define S_MUL(a,b) MULT32_32_P31(b, a)
+#   define S_MUL2(a,b) MULT32_32_P31(b, a)
 #else
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 #   define S_MUL2(a,b) MULT16_32_Q16(b, a)
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -184,7 +184,7 @@
 #ifdef ENABLE_QEXT
 typedef opus_val32 celt_coef;
 #define COEF_ONE Q31ONE
-#define MULT_COEF_32(a, b) MULT32_32_Q31(a,b)
+#define MULT_COEF_32(a, b) MULT32_32_P31(a,b)
 #define MAC_COEF_32_ARM(c, a, b) ADD32((c), MULT32_32_Q32(a,b))
 #define MULT_COEF(a, b) MULT32_32_Q31(a,b)
 #define MULT_COEF_TAPS(a, b) SHL32(MULT16_16(a,b), 1)
@@ -346,6 +346,7 @@
 
 #define MULT32_32_Q16(a,b)     ((a)*(b))
 #define MULT32_32_Q31(a,b)     ((a)*(b))
+#define MULT32_32_P31(a,b)     ((a)*(b))
 
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
 #define MAC16_32_Q16(c,a,b)     ((c)+(a)*(b))
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -176,6 +176,10 @@
                + MULT_COEF_32(g10,x2)
                + MULT_COEF_32(g11,ADD32(x1,x3))
                + MULT_COEF_32(g12,ADD32(x0,x4));
+#ifdef FIXED_POINT
+      /* A bit of bias seems to help here. */
+      y[i] = SUB32(y[i], 1);
+#endif
       y[i] = SATURATE(y[i], SIG_SAT);
       x4=x3;
       x3=x2;
@@ -237,6 +241,10 @@
                + MULT_COEF_32(MULT_COEF(f,g10),x2)
                + MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
                + MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
+#ifdef FIXED_POINT
+      /* A bit of bias seems to help here. */
+      y[i] = SUB32(y[i], 3);
+#endif
       y[i] = SATURATE(y[i], SIG_SAT);
       x4=x3;
       x3=x2;
--- a/celt/fixed_debug.h
+++ b/celt/fixed_debug.h
@@ -41,8 +41,10 @@
 extern opus_int64 celt_mips;
 #endif
 
+#define MULT16_16U(a,b) ((opus_uint32)(a)*(opus_uint32)(b))
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_P31(a,b) ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff))>>(16+7)) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
 #define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -71,6 +71,14 @@
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
 #endif
 
+/** 32x32 multiplication, followed by a 31-bit shift right (with rounding). Result fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT32_32_P31(a,b) ((opus_val32)SHR(1073741824+(opus_int64)(a)*(opus_int64)(b),31))
+#else
+#define MULT16_16U(a,b) ((opus_uint32)(a)*(opus_uint32)(b))
+#define MULT32_32_P31(a,b) ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)SHR(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff)),16+7) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
+#endif
+
 /** 32x32 multiplication, followed by a 32-bit shift right. Results fits in 32 bits */
 #if OPUS_FAST_INT64
 #define MULT32_32_Q32(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),32))
--