ref: 325585954d0875afe6c0cbb6d9cf2c95db3d22cb
parent: 81fc1497b1e5092e2a3df5945705c252b66bd74d
author: Siarhei Volkau <lis8215@gmail.com>
date: Fri Aug 22 10:00:01 EDT 2025
MIPS: tune vq for MIPS I observed that the overridden exp_rotation1 fully matches the default C version. The overridden renormalise_vector, in turn, differs only in the celt_inner_prod calculation, so instead of tuning the existing overrides it is worth removing them completely and tuning celt_inner_prod instead. This change gives a minor performance improvement for both DSP and non-DSP MIPS. Signed-off-by: Siarhei Volkau <lis8215@gmail.com> Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -40,6 +40,7 @@
#define MIPS_MAC(acc,a,b) \
__builtin_mips_madd((acc), (int)(a), (int)(b))
+#define OVERRIDE_CELT_INNER_PROD
#define OVERRIDE_DUAL_INNER_PROD
#define OVERRIDE_XCORR_KERNEL
@@ -49,11 +50,50 @@
#define accumulator_t opus_int32
#define MIPS_MAC MAC16_16
+#define OVERRIDE_CELT_INNER_PROD
#define OVERRIDE_DUAL_INNER_PROD
#define OVERRIDE_XCORR_KERNEL
#endif /* any other MIPS */
+
+#if defined(OVERRIDE_CELT_INNER_PROD)
+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x,
+ const opus_val16 *y, int N, int arch)
+{ /* Folds MIPS_MAC(acc, x[j], y[j]) over j = 0..N-1, starting from 0, and returns the result as opus_val32. */
+ int j;
+ accumulator_t acc = 0;
+ /* arch is accepted but not used by this implementation. */
+ (void)arch;
+ /* Main loop: four multiply-accumulate steps per iteration while at least four elements remain. */
+ for (j = 0; j < N - 3; j += 4)
+ {
+ acc = MIPS_MAC(acc, x[j], y[j]);
+ acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+ acc = MIPS_MAC(acc, x[j+2], y[j+2]);
+ acc = MIPS_MAC(acc, x[j+3], y[j+3]);
+ }
+ /* Tail: for N >= 0, j now indexes the first unprocessed element and N & 3 elements are left. */
+ switch (N & 3) {
+ case 3:
+ acc = MIPS_MAC(acc, x[j], y[j]);
+ acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+ acc = MIPS_MAC(acc, x[j+2], y[j+2]);
+ break;
+ case 2:
+ acc = MIPS_MAC(acc, x[j], y[j]);
+ acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+ break;
+ case 1:
+ acc = MIPS_MAC(acc, x[j], y[j]);
+ break;
+ case 0: /* N is a multiple of four: nothing left to accumulate. */
+ break;
+ }
+ /* Cast the accumulator to the declared return type. */
+ return (opus_val32)acc;
+}
+#endif /* OVERRIDE_CELT_INNER_PROD */
#if defined(OVERRIDE_DUAL_INNER_PROD)
static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
--- a/celt/mips/vq_mipsr1.h
+++ /dev/null
@@ -1,114 +1,0 @@
-/* Copyright (c) 2007-2008 CSIRO
- Copyright (c) 2007-2009 Xiph.Org Foundation
- Written by Jean-Marc Valin */
-/*
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef VQ_MIPSR1_H__
-#define VQ_MIPSR1_H__
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "mathops.h"
-#include "arch.h"
-
-#define OVERRIDE_vq_exp_rotation1
-static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
-{
- int i;
- opus_val16 ms;
- celt_norm *Xptr;
- Xptr = X;
- ms = NEG16(s);
- for (i=0;i<len-stride;i++)
- {
- celt_norm x1, x2;
- x1 = Xptr[0];
- x2 = Xptr[stride];
- Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
- *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
- }
- Xptr = &X[len-2*stride-1];
- for (i=len-2*stride-1;i>=0;i--)
- {
- celt_norm x1, x2;
- x1 = Xptr[0];
- x2 = Xptr[stride];
- Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
- *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
- }
-}
-
-#define OVERRIDE_renormalise_vector
-void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
-{
- int i;
-#ifdef FIXED_POINT
- int k;
-#endif
- long long acc = EPSILON;
- opus_val32 E;
- opus_val16 g;
- opus_val32 t;
- celt_norm *xptr = X;
- int X0, X1;
-
- (void)arch;
-
- /*if(N %4)
- printf("error");*/
- for (i=0;i<N-2;i+=2)
- {
- X0 = (int)*xptr++;
- X1 = (int)*xptr++;
- acc = __builtin_mips_madd(acc, X0, X0);
- acc = __builtin_mips_madd(acc, X1, X1);
- }
-
- for (;i<N;i++)
- {
- X0 = (int)*xptr++;
- acc = __builtin_mips_madd(acc, X0, X0);
- }
-
- E = (opus_val32)acc;
-#ifdef FIXED_POINT
- k = celt_ilog2(E)>>1;
-#endif
- t = VSHR32(E, 2*(k-7));
- g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
-
- xptr = X;
- for (i=0;i<N;i++)
- {
- *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
- xptr++;
- }
- /*return celt_sqrt(E);*/
-}
-
-#endif /* VQ_MIPSR1_H__ */
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -39,10 +39,6 @@
#include "rate.h"
#include "pitch.h"
-#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
-#include "mips/vq_mipsr1.h"
-#endif
-
#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
--- a/celt_headers.mk
+++ b/celt_headers.mk
@@ -48,7 +48,6 @@
celt/mips/kiss_fft_mipsr1.h \
celt/mips/mdct_mipsr1.h \
celt/mips/pitch_mipsr1.h \
-celt/mips/vq_mipsr1.h \
celt/x86/pitch_sse.h \
celt/x86/vq_sse.h \
celt/x86/x86_arch_macros.h \
--
⑨