shithub: opus

Download patch

ref: 325585954d0875afe6c0cbb6d9cf2c95db3d22cb
parent: 81fc1497b1e5092e2a3df5945705c252b66bd74d
author: Siarhei Volkau <lis8215@gmail.com>
date: Fri Aug 22 10:00:01 EDT 2025

MIPS: tune vq for MIPS

I observed that the overridden exp_rotation1 fully
matches the default C version.

The overridden renormalise_vector in turn differs only
in its celt_inner_prod calculation, so instead of tuning the
existing overrides it is better to remove them completely
and tune celt_inner_prod instead.

This change gives a minor performance improvement for
both DSP and non-DSP MIPS.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>

--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -40,6 +40,7 @@
 #define MIPS_MAC(acc,a,b) \
     __builtin_mips_madd((acc), (int)(a), (int)(b))
 
+#define OVERRIDE_CELT_INNER_PROD
 #define OVERRIDE_DUAL_INNER_PROD
 #define OVERRIDE_XCORR_KERNEL
 
@@ -49,11 +50,50 @@
 #define accumulator_t opus_int32
 #define MIPS_MAC MAC16_16
 
+#define OVERRIDE_CELT_INNER_PROD
 #define OVERRIDE_DUAL_INNER_PROD
 #define OVERRIDE_XCORR_KERNEL
 
 #endif /* any other MIPS */
 
+
+#if defined(OVERRIDE_CELT_INNER_PROD)
+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, /* MIPS override: sums x[j]*y[j] over j = 0..N-1 via MIPS_MAC */
+      const opus_val16 *y, int N, int arch)
+{
+   int j;
+   accumulator_t acc = 0; /* running sum, updated only through MIPS_MAC */
+
+   (void)arch; /* arch is not used by this override; the cast silences unused-parameter warnings */
+
+   for (j = 0; j < N - 3; j += 4) /* main loop: four multiply-accumulates per iteration */
+   {
+      acc = MIPS_MAC(acc, x[j],   y[j]);
+      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+      acc = MIPS_MAC(acc, x[j+2], y[j+2]);
+      acc = MIPS_MAC(acc, x[j+3], y[j+3]);
+   }
+
+   switch (N & 3) { /* tail: handle the N & 3 elements left after the unrolled loop, starting at index j */
+   case 3:
+      acc = MIPS_MAC(acc, x[j],   y[j]);
+      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+      acc = MIPS_MAC(acc, x[j+2], y[j+2]);
+      break;
+   case 2:
+      acc = MIPS_MAC(acc, x[j],   y[j]);
+      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
+      break;
+   case 1:
+      acc = MIPS_MAC(acc, x[j],   y[j]);
+      break;
+   case 0: /* N is a multiple of four: nothing left to add */
+      break;
+   }
+
+   return (opus_val32)acc; /* convert the accumulator to the function's return type */
+}
+#endif /* OVERRIDE_CELT_INNER_PROD */
 
 #if defined(OVERRIDE_DUAL_INNER_PROD)
 static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
--- a/celt/mips/vq_mipsr1.h
+++ /dev/null
@@ -1,114 +1,0 @@
-/* Copyright (c) 2007-2008 CSIRO
-   Copyright (c) 2007-2009 Xiph.Org Foundation
-   Written by Jean-Marc Valin */
-/*
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef VQ_MIPSR1_H__
-#define VQ_MIPSR1_H__
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "mathops.h"
-#include "arch.h"
-
-#define OVERRIDE_vq_exp_rotation1
-static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
-{
-   int i;
-   opus_val16 ms;
-   celt_norm *Xptr;
-   Xptr = X;
-   ms = NEG16(s);
-   for (i=0;i<len-stride;i++)
-   {
-      celt_norm x1, x2;
-      x1 = Xptr[0];
-      x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
-      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
-   }
-   Xptr = &X[len-2*stride-1];
-   for (i=len-2*stride-1;i>=0;i--)
-   {
-      celt_norm x1, x2;
-      x1 = Xptr[0];
-      x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
-      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
-   }
-}
-
-#define OVERRIDE_renormalise_vector
-void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
-{
-   int i;
-#ifdef FIXED_POINT
-   int k;
-#endif
-   long long acc = EPSILON;
-   opus_val32 E;
-   opus_val16 g;
-   opus_val32 t;
-   celt_norm *xptr = X;
-   int X0, X1;
-
-   (void)arch;
-
-   /*if(N %4)
-       printf("error");*/
-   for (i=0;i<N-2;i+=2)
-   {
-      X0 = (int)*xptr++;
-      X1 = (int)*xptr++;
-      acc = __builtin_mips_madd(acc, X0, X0);
-      acc = __builtin_mips_madd(acc, X1, X1);
-   }
-
-   for (;i<N;i++)
-   {
-      X0 = (int)*xptr++;
-      acc = __builtin_mips_madd(acc, X0, X0);
-   }
-
-   E = (opus_val32)acc;
-#ifdef FIXED_POINT
-   k = celt_ilog2(E)>>1;
-#endif
-   t = VSHR32(E, 2*(k-7));
-   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
-
-   xptr = X;
-   for (i=0;i<N;i++)
-   {
-      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
-      xptr++;
-   }
-   /*return celt_sqrt(E);*/
-}
-
-#endif /* VQ_MIPSR1_H__ */
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -39,10 +39,6 @@
 #include "rate.h"
 #include "pitch.h"
 
-#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
-#include "mips/vq_mipsr1.h"
-#endif
-
 #ifndef OVERRIDE_vq_exp_rotation1
 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
 {
--- a/celt_headers.mk
+++ b/celt_headers.mk
@@ -48,7 +48,6 @@
 celt/mips/kiss_fft_mipsr1.h \
 celt/mips/mdct_mipsr1.h \
 celt/mips/pitch_mipsr1.h \
-celt/mips/vq_mipsr1.h \
 celt/x86/pitch_sse.h \
 celt/x86/vq_sse.h \
 celt/x86/x86_arch_macros.h \
--