shithub: opus

--- a/celt/mips/fixed_generic_mipsr1.h

+++ b/celt/mips/fixed_generic_mipsr1.h

@@ -35,6 +35,9 @@

 #if defined (__mips_dsp) && __mips == 32

+typedef short v2i16 __attribute__((vector_size(4)));

+typedef char  v2i8  __attribute__((vector_size(4)));

 #undef MULT16_32_Q16

 static inline int MULT16_32_Q16(int a, int b)

@@ -74,6 +77,75 @@

     int r = a * b;

     return __builtin_mips_shra_r_w(r, 15);

+}

+#define OVERRIDE_CELT_MAXABS16

+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)

+{

+   int i;

+   v2i16 v2max = (v2i16){ 0, 0 };

+   v2i16 x01, x23;

+   const v2i16 *x2;

+   opus_val16 maxlo, maxhi;

+   int loops;

+   if ((long)x & 2 && len > 0) {

+      v2max = (v2i16){ 0, ABS16(*x) };

+      x++;

+      len--;

+   }

+   x2 = __builtin_assume_aligned(x, 4);

+   loops = len / 4;

+   for (i = 0; i < loops; i++)

+   {

+       x01 = *x2++;

+       x23 = *x2++;

+       x01 = __builtin_mips_absq_s_ph(x01);

+       x23 = __builtin_mips_absq_s_ph(x23);

+       __builtin_mips_cmp_lt_ph(v2max, x01);

+       v2max = __builtin_mips_pick_ph(x01, v2max);

+       __builtin_mips_cmp_lt_ph(v2max, x23);

+       v2max = __builtin_mips_pick_ph(x23, v2max);

+   }

+   switch (len & 3) {

+   case 3:

+       x01 = __builtin_mips_absq_s_ph(*x2);

+       __builtin_mips_cmp_lt_ph(v2max, x01);

+       v2max = __builtin_mips_pick_ph(x01, v2max);

+       maxlo = EXTRACT16((opus_val32)v2max);

+       maxhi = EXTRACT16((opus_val32)v2max >> 16);

+       maxlo = MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1]));

+       break;

+   case 2:

+       x01 = __builtin_mips_absq_s_ph(*x2);

+       __builtin_mips_cmp_lt_ph(v2max, x01);

+       v2max = __builtin_mips_pick_ph(x01, v2max);

+       maxlo = EXTRACT16((opus_val32)v2max);

+       maxhi = EXTRACT16((opus_val32)v2max >> 16);

+       maxlo = MAX16(maxlo, maxhi);

+       break;

+   case 1:

+       maxlo = EXTRACT16((opus_val32)v2max);

+       maxhi = EXTRACT16((opus_val32)v2max >> 16);

+       return MAX16(MAX16(maxlo, maxhi), ABS16(x[len - 1]));

+       break;

+   case 0:

+       maxlo = EXTRACT16((opus_val32)v2max);

+       maxhi = EXTRACT16((opus_val32)v2max >> 16);

+       maxlo = MAX16(maxlo, maxhi);

+       break;

+   default:

+       __builtin_unreachable();

+   }

+   /* C version might return 0x8000, this one can't

+    * because abs is saturated here. Since result

+    * used only for determine dynamic range

+    * in ilog2-like context it's worth to add 1

+    * for proper magnitude whether saturated

+    */

+   return (opus_val32)maxlo + 1;

 #elif __mips == 32

--

⑨