shithub: opus

Download patch

ref: 1dc14e901d75b7fa656704d323613a19ffe53d5b
parent: 688bb91ba83f7d57a2c5649e9751c5c667bea8ca
author: Siarhei Volkau <lis8215@gmail.com>
date: Sun Aug 17 14:49:41 EDT 2025

refactor: MIPS DSP: inline assembly

GCC supports all MIPS DSP and DSPr2 instructions in the form
of builtin functions; this is a more convenient approach than
inline assembly.

Moreover, performance on MIPS heavily depends on instruction scheduling.
GCC is unable to schedule inline assembly properly because it
doesn't know what exactly the asm routine does.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Signed-off-by: Jean-Marc Valin <jeanmarcv@google.com>

--- a/celt/mips/celt_mipsr1.h
+++ b/celt/mips/celt_mipsr1.h
@@ -97,19 +97,18 @@
    {
       opus_val16 f;
       opus_val32 res;
+      long long acc;
       f = MULT16_16_Q15(window[i],window[i]);
       x0= x[i-T1+2];
 
-      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
+      acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
+      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
+      res = __builtin_mips_extr_w(acc, 15);
 
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
-
-      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
-
       y[i] = x[i] + res;
 
       x4=x3;
@@ -134,13 +133,14 @@
    for (i=overlap;i<N;i++)
    {
       opus_val32 res;
+      long long acc;
       x0=x[i-T1+2];
 
-      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
+      acc = __builtin_mips_mult((int)g10, (int)x2);
+      acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1));
+      acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0));
+      res = __builtin_mips_extr_w(acc, 15);
 
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
-      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
       y[i] = x[i] + res;
       x4=x3;
       x3=x2;
--- a/celt/mips/fixed_generic_mipsr1.h
+++ b/celt/mips/fixed_generic_mipsr1.h
@@ -35,38 +35,30 @@
 
 #undef MULT16_32_Q15_ADD
 static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_madd(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef MULT16_32_Q15_SUB
 static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_msub(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef MULT16_16_Q15_ADD
 static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_madd(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef MULT16_16_Q15_SUB
 static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_msub(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 
@@ -73,54 +65,42 @@
 #undef MULT16_32_Q16
 static inline int MULT16_32_Q16(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
-    return c;
+    long long acc = __builtin_mips_mult(a, b);
+    return __builtin_mips_extr_w(acc, 16);
 }
 
 #undef MULT16_32_P16
 static inline int MULT16_32_P16(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
-    return c;
+    long long acc = __builtin_mips_mult(a, b);
+    return __builtin_mips_extr_r_w(acc, 16);
 }
 
 #undef MULT16_32_Q15
 static inline int MULT16_32_Q15(int a, int b)
 {
-    int c;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
-    return c;
+    long long acc = __builtin_mips_mult(a, b);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef MULT32_32_Q31
 static inline int MULT32_32_Q31(int a, int b)
 {
-    int r;
-    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
-    return r;
+    long long acc = __builtin_mips_mult(a, b);
+    return __builtin_mips_extr_w(acc, 31);
 }
 
 #undef PSHR32
 static inline int PSHR32(int a, int shift)
 {
-    int r;
-    asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
-    return r;
+    return __builtin_mips_shra_r_w(a, shift);
 }
 
 #undef MULT16_16_P15
 static inline int MULT16_16_P15(int a, int b)
 {
-    int r;
-    asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
-    asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r):  "0" (r), "i"(15));
-    return r;
+    int r = a * b;
+    return __builtin_mips_shra_r_w(r, 15);
 }
 
 #endif /* CELT_FIXED_GENERIC_MIPSR1_H */
--- a/celt/mips/kiss_fft_mipsr1.h
+++ b/celt/mips/kiss_fft_mipsr1.h
@@ -37,20 +37,16 @@
 
 #undef S_MUL_ADD
 static inline int S_MUL_ADD(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_madd(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef S_MUL_SUB
 static inline int S_MUL_SUB(int a, int b, int c, int d) {
-    int m;
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
-    return m;
+    long long acc = __builtin_mips_mult(a, b);
+    acc = __builtin_mips_msub(acc, c, d);
+    return __builtin_mips_extr_w(acc, 15);
 }
 
 #undef C_MUL
@@ -58,13 +54,12 @@
 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     kiss_fft_cpx m;
 
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
-
+    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r);
+    long long acc2 = __builtin_mips_mult((int)a.r, (int)b.i);
+    acc1 = __builtin_mips_msub(acc1, (int)a.i, (int)b.i);
+    acc2 = __builtin_mips_madd(acc2, (int)a.i, (int)b.r);
+    m.r = __builtin_mips_extr_w(acc1, 15);
+    m.i = __builtin_mips_extr_w(acc2, 15);
     return m;
 }
 #undef C_MULC
@@ -72,13 +67,12 @@
 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     kiss_fft_cpx m;
 
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
-    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
-    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
-    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
-    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
-
+    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r);
+    long long acc2 = __builtin_mips_mult((int)a.i, (int)b.r);
+    acc1 = __builtin_mips_madd(acc1, (int)a.i, (int)b.i);
+    acc2 = __builtin_mips_msub(acc2, (int)a.r, (int)b.i);
+    m.r = __builtin_mips_extr_w(acc1, 15);
+    m.i = __builtin_mips_extr_w(acc2, 15);
     return m;
 }
 
--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -39,26 +39,22 @@
       int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
 {
    int j;
-   opus_val32 xy01=0;
-   opus_val32 xy02=0;
+   long long acc1 = 0;
+   long long acc2 = 0;
 
    (void)arch;
 
-   asm volatile("MULT $ac1, $0, $0");
-   asm volatile("MULT $ac2, $0, $0");
    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
-   for (j=0;j<N;j++)
+   for (j=0;j<N;j+=2)
    {
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
-      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
-      ++j;
-      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
-      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+       acc1 = __builtin_mips_madd(acc1, (int)x[j],   (int)y01[j]);
+       acc2 = __builtin_mips_madd(acc2, (int)x[j],   (int)y02[j]);
+       acc1 = __builtin_mips_madd(acc1, (int)x[j+1], (int)y01[j+1]);
+       acc2 = __builtin_mips_madd(acc2, (int)x[j+1], (int)y02[j+1]);
    }
-   asm volatile ("mflo %0, $ac1": "=r"(xy01));
-   asm volatile ("mflo %0, $ac2": "=r"(xy02));
-   *xy1 = xy01;
-   *xy2 = xy02;
+
+   *xy1 = (opus_val32)acc1;
+   *xy2 = (opus_val32)acc2;
 }
 
 static inline void xcorr_kernel_mips(const opus_val16 * x,
--- a/celt/mips/vq_mipsr1.h
+++ b/celt/mips/vq_mipsr1.h
@@ -70,7 +70,8 @@
 #ifdef FIXED_POINT
    int k;
 #endif
-   opus_val32 E = EPSILON;
+   long long acc = EPSILON;
+   opus_val32 E;
    opus_val16 g;
    opus_val32 t;
    celt_norm *xptr = X;
@@ -78,26 +79,23 @@
 
    (void)arch;
 
-   asm volatile("mult $ac1, $0, $0");
-   asm volatile("MTLO %0, $ac1" : :"r" (E));
    /*if(N %4)
        printf("error");*/
    for (i=0;i<N-2;i+=2)
    {
       X0 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
-
       X1 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
+      acc = __builtin_mips_madd(acc, X0, X0);
+      acc = __builtin_mips_madd(acc, X1, X1);
    }
 
    for (;i<N;i++)
    {
       X0 = (int)*xptr++;
-      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+      acc = __builtin_mips_madd(acc, X0, X0);
    }
 
-   asm volatile("MFLO %0, $ac1" : "=r" (E));
+   E = (opus_val32)acc;
 #ifdef FIXED_POINT
    k = celt_ilog2(E)>>1;
 #endif
--