shithub: opus

--- a/celt/celt_lpc.c

+++ b/celt/celt_lpc.c

@@ -158,7 +158,17 @@

       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);

       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);

       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);

-      xcorr_kernel(rnum, x+i-ord, sum, ord, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+      {

+         opus_val32 sum_c[4];

+         memcpy(sum_c, sum, sizeof(sum_c));

+         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);

+#endif

+         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);

+      }

+#endif

       y[i  ] = SROUND16(sum[0], SIG_SHIFT);

       y[i+1] = SROUND16(sum[1], SIG_SHIFT);

       y[i+2] = SROUND16(sum[2], SIG_SHIFT);

@@ -222,8 +232,17 @@

       sum[1]=_x[i+1];

       sum[2]=_x[i+2];

       sum[3]=_x[i+3];

-      xcorr_kernel(rden, y+i, sum, ord, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+      {

+         opus_val32 sum_c[4];

+         memcpy(sum_c, sum, sizeof(sum_c));

+         xcorr_kernel_c(rden, y+i, sum_c, ord);

+#endif

+         xcorr_kernel(rden, y+i, sum, ord, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);

+      }

+#endif

       /* Patch up the result to compensate for the fact that this is an IIR */

       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);

       _y[i  ] = sum[0];

--- a/celt/pitch.c

+++ b/celt/pitch.c

@@ -262,7 +262,16 @@

    for (i=0;i<max_pitch-3;i+=4)

       opus_val32 sum[4]={0,0,0,0};

-      xcorr_kernel(_x, _y+i, sum, len, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+      {

+         opus_val32 sum_c[4]={0,0,0,0};

+         xcorr_kernel_c(_x, _y+i, sum_c, len);

+#endif

+         xcorr_kernel(_x, _y+i, sum, len, arch);

+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)

+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);

+      }

+#endif

       xcorr[i]=sum[0];

       xcorr[i+1]=sum[1];

       xcorr[i+2]=sum[2];

--- a/celt/x86/celt_lpc_sse4_1.c

+++ b/celt/x86/celt_lpc_sse4_1.c

@@ -64,9 +64,16 @@

       opus_val32 sums[4] = {0};

       __m128i vecSum, vecX;

-      xcorr_kernel(rnum, x+i-ord, sums, ord, arch);

+#if defined(OPUS_CHECK_ASM)

+      {

+         opus_val32 sums_c[4] = {0};

+         xcorr_kernel_c(rnum, x+i-ord, sums_c, ord);

+#endif

+         xcorr_kernel(rnum, x+i-ord, sums, ord, arch);

+#if defined(OPUS_CHECK_ASM)

+         celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);

+      }

+#endif

       vecSum = _mm_loadu_si128((__m128i *)sums);

       vecSum = _mm_add_epi32(vecSum, vecNoA);

       vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);

--

⑨