shithub: opus

Download patch

ref: d503125101116d2b399287824d7902b6351b691d
parent: 65b131ec0984973166973878b14506f782704a3f
author: Timothy B. Terriberry <tterribe@xiph.org>
date: Fri Feb 9 12:26:35 EST 2024

Add check-asm for fixed-point xcorr_kernel().

Compare the output of xcorr_kernel() against the results of
 xcorr_kernel_c() when configured with --enable-check-asm.
Currently this is only checked in fixed point, as a float check
 requires more sophisticated error analysis and may need to be
 customized for each vector implementation.

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>

--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -158,7 +158,17 @@
       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
-      xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4];
+         memcpy(sum_c, sum, sizeof(sum_c));
+         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
+#endif
+         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       y[i  ] = SROUND16(sum[0], SIG_SHIFT);
       y[i+1] = SROUND16(sum[1], SIG_SHIFT);
       y[i+2] = SROUND16(sum[2], SIG_SHIFT);
@@ -222,8 +232,17 @@
       sum[1]=_x[i+1];
       sum[2]=_x[i+2];
       sum[3]=_x[i+3];
-      xcorr_kernel(rden, y+i, sum, ord, arch);
-
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4];
+         memcpy(sum_c, sum, sizeof(sum_c));
+         xcorr_kernel_c(rden, y+i, sum_c, ord);
+#endif
+         xcorr_kernel(rden, y+i, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       /* Patch up the result to compensate for the fact that this is an IIR */
       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
       _y[i  ] = sum[0];
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -262,7 +262,16 @@
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
-      xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4]={0,0,0,0};
+         xcorr_kernel_c(_x, _y+i, sum_c, len);
+#endif
+         xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       xcorr[i]=sum[0];
       xcorr[i+1]=sum[1];
       xcorr[i+2]=sum[2];
--- a/celt/x86/celt_lpc_sse4_1.c
+++ b/celt/x86/celt_lpc_sse4_1.c
@@ -64,9 +64,16 @@
    {
       opus_val32 sums[4] = {0};
       __m128i vecSum, vecX;
-
-      xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
-
+#if defined(OPUS_CHECK_ASM)
+      {
+         opus_val32 sums_c[4] = {0};
+         xcorr_kernel_c(rnum, x+i-ord, sums_c, ord); /* C reference result */
+#endif
+         xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+#if defined(OPUS_CHECK_ASM)
+         celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);
+      }
+#endif
       vecSum = _mm_loadu_si128((__m128i *)sums);
       vecSum = _mm_add_epi32(vecSum, vecNoA);
       vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
--