ref: d503125101116d2b399287824d7902b6351b691d
parent: 65b131ec0984973166973878b14506f782704a3f
author: Timothy B. Terriberry <tterribe@xiph.org>
date: Fri Feb 9 12:26:35 EST 2024
Add check-asm for fixed-point xcorr_kernel(). Compare the output of xcorr_kernel() against the results of xcorr_kernel_c() when configured with --enable-check-asm. Currently this is only checked in fixed point, as a float check requires more sophisticated error analysis and may need to be customized for each vector implementation. Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -158,7 +158,17 @@
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
- xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4];
+ memcpy(sum_c, sum, sizeof(sum_c));
+ xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
+#endif
+ xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
y[i ] = SROUND16(sum[0], SIG_SHIFT);
y[i+1] = SROUND16(sum[1], SIG_SHIFT);
y[i+2] = SROUND16(sum[2], SIG_SHIFT);
@@ -222,8 +232,17 @@
sum[1]=_x[i+1];
sum[2]=_x[i+2];
sum[3]=_x[i+3];
- xcorr_kernel(rden, y+i, sum, ord, arch);
-
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4];
+ memcpy(sum_c, sum, sizeof(sum_c));
+ xcorr_kernel_c(rden, y+i, sum_c, ord);
+#endif
+ xcorr_kernel(rden, y+i, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -262,7 +262,16 @@
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
- xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4]={0,0,0,0};
+ xcorr_kernel_c(_x, _y+i, sum_c, len);
+#endif
+ xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
--- a/celt/x86/celt_lpc_sse4_1.c
+++ b/celt/x86/celt_lpc_sse4_1.c
@@ -64,9 +64,16 @@
{
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
-
- xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
-
+#if defined(OPUS_CHECK_ASM)
+ {
+ opus_val32 sums_c[4] = {0};
+ xcorr_kernel_c(rnum, x+i-ord, sums_c, ord); /* reference output must go to sums_c, not sums */
+#endif
+ xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+#if defined(OPUS_CHECK_ASM)
+ celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);
+ }
+#endif
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
--
⑨