ref: 16778617b8af13ed9b54bcbeeb311ec94552ad02
parent: 50e37a320850daae24546913f119f7923eb33eef
author: lieff <lieff@users.noreply.github.com>
date: Sat Jan 27 11:14:58 EST 2018
add MINIMP3_ONLY_SIMD define, which strips some generic code.
--- a/README.md
+++ b/README.md
@@ -78,6 +78,7 @@
Note that you must define ``MINIMP3_IMPLEMENTATION`` in exactly one source file.
You can ``#include`` ``minimp3.h`` in as many files as you like.
Also you can use ``MINIMP3_ONLY_MP3`` define to strip MP1/MP2 decoding code.
+The ``MINIMP3_ONLY_SIMD`` define strips the generic (non-SSE/NEON) code paths; it is always enabled on x64/arm64 targets.
Then. we decode the input stream frame-by-frame:
--- a/minimp3.h
+++ b/minimp3.h
@@ -80,6 +80,11 @@
#define MINIMP3_MIN(a, b) ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b) ((a) < (b) ? (b) : (a))
+#if defined(_M_X64) || defined(_M_ARM64) || (defined(__x86_64__) && defined(__SSE2__)) || defined(__aarch64__)
+// x64 always has SSE2 and arm64 always has NEON, so the generic code is not needed
+#define MINIMP3_ONLY_SIMD
+#endif
+
#if defined(_MSC_VER) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
# include <immintrin.h>
# define HAVE_SSE 1
@@ -95,7 +100,7 @@
# define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s))
# define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
typedef __m128 f4;
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
#define minimp3_cpuid __cpuid
#else
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
@@ -124,6 +129,9 @@
#endif
static int have_simd()
{
+#ifdef MINIMP3_ONLY_SIMD
+ return 1;
+#else
static int g_have_simd;
int CPUInfo[4];
#ifdef MINIMP3_TEST
@@ -145,6 +153,7 @@
#endif
g_have_simd = 1;
return 0;
+#endif
}
#elif defined(__arm)
# include <arm_neon.h>
@@ -162,7 +171,7 @@
# define VREV(x) vrev64q_f32(x)
typedef float32x4_t f4;
static int have_simd()
-{
+{ // TODO: detect neon for !MINIMP3_ONLY_SIMD
return 1;
}
#else
@@ -982,6 +991,7 @@
VSTORE(grbuf + 14 - i, VREV(vd));
}
#endif
+#ifndef MINIMP3_ONLY_SIMD
for(; i < 8; i++)
{
float u = grbuf[18 + i];
@@ -989,6 +999,7 @@
grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
}
+#endif
}
}
@@ -1310,6 +1321,9 @@
}
} else
#endif
+#ifdef MINIMP3_ONLY_SIMD
+ {}
+#else
for (; k < n; k++)
{
float t[4][8], *x, *y = grbuf + k;
@@ -1368,6 +1382,7 @@
y[2*18] = t[1][7];
y[3*18] = t[3][7];
}
+#endif
}
static short mp3d_scale_pcm(float sample)
@@ -1493,6 +1508,9 @@
}
} else
#endif
+#ifdef MINIMP3_ONLY_SIMD
+ {}
+#else
for (i = 14; i >= 0; i--)
{
#define LOAD(k) float w0 = *w++; float w1 = *w++; float * vz = &zlin[4*i - k*64]; float * vy = &zlin[4*i - (15 - k)*64];
@@ -1521,6 +1539,7 @@
dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
}
+#endif
}
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, short *pcm, float *lins)
--- a/minimp3_test.c
+++ b/minimp3_test.c
@@ -1,4 +1,5 @@
//#define MINIMP3_ONLY_MP3
+//#define MINIMP3_ONLY_SIMD
#define MINIMP3_IMPLEMENTATION
#include "minimp3.h"
#include <stdio.h>
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -10,6 +10,6 @@
scripts/test.sh
gcov minimp3_test.c
-gcc -O2 -g -Wall -Wextra -fno-asynchronous-unwind-tables -fno-stack-protector -ffunction-sections \
--fdata-sections -Wl,--gc-sections -o minimp3 minimp3_test.c -lm
+gcc -O2 -g -Wall -Wextra -Wmissing-prototypes -Werror -fno-asynchronous-unwind-tables -fno-stack-protector \
+-ffunction-sections -fdata-sections -Wl,--gc-sections -o minimp3 minimp3_test.c -lm
scripts/test.sh