shithub: mp3dec

Download patch

ref: 16778617b8af13ed9b54bcbeeb311ec94552ad02
parent: 50e37a320850daae24546913f119f7923eb33eef
author: lieff <lieff@users.noreply.github.com>
date: Sat Jan 27 11:14:58 EST 2018

add MINIMP3_ONLY_SIMD define, which strips some generic code.

--- a/README.md
+++ b/README.md
@@ -78,6 +78,7 @@
 Note that you must define ``MINIMP3_IMPLEMENTATION`` in exactly one source file.
 You can ``#include`` ``minimp3.h`` in as many files as you like.
 Also you can use ``MINIMP3_ONLY_MP3`` define to strip MP1/MP2 decoding code.
+Use the ``MINIMP3_ONLY_SIMD`` define to strip the generic (non-SSE/NEON) code paths; it is always enabled on x64/arm64 targets.
 
 Then. we decode the input stream frame-by-frame:
 
--- a/minimp3.h
+++ b/minimp3.h
@@ -80,6 +80,11 @@
 #define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
 #define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
 
+#if defined(_M_X64) || defined(_M_ARM64) || (defined(__x86_64__) && defined(__SSE2__)) || defined(__aarch64__)
+// x64 always has SSE2 and arm64 always has NEON, so the generic code is not needed
+#define MINIMP3_ONLY_SIMD
+#endif
+
 #if defined(_MSC_VER) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
 #   include <immintrin.h>
 #   define HAVE_SSE 1
@@ -95,7 +100,7 @@
 #   define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
 #   define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
 typedef __m128 f4;
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
 #define minimp3_cpuid __cpuid
 #else
 static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
@@ -124,6 +129,9 @@
 #endif
 static int have_simd()
 {
+#ifdef MINIMP3_ONLY_SIMD
+    return 1;
+#else
     static int g_have_simd;
     int CPUInfo[4];
 #ifdef MINIMP3_TEST
@@ -145,6 +153,7 @@
 #endif
     g_have_simd = 1;
     return 0;
+#endif
 }
 #elif defined(__arm)
 #   include <arm_neon.h>
@@ -162,7 +171,7 @@
 #   define VREV(x) vrev64q_f32(x)
 typedef float32x4_t f4;
 static int have_simd()
-{
+{   // TODO: detect neon for !MINIMP3_ONLY_SIMD
     return 1;
 }
 #else
@@ -982,6 +991,7 @@
             VSTORE(grbuf + 14 - i, VREV(vd));
         }
 #endif
+#ifndef MINIMP3_ONLY_SIMD
         for(; i < 8; i++)
         {
             float u = grbuf[18 + i];
@@ -989,6 +999,7 @@
             grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
             grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
         }
+#endif
     }
 }
 
@@ -1310,6 +1321,9 @@
         }
     } else
 #endif
+#ifdef MINIMP3_ONLY_SIMD
+    {}
+#else
     for (; k < n; k++)
     {
         float t[4][8], *x, *y = grbuf + k;
@@ -1368,6 +1382,7 @@
         y[2*18] = t[1][7];
         y[3*18] = t[3][7];
     }
+#endif
 }
 
 static short mp3d_scale_pcm(float sample)
@@ -1493,6 +1508,9 @@
         }
     } else
 #endif
+#ifdef MINIMP3_ONLY_SIMD
+    {}
+#else
     for (i = 14; i >= 0; i--)
     {
 #define LOAD(k) float w0 = *w++; float w1 = *w++; float * vz = &zlin[4*i - k*64]; float * vy = &zlin[4*i - (15 - k)*64];
@@ -1521,6 +1539,7 @@
         dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
         dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
     }
+#endif
 }
 
 static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, short *pcm, float *lins)
--- a/minimp3_test.c
+++ b/minimp3_test.c
@@ -1,4 +1,5 @@
 //#define MINIMP3_ONLY_MP3
+//#define MINIMP3_ONLY_SIMD
 #define MINIMP3_IMPLEMENTATION
 #include "minimp3.h"
 #include <stdio.h>
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -10,6 +10,6 @@
 scripts/test.sh
 gcov minimp3_test.c
 
-gcc -O2 -g -Wall -Wextra -fno-asynchronous-unwind-tables -fno-stack-protector -ffunction-sections \
--fdata-sections -Wl,--gc-sections -o minimp3 minimp3_test.c -lm
+gcc -O2 -g -Wall -Wextra -Wmissing-prototypes -Werror -fno-asynchronous-unwind-tables -fno-stack-protector \
+-ffunction-sections -fdata-sections -Wl,--gc-sections -o minimp3 minimp3_test.c -lm
 scripts/test.sh