ref: fcda8e4a7dca01b8ac73219d7cc38aee6d3de318
parent: 41a57875ac525c47ff92e823218eabdb45c7bad0
author: Matthew Wang <mjw7@princeton.edu>
date: Tue Jul 21 12:26:03 EDT 2020
improving popcount function, use compiler builtins when possible
--- a/leaf/Src/leaf-analysis.c
+++ b/leaf/Src/leaf-analysis.c
@@ -1061,7 +1061,7 @@
z->_info = (tZeroCrossingInfo*) mpool_alloc(sizeof(tZeroCrossingInfo) * z->_size, m);
- for (uint i = 0; i < z->_size; i++)
+ for (unsigned i = 0; i < z->_size; i++)
tZeroCrossingInfo_initToPool(&z->_info[i], mp);
z->_pos = 0;
@@ -1446,21 +1446,35 @@
if (shift == 0)
{
- for (uint i = 0; i != b->_mid_array; ++i)
+ for (unsigned i = 0; i != b->_mid_array; ++i)
+ {
- // built in compiler popcount functions should be faster but we want this to be portable
- // could try to add some define that call the correct function depending on compiler
- // or let the user pointer popcount() to whatever they want
- // something to look into...
+ // Use the compiler's popcount intrinsic when one is available and fall back
+ // to the portable popcount() otherwise. Exactly one branch may be compiled in:
+ // each expression advances p1 and p2 as a side effect.
+ // NOTE(review): MSVC declares __popcnt in <intrin.h> -- confirm it is included.
+#if defined(__GNUC__)
+ count += __builtin_popcount(*p1++ ^ *p2++);
+#elif defined(_MSC_VER)
+ count += __popcnt(*p1++ ^ *p2++);
+#else
count += popcount(*p1++ ^ *p2++);
+#endif
+ }
}
else
{
const int shift2 = value_size - shift;
- for (uint i = 0; i != b->_mid_array; ++i)
+ for (unsigned i = 0; i != b->_mid_array; ++i)
{
unsigned int v = *p2++ >> shift;
v |= *p2 << shift2;
+#if defined(__GNUC__)
+ count += __builtin_popcount(*p1++ ^ v);
+#elif defined(_MSC_VER)
+ count += __popcnt(*p1++ ^ v);
+#else
count += popcount(*p1++ ^ v);
+#endif
}
}
return count;
--- a/leaf/Src/leaf-delay.c
+++ b/leaf/Src/leaf-delay.c
@@ -58,7 +58,7 @@
void tDelay_clear(tDelay* const dl)
{
_tDelay* d = *dl;
- for (uint i = 0; i < d->maxDelay; i++)
+ for (unsigned i = 0; i < d->maxDelay; i++)
{
d->buff[i] = 0;
}
@@ -203,7 +203,7 @@
void tLinearDelay_clear(tLinearDelay* const dl)
{
_tLinearDelay* d = *dl;
- for (uint i = 0; i < d->maxDelay; i++)
+ for (unsigned i = 0; i < d->maxDelay; i++)
{
d->buff[i] = 0;
}
@@ -397,7 +397,7 @@
void tHermiteDelay_clear(tHermiteDelay* const dl)
{
_tHermiteDelay* d = *dl;
- for (uint i = 0; i < d->maxDelay; i++)
+ for (unsigned i = 0; i < d->maxDelay; i++)
{
d->buff[i] = 0;
}
@@ -593,7 +593,7 @@
void tAllpassDelay_clear(tAllpassDelay* const dl)
{
_tAllpassDelay* d = *dl;
- for (uint i = 0; i < d->maxDelay; i++)
+ for (unsigned i = 0; i < d->maxDelay; i++)
{
d->buff[i] = 0;
}
@@ -763,7 +763,7 @@
void tTapeDelay_clear(tTapeDelay* const dl)
{
_tTapeDelay* d = *dl;
- for (uint i = 0; i < d->maxDelay; i++)
+ for (unsigned i = 0; i < d->maxDelay; i++)
{
d->buff[i] = 0;
}
--- a/leaf/Src/leaf-math.c
+++ b/leaf/Src/leaf-math.c
@@ -731,10 +731,14 @@
// something to look into...
int popcount(unsigned int x)
{
- int c = 0;
- for (; x != 0; x &= x - 1)
- c++;
- return c;
+ // Branch-free parallel bit count; correct for every 32-bit value
+ // (assumes unsigned int is no wider than 32 bits).
+ // Fold neighbouring bits into 2-bit, then 4-bit, then 8-bit partial sums.
+ x = x - ((x >> 1) & 0x55555555u);
+ x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
+ x = (x + (x >> 4)) & 0x0F0F0F0Fu;
+ // The multiply adds the four byte sums together into the top byte.
+ return (int) ((x * 0x01010101u) >> 24);
}
float median3f(float a, float b, float c)
--- a/leaf/Src/leaf-sampling.c
+++ b/leaf/Src/leaf-sampling.c
@@ -80,7 +80,7 @@
void tBuffer_read(tBuffer* const sb, float* buff, uint32_t len)
{
_tBuffer* s = *sb;
- for (uint i = 0; i < s->bufferLength; i++)
+ for (unsigned i = 0; i < s->bufferLength; i++)
{
if (i < len) s->buff[i] = buff[i];
else s->buff[i] = 0.f;
@@ -129,7 +129,7 @@
void tBuffer_clear (tBuffer* const sb)
{
_tBuffer* s = *sb;
- for (uint i = 0; i < s->bufferLength; i++)
+ for (unsigned i = 0; i < s->bufferLength; i++)
{
s->buff[i] = 0.f;
}