ref: 8a4ce82e5f10aa333f411850304a5a2d2e455dc1
parent: 352ab3f5f8eceb576afdfccbde7ab7cf93f69433
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Oct 29 00:31:04 EDT 2024
use faster popcount and bit reversal if available
--- /dev/null
+++ b/bitreverse.c
@@ -1,0 +1,16 @@
+#include "platform.h"
+
+uint32_t
+__builtin_bitreverse32(uint32_t x)
+{
+ uint32_t m;
+
+ x = (x >> 16) | (x << 16); m = 0xff00ff00;
+ x = ((x & m) >> 8) | ((x & ~m) << 8);
+ m = 0xf0f0f0f0;
+ x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
+ x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
+ x = ((x & m) >> 1) | ((x & ~m) << 1);
+
+ return x;
+}
--- a/bitvector-ops.c
+++ b/bitvector-ops.c
@@ -5,36 +5,6 @@
// greater than this # of words we use malloc instead of alloca
#define MALLOC_CUTOFF 2000
-static inline
-uint32_t count_bits(uint32_t b)
-{
- b = b - ((b>>1)&0x55555555);
- b = ((b>>2)&0x33333333) + (b&0x33333333);
- b = ((b>>4)+b)&0x0f0f0f0f;
- b += (b>>8);
- b += (b>>16);
- return b & 0x3f;
-}
-
-uint32_t
-bitreverse(uint32_t x)
-{
- uint32_t m;
-
-#ifdef __INTEL_COMPILER
- x = _bswap(x);
-#else
- x = (x >> 16) | (x << 16); m = 0xff00ff00;
- x = ((x & m) >> 8) | ((x & ~m) << 8);
-#endif
- m = 0xf0f0f0f0;
- x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
- x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
- x = ((x & m) >> 1) | ((x & ~m) << 1);
-
- return x;
-}
-
// shift all bits in a long bit vector
// n is # of int32s to consider, s is shift distance
// lowest bit-index is bit 0 of word 0
@@ -311,11 +281,11 @@
nw = (soffs+nbits+31)>>5;
// first, reverse the words while reversing bit order within each word
for(i = 0; i < nw/2; i++){
- dest[i] = bitreverse(src[nw-i-1]);
- dest[nw-i-1] = bitreverse(src[i]);
+ dest[i] = __builtin_bitreverse32(src[nw-i-1]);
+ dest[nw-i-1] = __builtin_bitreverse32(src[i]);
}
if(nw&0x1)
- dest[i] = bitreverse(src[i]);
+ dest[i] = __builtin_bitreverse32(src[i]);
tail = (soffs+nbits)&31;
if(tail)
@@ -333,11 +303,11 @@
nw = (offs+nbits+31)>>5;
temp = (nw > MALLOC_CUTOFF) ? LLT_ALLOC(nw*4) : a;
for(i = 0; i < nw/2; i++){
- temp[i] = bitreverse(b[nw-i-1]);
- temp[nw-i-1] = bitreverse(b[i]);
+ temp[i] = __builtin_bitreverse32(b[nw-i-1]);
+ temp[nw-i-1] = __builtin_bitreverse32(b[i]);
}
if(nw & 1)
- temp[i] = bitreverse(b[i]);
+ temp[i] = __builtin_bitreverse32(b[i]);
tail = (offs+nbits)&31;
bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
@@ -357,15 +327,15 @@
nw = ((uint64_t)offs+nbits+31)>>5;
if(nw == 1)
- return count_bits(b[0] & (lomask(nbits)<<offs));
+ return __builtin_popcount(b[0] & (lomask(nbits)<<offs));
- ans = count_bits(b[0]>>offs); // first end cap
+ ans = __builtin_popcount(b[0]>>offs); // first end cap
for(i = 1; i < nw-1; i++)
- ans += count_bits(b[i]);
+ ans += __builtin_popcount(b[i]);
ntail = (offs + (uint32_t)nbits) & 31;
- ans += count_bits(b[i] & (ntail > 0 ? lomask(ntail) : ONES32)); // last end cap
+ ans += __builtin_popcount(b[i] & (ntail > 0 ? lomask(ntail) : ONES32)); // last end cap
return ans;
}
--- a/bitvector.c
+++ b/bitvector.c
@@ -72,69 +72,3 @@
{
return b[n>>5] & (1<<(n&31));
}
-
-static int
-ntz(uint32_t x)
-{
- int n;
-
- if(x == 0)
- return 32;
- n = 1;
- if((x & 0x0000FFFF) == 0){
- n = n +16;
- x = x >>16;
- }
- if((x & 0x000000FF) == 0){
- n = n + 8;
- x = x >> 8;
- }
- if((x & 0x0000000F) == 0){
- n = n + 4;
- x = x >> 4;
- }
- if((x & 0x00000003) == 0){
- n = n + 2;
- x = x >> 2;
- }
- return n - (x & 1);
-}
-
-// given a bitvector of n bits, starting at bit n0 find the next
-// set bit, including n0.
-// returns n if no set bits.
-uint32_t
-bitvector_next(uint32_t *b, uint64_t n0, uint64_t n)
-{
- if(n0 >= n)
- return n;
-
- uint32_t i = n0>>5;
- uint32_t nb = n0&31;
- uint32_t nw = (n+31)>>5;
- uint32_t w;
-
- if(i < nw-1 || (n&31) == 0)
- w = b[i]>>nb;
- else
- w = (b[i]&lomask(n&31)) >> nb;
- if(w != 0)
- return ntz(w) + n0;
- if(i == nw-1)
- return n;
- i++;
- while(i < nw-1){
- w = b[i];
- if(w != 0)
- return ntz(w) + (i<<5);
- i++;
- }
- w = b[i];
- nb = n&31;
- i = ntz(w);
- if(nb == 0)
- return i + (n-32);
- if(i >= nb)
- return n;
- return i + (n-nb);
-}
--- a/bitvector.h
+++ b/bitvector.h
@@ -1,13 +1,11 @@
// a mask with n set lo or hi bits
#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
-uint32_t bitreverse(uint32_t x);
uint32_t *bitvector_new(uint64_t n, int initzero);
uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
size_t bitvector_nwords(uint64_t nbits);
void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
uint32_t bitvector_get(uint32_t *b, uint64_t n);
-uint32_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n);
void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
--- a/meson.build
+++ b/meson.build
@@ -83,6 +83,20 @@
cc = meson.get_compiler('c')
+bitreverse32_code = '''#include <stdio.h>
+int main(int argc, char **argv) { return __builtin_bitreverse32(argc); }
+'''
+have_bitreverse32 = cc.links(bitreverse32_code, name: '__builtin_bitreverse32')
+
+if have_bitreverse32
+ add_project_arguments(
+ '-DHAVE_BITREVERSE32',
+ language: 'c',
+ )
+else
+ src += ['bitreverse.c']
+endif
+
if cc.get_id() == 'clang'
add_project_arguments(
'-D__wchar_t=__please_no_wchar_t_thank_you',
--- a/mkfile
+++ b/mkfile
@@ -12,6 +12,7 @@
OFILES=\
3rd/mt19937-64.$O\
3rd/wcwidth.$O\
+ bitreverse.$O\
bitvector-ops.$O\
bitvector.$O\
builtins.$O\
@@ -27,6 +28,7 @@
iostream.$O\
main_plan9.$O\
operators.$O\
+ popcount.$O\
print.$O\
ptrhash.$O\
random.$O\
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -115,3 +115,6 @@
typedef enum { false, true } bool;
int wcwidth(Rune c);
+
+uint32_t __builtin_popcount(uint32_t);
+uint32_t __builtin_bitreverse32(uint32_t x);
--- /dev/null
+++ b/popcount.c
@@ -1,0 +1,12 @@
+#include "platform.h"
+
+uint32_t
+__builtin_popcount(uint32_t b)
+{
+ b = b - ((b>>1)&0x55555555);
+ b = ((b>>2)&0x33333333) + (b&0x33333333);
+ b = ((b>>4)+b)&0x0f0f0f0f;
+ b += (b>>8);
+ b += (b>>16);
+ return b & 0x3f;
+}
--- a/posix/platform.h
+++ b/posix/platform.h
@@ -71,3 +71,7 @@
#include "mp.h"
#include "utf.h"
+
+#ifndef HAVE_BITREVERSE32
+uint32_t __builtin_bitreverse32(uint32_t x);
+#endif