shithub: femtolisp

Download patch

ref: 8a4ce82e5f10aa333f411850304a5a2d2e455dc1
parent: 352ab3f5f8eceb576afdfccbde7ab7cf93f69433
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Oct 29 00:31:04 EDT 2024

use faster popcount and bit reversal if available

--- /dev/null
+++ b/bitreverse.c
@@ -1,0 +1,16 @@
+#include "platform.h"
+
+uint32_t
+__builtin_bitreverse32(uint32_t x)
+{
+	uint32_t m;
+
+	x = (x >> 16)      | (x << 16);       m = 0xff00ff00;
+	x = ((x & m) >> 8) | ((x & ~m) << 8);
+	m = 0xf0f0f0f0;
+	x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
+	x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
+	x = ((x & m) >> 1) | ((x & ~m) << 1);
+
+	return x;
+}
--- a/bitvector-ops.c
+++ b/bitvector-ops.c
@@ -5,36 +5,6 @@
 // greater than this # of words we use malloc instead of alloca
 #define MALLOC_CUTOFF 2000
 
-static inline
-uint32_t count_bits(uint32_t b)
-{
-	b = b - ((b>>1)&0x55555555);
-	b = ((b>>2)&0x33333333) + (b&0x33333333);
-	b = ((b>>4)+b)&0x0f0f0f0f;
-	b += (b>>8);
-	b += (b>>16);
-	return b & 0x3f;
-}
-
-uint32_t
-bitreverse(uint32_t x)
-{
-	uint32_t m;
-
-#ifdef __INTEL_COMPILER
-	x = _bswap(x);
-#else
-	x = (x >> 16)      | (x << 16);       m = 0xff00ff00;
-	x = ((x & m) >> 8) | ((x & ~m) << 8);
-#endif
-	m = 0xf0f0f0f0;
-	x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
-	x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
-	x = ((x & m) >> 1) | ((x & ~m) << 1);
-
-	return x;
-}
-
 // shift all bits in a long bit vector
 // n is # of int32s to consider, s is shift distance
 // lowest bit-index is bit 0 of word 0
@@ -311,11 +281,11 @@
 	nw = (soffs+nbits+31)>>5;
 	// first, reverse the words while reversing bit order within each word
 	for(i = 0; i < nw/2; i++){
-		dest[i] = bitreverse(src[nw-i-1]);
-		dest[nw-i-1] = bitreverse(src[i]);
+		dest[i] = __builtin_bitreverse32(src[nw-i-1]);
+		dest[nw-i-1] = __builtin_bitreverse32(src[i]);
 	}
 	if(nw&0x1)
-		dest[i] = bitreverse(src[i]);
+		dest[i] = __builtin_bitreverse32(src[i]);
 
 	tail = (soffs+nbits)&31;
 	if(tail)
@@ -333,11 +303,11 @@
 	nw = (offs+nbits+31)>>5;
 	temp = (nw > MALLOC_CUTOFF) ? LLT_ALLOC(nw*4) : a;
 	for(i = 0; i < nw/2; i++){
-		temp[i]	= bitreverse(b[nw-i-1]);
-		temp[nw-i-1] = bitreverse(b[i]);
+		temp[i]	= __builtin_bitreverse32(b[nw-i-1]);
+		temp[nw-i-1] = __builtin_bitreverse32(b[i]);
 	}
 	if(nw & 1)
-		temp[i] = bitreverse(b[i]);
+		temp[i] = __builtin_bitreverse32(b[i]);
 
 	tail = (offs+nbits)&31;
 	bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
@@ -357,15 +327,15 @@
 	nw = ((uint64_t)offs+nbits+31)>>5;
 
 	if(nw == 1)
-		return count_bits(b[0] & (lomask(nbits)<<offs));
+		return __builtin_popcount(b[0] & (lomask(nbits)<<offs));
 
-	ans = count_bits(b[0]>>offs);  // first end cap
+	ans = __builtin_popcount(b[0]>>offs);  // first end cap
 
 	for(i = 1; i < nw-1; i++)
-		ans += count_bits(b[i]);
+		ans += __builtin_popcount(b[i]);
 
 	ntail = (offs + (uint32_t)nbits) & 31;
-	ans += count_bits(b[i] & (ntail > 0 ? lomask(ntail) : ONES32));  // last end cap
+	ans += __builtin_popcount(b[i] & (ntail > 0 ? lomask(ntail) : ONES32));  // last end cap
 
 	return ans;
 }
--- a/bitvector.c
+++ b/bitvector.c
@@ -72,69 +72,3 @@
 {
 	return b[n>>5] & (1<<(n&31));
 }
-
-static int
-ntz(uint32_t x)
-{
-	int n;
-
-	if(x == 0)
-		return 32;
-	n = 1;
-	if((x & 0x0000FFFF) == 0){
-		n = n +16;
-		x = x >>16;
-	}
-	if((x & 0x000000FF) == 0){
-		n = n + 8;
-		x = x >> 8;
-	}
-	if((x & 0x0000000F) == 0){
-		n = n + 4;
-		x = x >> 4;
-	}
-	if((x & 0x00000003) == 0){
-		n = n + 2;
-		x = x >> 2;
-	}
-	return n - (x & 1);
-}
-
-// given a bitvector of n bits, starting at bit n0 find the next
-// set bit, including n0.
-// returns n if no set bits.
-uint32_t
-bitvector_next(uint32_t *b, uint64_t n0, uint64_t n)
-{
-	if(n0 >= n)
-		return n;
-
-	uint32_t i = n0>>5;
-	uint32_t nb = n0&31;
-	uint32_t nw = (n+31)>>5;
-	uint32_t w;
-
-	if(i < nw-1 || (n&31) == 0)
-		w = b[i]>>nb;
-	else
-		w = (b[i]&lomask(n&31)) >> nb;
-	if(w != 0)
-		return ntz(w) + n0;
-	if(i == nw-1)
-		return n;
-	i++;
-	while(i < nw-1){
-		w = b[i];
-		if(w != 0)
-			return ntz(w) + (i<<5);
-		i++;
-	}
-	w = b[i];
-	nb = n&31;
-	i = ntz(w);
-	if(nb == 0)
-		return i + (n-32);
-	if(i >= nb)
-		return n;
-	return i + (n-nb);
-}
--- a/bitvector.h
+++ b/bitvector.h
@@ -1,13 +1,11 @@
 // a mask with n set lo or hi bits
 #define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
 
-uint32_t bitreverse(uint32_t x);
 uint32_t *bitvector_new(uint64_t n, int initzero);
 uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
 size_t bitvector_nwords(uint64_t nbits);
 void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
 uint32_t bitvector_get(uint32_t *b, uint64_t n);
-uint32_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n);
 void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
 void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
 void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
--- a/meson.build
+++ b/meson.build
@@ -83,6 +83,20 @@
 
 cc = meson.get_compiler('c')
 
+bitreverse32_code = '''#include <stdio.h>
+int main(int argc, char **argv) { return __builtin_bitreverse32(argc); }
+'''
+have_bitreverse32 = cc.links(bitreverse32_code, name: '__builtin_bitreverse32')
+
+if have_bitreverse32
+	add_project_arguments(
+		'-DHAVE_BITREVERSE32',
+		language: 'c',
+	)
+else
+	src += ['bitreverse.c']
+endif
+
 if cc.get_id() == 'clang'
 	add_project_arguments(
 		'-D__wchar_t=__please_no_wchar_t_thank_you',
--- a/mkfile
+++ b/mkfile
@@ -12,6 +12,7 @@
 OFILES=\
 	3rd/mt19937-64.$O\
 	3rd/wcwidth.$O\
+	bitreverse.$O\
 	bitvector-ops.$O\
 	bitvector.$O\
 	builtins.$O\
@@ -27,6 +28,7 @@
 	iostream.$O\
 	main_plan9.$O\
 	operators.$O\
+	popcount.$O\
 	print.$O\
 	ptrhash.$O\
 	random.$O\
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -115,3 +115,6 @@
 typedef enum { false, true } bool;
 
 int wcwidth(Rune c);
+
+uint32_t __builtin_popcount(uint32_t);
+uint32_t __builtin_bitreverse32(uint32_t x);
--- /dev/null
+++ b/popcount.c
@@ -1,0 +1,12 @@
+#include "platform.h"
+
+uint32_t
+__builtin_popcount(uint32_t b)
+{
+	b = b - ((b>>1)&0x55555555);
+	b = ((b>>2)&0x33333333) + (b&0x33333333);
+	b = ((b>>4)+b)&0x0f0f0f0f;
+	b += (b>>8);
+	b += (b>>16);
+	return b & 0x3f;
+}
--- a/posix/platform.h
+++ b/posix/platform.h
@@ -71,3 +71,7 @@
 
 #include "mp.h"
 #include "utf.h"
+
+#ifndef HAVE_BITREVERSE32
+uint32_t __builtin_bitreverse32(uint32_t x);
+#endif