ref: e4cc50bddd8707dc8feffc623eda092a86e03d8e
parent: 1639dd6badc1c236034e3cac5b4da2f209b040a0
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Nov 12 22:44:50 EST 2024
remove bitvector ops
--- a/bitreverse.c
+++ /dev/null
@@ -1,16 +1,0 @@
-#include "platform.h"
-
-uint32_t
-__builtin_bitreverse32(uint32_t x)
-{
- uint32_t m;
-
- x = (x >> 16) | (x << 16); m = 0xff00ff00;
- x = ((x & m) >> 8) | ((x & ~m) << 8);
- m = 0xf0f0f0f0;
- x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
- x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
- x = ((x & m) >> 1) | ((x & ~m) << 1);
-
- return x;
-}
--- a/bitvector-ops.c
+++ /dev/null
@@ -1,447 +1,0 @@
-#include "llt.h"
-
-#define ONES32 ((uint32_t)0xffffffffUL)
-
-// greater than this # of words we use malloc instead of alloca
-#define MALLOC_CUTOFF 2000
-
-// shift all bits in a long bit vector
-// n is # of int32s to consider, s is shift distance
-// lowest bit-index is bit 0 of word 0
-// TODO: handle boundary case of shift distance >= data size?
-void
-bitvector_shr(uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i;
- if(s == 0 || n == 0)
- return;
- i = s >> 5;
- if(i){
- n -= i;
- memmove(b, &b[i], n*4);
- memset(&b[n], 0, i*4);
- s &= 31;
- }
- for(i = 0; i < n-1; i++)
- b[i] = (b[i] >> s) | (b[i+1] << (32-s));
- b[i] >>= s;
-}
-
-// out-of-place version, good for re-aligning a strided submatrix to
-// linear representation when a copy is needed
-// assumes that dest has the same amount of space as source, even if it
-// wouldn't have been necessary to hold the shifted bits
-void
-bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i, j;
- if(n == 0)
- return;
- if(s == 0){
- memmove(dest, b, n*4);
- return;
- }
- j = s >> 5;
- if(j){
- n -= j;
- memset(&dest[n], 0, j*4);
- s &= 31;
- b = &b[j];
- }
- for(i = 0; i < n-1; i++)
- dest[i] = (b[i] >> s) | (b[i+1] << (32-s));
- dest[i] = b[i]>>s;
-}
-
-void
-bitvector_shl(uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i, scrap = 0, temp;
- if(s == 0 || n == 0)
- return;
- i = s >> 5;
- if(i){
- n -= i;
- memmove(&b[i], b, n*4);
- memset(b, 0, i*4);
- s &= 31;
- b = &b[i];
- }
- for(i = 0; i < n; i++){
- temp = (b[i] << s) | scrap;
- scrap = b[i] >> (32-s);
- b[i] = temp;
- }
-}
-
-// if dest has more space than source, set scrap to true to keep the
-// top bits that would otherwise be shifted out
-void
-bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap)
-{
- uint32_t i, j, sc = 0;
- if(n == 0)
- return;
- if(s == 0){
- memmove(dest, b, n*4);
- return;
- }
- j = s >> 5;
- if(j){
- n -= j;
- memset(dest, 0, j*4);
- s &= 31;
- dest = &dest[j];
- }
- for(i = 0; i < n; i++){
- dest[i] = (b[i] << s) | sc;
- sc = b[i] >> (32-s);
- }
- if(scrap)
- dest[i] = sc;
-}
-
-// set nbits to c, starting at given bit offset
-// assumes offs < 32
-void
-bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = (lomask(nbits)<<offs);
- if(c)
- b[0] |= mask;
- else
- b[0] &= ~mask;
- return;
- }
-
- mask = lomask(offs);
- if(c)
- b[0] |= ~mask;
- else
- b[0] &= mask;
-
- mask = c ? ONES32 : 0;
-
- for(i = 1; i < nw-1; i++)
- b[i] = mask;
-
- tail = (offs+nbits) & 31;
- if(tail == 0)
- b[i] = mask;
- else{
- mask = lomask(tail);
- if(c)
- b[i] |= mask;
- else
- b[i] &= ~mask;
- }
-}
-
-void
-bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = lomask(nbits)<<offs;
- b[0] ^= mask;
- return;
- }
-
- mask = ~lomask(offs);
- b[0] ^= mask;
-
- for(i = 1; i < nw-1; i++)
- b[i] = ~b[i];
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- b[i] = ~b[i];
- else{
- mask = lomask(tail);
- b[i] ^= mask;
- }
-}
-
-// constant-space bit vector copy in a single pass, with arbitrary
-// offsets and lengths. to get this right, there are 16 cases to handle!
-#define BITVECTOR_COPY_OP(name, OP) \
-void \
-bitvector_##name(uint32_t *dest, uint32_t doffs, uint32_t *src, uint32_t soffs, uint32_t nbits) \
-{ \
- uint32_t i, s, nw, tail, snw, mask, scrap; \
- if(nbits == 0) \
- return; \
- nw = (doffs+nbits+31)>>5; \
- if(soffs == doffs){ \
- if(nw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
- for(i = 1; i < nw-1; i++) \
- dest[i] = OP(src[i]); \
- tail = (doffs+nbits)&31; \
- if(tail == 0) \
- dest[i] = src[i]; \
- else { \
- mask = lomask(tail); \
- dest[i] = (dest[i] & ~mask) | (OP(src[i]) & mask); \
- } \
- return; \
- } \
- snw = (soffs+nbits+31)>>5; \
- if(soffs < doffs){ \
- s = doffs-soffs; \
- if(nw == 1){ \
- mask = lomask(nbits) << doffs; \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
- scrap = OP(src[0])>>(32-s); \
- for(i = 1; i < snw-1; i++){ \
- dest[i] = (OP(src[i])<<s) | scrap; \
- scrap = OP(src[i])>>(32-s); \
- } \
- tail = (doffs+nbits)&31; \
- mask = tail ? lomask(tail) : ONES32; \
- if(snw == nw) \
- dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s)|scrap) & mask); \
- else{ /* snw < nw */ \
- if(snw == 1) \
- dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s) | scrap) & mask); \
- else{ \
- dest[i] = (OP(src[i])<<s) | scrap; \
- scrap = OP(src[i])>>(32-s); \
- i++; \
- dest[i] = (dest[i] & ~mask) | (scrap & mask); \
- } \
- } \
- }else{ \
- s = soffs-doffs; \
- if(snw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])>>s) & mask); \
- return; \
- } \
- if(nw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | \
- (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
- for(i = 1; i < nw-1; i++) \
- dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \
- tail = (doffs+nbits)&31; \
- mask = tail ? lomask(tail) : ONES32; \
- if(snw == nw){ \
- dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \
- } \
- else /* snw > nw */ { \
- dest[i] = (dest[i] & ~mask) | \
- (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \
- } \
- } \
-}
-
-#define BV_COPY(a) (a)
-#define BV_NOT(a) (~(a))
-BITVECTOR_COPY_OP(copy, BV_COPY)
-BITVECTOR_COPY_OP(not_to, BV_NOT)
-
-// copy from source to dest while reversing bit-order
-// assumes dest offset == 0
-// assumes source and dest don't overlap
-// assumes offset < 32
-void
-bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits)
-{
- uint32_t i, nw, tail;
-
- if(nbits == 0)
- return;
-
- nw = (soffs+nbits+31)>>5;
- // first, reverse the words while reversing bit order within each word
- for(i = 0; i < nw/2; i++){
- dest[i] = __builtin_bitreverse32(src[nw-i-1]);
- dest[nw-i-1] = __builtin_bitreverse32(src[i]);
- }
- if(nw&0x1)
- dest[i] = __builtin_bitreverse32(src[i]);
-
- tail = (soffs+nbits)&31;
- if(tail)
- bitvector_shr(dest, nw, 32-tail);
-}
-
-void
-bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, *temp, a[MALLOC_CUTOFF];
-
- if(nbits == 0)
- return;
-
- nw = (offs+nbits+31)>>5;
- temp = (nw > MALLOC_CUTOFF) ? LLT_ALLOC(nw*4) : a;
- for(i = 0; i < nw/2; i++){
- temp[i] = __builtin_bitreverse32(b[nw-i-1]);
- temp[nw-i-1] = __builtin_bitreverse32(b[i]);
- }
- if(nw & 1)
- temp[i] = __builtin_bitreverse32(b[i]);
-
- tail = (offs+nbits)&31;
- bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
- if(nw > MALLOC_CUTOFF)
- LLT_FREE(temp);
-}
-
-uint64_t
-bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits)
-{
- size_t i, nw;
- uint32_t ntail;
- uint64_t ans;
-
- if(nbits == 0)
- return 0;
- nw = ((uint64_t)offs+nbits+31)>>5;
-
- if(nw == 1)
- return __builtin_popcount(b[0] & (lomask(nbits)<<offs));
-
- ans = __builtin_popcount(b[0]>>offs); // first end cap
-
- for(i = 1; i < nw-1; i++)
- ans += __builtin_popcount(b[i]);
-
- ntail = (offs + (uint32_t)nbits) & 31;
- ans += __builtin_popcount(b[i] & (ntail > 0 ? lomask(ntail) : ONES32)); // last end cap
-
- return ans;
-}
-
-uint32_t
-bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return 0;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = (lomask(nbits)<<offs);
- if((b[0] & mask) != mask)
- return 1;
- return 0;
- }
-
- mask = ~lomask(offs);
- if((b[0] & mask) != mask)
- return 1;
-
- for(i = 1; i < nw-1; i++)
- if(b[i] != ONES32)
- return 1;
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- return b[i] != ONES32;
- mask = lomask(tail);
- return (b[i] & mask) != mask;
-}
-
-uint32_t
-bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return 0;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = lomask(nbits)<<offs;
- return (b[0] & mask) != 0;
- }
-
- mask = ~lomask(offs);
- if((b[0] & mask) != 0)
- return 1;
-
- for(i = 1; i < nw-1; i++){
- if(b[i] != 0)
- return 1;
- }
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- return b[i] != 0;
- return (b[i] & lomask(tail)) != 0;
-}
-
-static void
-adjust_offset_to(uint32_t *dest, uint32_t *src, uint32_t nw, uint32_t soffs, uint32_t newoffs)
-{
- if(newoffs > soffs)
- bitvector_shl_to(dest, src, nw, newoffs-soffs, 1);
- else
- bitvector_shr_to(dest, src, nw, soffs-newoffs);
-}
-
-#define BITVECTOR_BINARY_OP_TO(opname, OP) \
-void \
-bitvector_##opname##_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits) \
-{ \
- uint32_t nw = (doffs+nbits+31)>>5; \
- uint32_t atmp[MALLOC_CUTOFF+1]; \
- uint32_t *temp = nw>MALLOC_CUTOFF ? LLT_ALLOC((nw+1)*4) : atmp; \
- uint32_t i, anw, bnw; \
- if(aoffs == boffs){ \
- anw = (aoffs+nbits+31)>>5; \
- }else if(aoffs == doffs){ \
- bnw = (boffs+nbits+31)>>5; \
- adjust_offset_to(temp, b, bnw, boffs, aoffs); \
- b = temp; \
- anw = nw; \
- }else{ \
- anw = (aoffs+nbits+31)>>5; \
- bnw = (boffs+nbits+31)>>5; \
- adjust_offset_to(temp, a, anw, aoffs, boffs); \
- a = temp; \
- aoffs = boffs; \
- anw = bnw; \
- } \
- for(i = 0; i < anw; i++) \
- temp[i] = OP(a[i], b[i]); \
- bitvector_copy(dest, doffs, temp, aoffs, nbits); \
- if(nw>MALLOC_CUTOFF) \
- LLT_FREE(temp); \
-}
-
-#define BV_AND(a, b) ((a)&(b))
-#define BV_OR(a, b) ((a)|(b))
-#define BV_XOR(a, b) ((a)^(b))
-BITVECTOR_BINARY_OP_TO(and, BV_AND)
-BITVECTOR_BINARY_OP_TO(or, BV_OR)
-BITVECTOR_BINARY_OP_TO(xor, BV_XOR)
--- a/bitvector.h
+++ b/bitvector.h
@@ -1,24 +1,5 @@
-// a mask with n set lo or hi bits
-#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
-
uint32_t *bitvector_new(uint64_t n, int initzero);
uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
size_t bitvector_nwords(uint64_t nbits);
void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
uint32_t bitvector_get(uint32_t *b, uint64_t n);
-void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap);
-void bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits);
-void bitvector_copy(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_not_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits);
-void bitvector_and_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_or_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_xor_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-uint64_t bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits);
-uint32_t bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits);
-uint32_t bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits);
--- a/meson.build
+++ b/meson.build
@@ -57,7 +57,6 @@
'3rd/utf/runeistype.c',
'3rd/utf/runetotype.c',
'3rd/utf/utfnlen.c',
- 'bitvector-ops.c',
'bitvector.c',
'builtins.c',
'cvalues.c',
@@ -85,20 +84,6 @@
]
cc = meson.get_compiler('c')
-
-bitreverse32_code = '''#include <stdio.h>
-int main(int argc, char **argv) { return __builtin_bitreverse32(argc); }
-'''
-have_bitreverse32 = cc.links(bitreverse32_code, name: '__builtin_bitreverse32')
-
-if have_bitreverse32
- add_project_arguments(
- '-DHAVE_BITREVERSE32',
- language: 'c',
- )
-else
- src += ['bitreverse.c']
-endif
if cc.get_id() == 'clang'
add_project_arguments(
--- a/mkfile
+++ b/mkfile
@@ -13,8 +13,6 @@
3rd/mt19937-64.$O\
3rd/wcwidth.$O\
3rd/spooky.$O\
- bitreverse.$O\
- bitvector-ops.$O\
bitvector.$O\
builtins.$O\
cvalues.$O\
@@ -30,7 +28,6 @@
iostream.$O\
main_plan9.$O\
operators.$O\
- popcount.$O\
print.$O\
ptrhash.$O\
random.$O\
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -119,6 +119,3 @@
typedef enum { false, true } bool;
int wcwidth(Rune c);
-
-uint32_t __builtin_popcount(uint32_t);
-uint32_t __builtin_bitreverse32(uint32_t x);
--- a/popcount.c
+++ /dev/null
@@ -1,12 +1,0 @@
-#include "platform.h"
-
-uint32_t
-__builtin_popcount(uint32_t b)
-{
- b = b - ((b>>1)&0x55555555);
- b = ((b>>2)&0x33333333) + (b&0x33333333);
- b = ((b>>4)+b)&0x0f0f0f0f;
- b += (b>>8);
- b += (b>>16);
- return b & 0x3f;
-}