ref: ca17b8e78d4ff54ecfca3985c7acc16a09f543dc
parent: e1bc44b385260be39793806042eb66a56d8ec646
author: mag <mag-one@autistici.org>
date: Sun May 21 15:08:04 EDT 2023
llt stuff moved into the project root - lltinit.c renamed llt.c - int2str.c moved into llt.c.
--- a/3rd/wcwidth.c
+++ b/3rd/wcwidth.c
@@ -15,7 +15,7 @@
* https://github.com/termux/termux-packages/tree/master/packages/libandroid-support
*/
-#include "llt.h"
+#include "../llt.h"
struct width_interval {
int start;
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
TARG=flisp
CFLAGS?=-O2 -g
-CFLAGS+=-Wall -Wextra -Wno-parentheses -std=c99 -I3rd -Illt -Iposix
+CFLAGS+=-Wall -Wextra -Wno-parentheses -std=c99 -I3rd -Iposix
LDFLAGS?=
OBJS=\
@@ -21,18 +21,17 @@
print.o\
equal.o\
types.o\
- llt/bitvector-ops.o\
- llt/bitvector.o\
- llt/dump.o\
- llt/hashing.o\
- llt/htable.o\
- llt/int2str.o\
- llt/ios.o\
- llt/lltinit.o\
- llt/ptrhash.o\
- llt/random.o\
- llt/timefuncs.o\
- llt/utf8.o\
+ bitvector-ops.o\
+ bitvector.o\
+ dump.o\
+ hashing.o\
+ htable.o\
+ ios.o\
+ llt.o\
+ ptrhash.o\
+ random.o\
+ timefuncs.o\
+ utf8.o\
3rd/mp/mpadd.o\
3rd/mp/mpaux.o\
3rd/mp/mpcmp.o\
--- /dev/null
+++ b/bitvector-ops.c
@@ -1,0 +1,477 @@
+#include "llt.h"
+
+#define ONES32 ((uint32_t)0xffffffffUL)
+
+// greater than this # of words we use malloc instead of alloca
+#define MALLOC_CUTOFF 2000
+
+static inline
+uint32_t count_bits(uint32_t b)
+{
+ b = b - ((b>>1)&0x55555555);
+ b = ((b>>2)&0x33333333) + (b&0x33333333);
+ b = ((b>>4)+b)&0x0f0f0f0f;
+ b += (b>>8);
+ b += (b>>16);
+ return b & 0x3f;
+}
+
+uint32_t
+bitreverse(uint32_t x)
+{
+ uint32_t m;
+
+#ifdef __INTEL_COMPILER
+ x = _bswap(x);
+#else
+ x = (x >> 16) | (x << 16); m = 0xff00ff00;
+ x = ((x & m) >> 8) | ((x & ~m) << 8);
+#endif
+ m = 0xf0f0f0f0;
+ x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
+ x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
+ x = ((x & m) >> 1) | ((x & ~m) << 1);
+
+ return x;
+}
+
+// shift all bits in a long bit vector
+// n is # of int32s to consider, s is shift distance
+// lowest bit-index is bit 0 of word 0
+// TODO: handle boundary case of shift distance >= data size?
+void
+bitvector_shr(uint32_t *b, size_t n, uint32_t s)
+{
+ uint32_t i;
+ if(s == 0 || n == 0)
+ return;
+ i = s >> 5;
+ if(i){
+ n -= i;
+ memmove(b, &b[i], n*4);
+ memset(&b[n], 0, i*4);
+ s &= 31;
+ }
+ for(i = 0; i < n-1; i++)
+ b[i] = (b[i] >> s) | (b[i+1] << (32-s));
+ b[i] >>= s;
+}
+
+// out-of-place version, good for re-aligning a strided submatrix to
+// linear representation when a copy is needed
+// assumes that dest has the same amount of space as source, even if it
+// wouldn't have been necessary to hold the shifted bits
+void
+bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s)
+{
+ uint32_t i, j;
+ if(n == 0)
+ return;
+ if(s == 0){
+ memmove(dest, b, n*4);
+ return;
+ }
+ j = s >> 5;
+ if(j){
+ n -= j;
+ memset(&dest[n], 0, j*4);
+ s &= 31;
+ b = &b[j];
+ }
+ for(i = 0; i < n-1; i++)
+ dest[i] = (b[i] >> s) | (b[i+1] << (32-s));
+ dest[i] = b[i]>>s;
+}
+
+void
+bitvector_shl(uint32_t *b, size_t n, uint32_t s)
+{
+ uint32_t i, scrap = 0, temp;
+ if(s == 0 || n == 0)
+ return;
+ i = s >> 5;
+ if(i){
+ n -= i;
+ memmove(&b[i], b, n*4);
+ memset(b, 0, i*4);
+ s &= 31;
+ b = &b[i];
+ }
+ for(i = 0; i < n; i++){
+ temp = (b[i] << s) | scrap;
+ scrap = b[i] >> (32-s);
+ b[i] = temp;
+ }
+}
+
+// if dest has more space than source, set scrap to true to keep the
+// top bits that would otherwise be shifted out
+void
+bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap)
+{
+ uint32_t i, j, sc = 0;
+ if(n == 0)
+ return;
+ if(s == 0){
+ memmove(dest, b, n*4);
+ return;
+ }
+ j = s >> 5;
+ if(j){
+ n -= j;
+ memset(dest, 0, j*4);
+ s &= 31;
+ dest = &dest[j];
+ }
+ for(i = 0; i < n; i++){
+ dest[i] = (b[i] << s) | sc;
+ sc = b[i] >> (32-s);
+ }
+ if(scrap)
+ dest[i] = sc;
+}
+
+// set nbits to c, starting at given bit offset
+// assumes offs < 32
+void
+bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits)
+{
+ uint32_t i, nw, tail, mask;
+
+ if(nbits == 0)
+ return;
+ nw = (offs+nbits+31)>>5;
+
+ if(nw == 1){
+ mask = (lomask(nbits)<<offs);
+ if(c)
+ b[0] |= mask;
+ else
+ b[0] &= ~mask;
+ return;
+ }
+
+ mask = lomask(offs);
+ if(c)
+ b[0] |= ~mask;
+ else
+ b[0] &= mask;
+
+ mask = c ? ONES32 : 0;
+
+ for(i = 1; i < nw-1; i++)
+ b[i] = mask;
+
+ tail = (offs+nbits) & 31;
+ if(tail == 0)
+ b[i] = mask;
+ else{
+ mask = lomask(tail);
+ if(c)
+ b[i] |= mask;
+ else
+ b[i] &= ~mask;
+ }
+}
+
+void
+bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits)
+{
+ uint32_t i, nw, tail, mask;
+
+ if(nbits == 0)
+ return;
+ nw = (offs+nbits+31)>>5;
+
+ if(nw == 1){
+ mask = lomask(nbits)<<offs;
+ b[0] ^= mask;
+ return;
+ }
+
+ mask = ~lomask(offs);
+ b[0] ^= mask;
+
+ for(i = 1; i < nw-1; i++)
+ b[i] = ~b[i];
+
+ tail = (offs+nbits)&31;
+ if(tail == 0)
+ b[i] = ~b[i];
+ else{
+ mask = lomask(tail);
+ b[i] ^= mask;
+ }
+}
+
+// constant-space bit vector copy in a single pass, with arbitrary
+// offsets and lengths. to get this right, there are 16 cases to handle!
+#define BITVECTOR_COPY_OP(name, OP) \
+void \
+bitvector_##name(uint32_t *dest, uint32_t doffs, uint32_t *src, uint32_t soffs, uint32_t nbits) \
+{ \
+ uint32_t i, s, nw, tail, snw, mask, scrap; \
+ if(nbits == 0) \
+ return; \
+ nw = (doffs+nbits+31)>>5; \
+ if(soffs == doffs){ \
+ if(nw == 1){ \
+ mask = (lomask(nbits)<<doffs); \
+ dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
+ return; \
+ } \
+ mask = ~lomask(doffs); \
+ dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
+ for(i = 1; i < nw-1; i++) \
+ dest[i] = OP(src[i]); \
+ tail = (doffs+nbits)&31; \
+ if(tail == 0) \
+ dest[i] = src[i]; \
+ else { \
+ mask = lomask(tail); \
+ dest[i] = (dest[i] & ~mask) | (OP(src[i]) & mask); \
+ } \
+ return; \
+ } \
+ snw = (soffs+nbits+31)>>5; \
+ if(soffs < doffs){ \
+ s = doffs-soffs; \
+ if(nw == 1){ \
+ mask = lomask(nbits) << doffs; \
+ dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
+ return; \
+ } \
+ mask = ~lomask(doffs); \
+ dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
+ scrap = OP(src[0])>>(32-s); \
+ for(i = 1; i < snw-1; i++){ \
+ dest[i] = (OP(src[i])<<s) | scrap; \
+ scrap = OP(src[i])>>(32-s); \
+ } \
+ tail = (doffs+nbits)&31; \
+ mask = tail ? lomask(tail) : ONES32; \
+ if(snw == nw) \
+ dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s)|scrap) & mask); \
+ else{ /* snw < nw */ \
+ if(snw == 1) \
+ dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s) | scrap) & mask); \
+ else{ \
+ dest[i] = (OP(src[i])<<s) | scrap; \
+ scrap = OP(src[i])>>(32-s); \
+ i++; \
+ dest[i] = (dest[i] & ~mask) | (scrap & mask); \
+ } \
+ } \
+ }else{ \
+ s = soffs-doffs; \
+ if(snw == 1){ \
+ mask = (lomask(nbits)<<doffs); \
+ dest[0] = (dest[0] & ~mask) | ((OP(src[0])>>s) & mask); \
+ return; \
+ } \
+ if(nw == 1){ \
+ mask = (lomask(nbits)<<doffs); \
+ dest[0] = (dest[0] & ~mask) | \
+ (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
+ return; \
+ } \
+ mask = ~lomask(doffs); \
+ dest[0] = (dest[0] & ~mask) | (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
+ for(i = 1; i < nw-1; i++) \
+ dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \
+ tail = (doffs+nbits)&31; \
+ mask = tail ? lomask(tail) : ONES32; \
+ if(snw == nw){ \
+ dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \
+ } \
+ else /* snw > nw */ { \
+ dest[i] = (dest[i] & ~mask) | \
+ (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \
+ } \
+ } \
+}
+
+#define BV_COPY(a) (a)
+#define BV_NOT(a) (~(a))
+BITVECTOR_COPY_OP(copy, BV_COPY)
+BITVECTOR_COPY_OP(not_to, BV_NOT)
+
+// copy from source to dest while reversing bit-order
+// assumes dest offset == 0
+// assumes source and dest don't overlap
+// assumes offset < 32
+void
+bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits)
+{
+ uint32_t i, nw, tail;
+
+ if(nbits == 0)
+ return;
+
+ nw = (soffs+nbits+31)>>5;
+ // first, reverse the words while reversing bit order within each word
+ for(i = 0; i < nw/2; i++){
+ dest[i] = bitreverse(src[nw-i-1]);
+ dest[nw-i-1] = bitreverse(src[i]);
+ }
+ if(nw&0x1)
+ dest[i] = bitreverse(src[i]);
+
+ tail = (soffs+nbits)&31;
+ if(tail)
+ bitvector_shr(dest, nw, 32-tail);
+}
+
+void
+bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits)
+{
+ uint32_t i, nw, tail, *temp, a[MALLOC_CUTOFF];
+
+ if(nbits == 0)
+ return;
+
+ nw = (offs+nbits+31)>>5;
+ temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : a;
+ for(i = 0; i < nw/2; i++){
+ temp[i] = bitreverse(b[nw-i-1]);
+ temp[nw-i-1] = bitreverse(b[i]);
+ }
+ if(nw & 1)
+ temp[i] = bitreverse(b[i]);
+
+ tail = (offs+nbits)&31;
+ bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
+ if(nw > MALLOC_CUTOFF)
+ free(temp);
+}
+
+uint64_t
+bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits)
+{
+ size_t i, nw;
+ uint32_t ntail;
+ uint64_t ans;
+
+ if(nbits == 0)
+ return 0;
+ nw = ((uint64_t)offs+nbits+31)>>5;
+
+ if(nw == 1)
+ return count_bits(b[0] & (lomask(nbits)<<offs));
+
+ ans = count_bits(b[0]>>offs); // first end cap
+
+ for(i = 1; i < nw-1; i++)
+ ans += count_bits(b[i]);
+
+ ntail = (offs + (uint32_t)nbits) & 31;
+ ans += count_bits(b[i] & (ntail > 0 ? lomask(ntail) : ONES32)); // last end cap
+
+ return ans;
+}
+
+uint32_t
+bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits)
+{
+ uint32_t i, nw, tail, mask;
+
+ if(nbits == 0)
+ return 0;
+ nw = (offs+nbits+31)>>5;
+
+ if(nw == 1){
+ mask = (lomask(nbits)<<offs);
+ if((b[0] & mask) != mask)
+ return 1;
+ return 0;
+ }
+
+ mask = ~lomask(offs);
+ if((b[0] & mask) != mask)
+ return 1;
+
+ for(i = 1; i < nw-1; i++)
+ if(b[i] != ONES32)
+ return 1;
+
+ tail = (offs+nbits)&31;
+ if(tail == 0)
+ return b[i] != ONES32;
+ mask = lomask(tail);
+ return (b[i] & mask) != mask;
+}
+
+uint32_t
+bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits)
+{
+ uint32_t i, nw, tail, mask;
+
+ if(nbits == 0)
+ return 0;
+ nw = (offs+nbits+31)>>5;
+
+ if(nw == 1){
+ mask = lomask(nbits)<<offs;
+ return (b[0] & mask) != 0;
+ }
+
+ mask = ~lomask(offs);
+ if((b[0] & mask) != 0)
+ return 1;
+
+ for(i = 1; i < nw-1; i++){
+ if(b[i] != 0)
+ return 1;
+ }
+
+ tail = (offs+nbits)&31;
+ if(tail == 0)
+ return b[i] != 0;
+ return (b[i] & lomask(tail)) != 0;
+}
+
+static void
+adjust_offset_to(uint32_t *dest, uint32_t *src, uint32_t nw, uint32_t soffs, uint32_t newoffs)
+{
+ if(newoffs > soffs)
+ bitvector_shl_to(dest, src, nw, newoffs-soffs, 1);
+ else
+ bitvector_shr_to(dest, src, nw, soffs-newoffs);
+}
+
+#define BITVECTOR_BINARY_OP_TO(opname, OP) \
+void \
+bitvector_##opname##_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits) \
+{ \
+ uint32_t nw = (doffs+nbits+31)>>5; \
+ uint32_t atmp[MALLOC_CUTOFF+1]; \
+ uint32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : atmp; \
+ uint32_t i, anw, bnw; \
+ if(aoffs == boffs){ \
+ anw = (aoffs+nbits+31)>>5; \
+ }else if(aoffs == doffs){ \
+ bnw = (boffs+nbits+31)>>5; \
+ adjust_offset_to(temp, b, bnw, boffs, aoffs); \
+ b = temp; \
+ anw = nw; \
+ }else{ \
+ anw = (aoffs+nbits+31)>>5; \
+ bnw = (boffs+nbits+31)>>5; \
+ adjust_offset_to(temp, a, anw, aoffs, boffs); \
+ a = temp; \
+ aoffs = boffs; \
+ anw = bnw; \
+ } \
+ for(i = 0; i < anw; i++) \
+ temp[i] = OP(a[i], b[i]); \
+ bitvector_copy(dest, doffs, temp, aoffs, nbits); \
+ if(nw>MALLOC_CUTOFF) \
+ free(temp); \
+}
+
+#define BV_AND(a, b) ((a)&(b))
+#define BV_OR(a, b) ((a)|(b))
+#define BV_XOR(a, b) ((a)^(b))
+BITVECTOR_BINARY_OP_TO(and, BV_AND)
+BITVECTOR_BINARY_OP_TO(or, BV_OR)
+BITVECTOR_BINARY_OP_TO(xor, BV_XOR)
--- /dev/null
+++ b/bitvector.c
@@ -1,0 +1,140 @@
+/*
+ bit vector primitives
+
+ todo:
+ * reverse
+ * nreverse
+ (- rotate left/right)
+ * shl_to
+ * not
+ - shr_row, shl_row
+
+ These routines are the back end supporting bit matrices. Many operations
+ on bit matrices are slow (such as accessing or setting a single element!)
+ but certain operations are privileged and lend themselves to extremely
+ efficient implementation due to the bit-vector nature of machine integers.
+ These are:
+ done:
+ & | $ ~ copy reverse fill sum prod
+ todo:
+ shift trans rowswap
+ would be nice:
+ channel interleave
+
+ Important note:
+ Out-of-place functions always assume dest and source have the same amount
+ of space available.
+
+ shr_to, shl_to, not_to, and reverse_to assume source and dest don't overlap
+ and_to, or_to, and xor_to allow overlap.
+*/
+
+#include "llt.h"
+
+uint32_t *
+bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero)
+{
+ uint32_t *p;
+ size_t sz = ((newsz+31)>>5) * sizeof(uint32_t);
+ p = LLT_REALLOC(b, sz);
+ if(p == nil)
+ return nil;
+ if(initzero && newsz>oldsz){
+ size_t osz = ((oldsz+31)>>5) * sizeof(uint32_t);
+ memset(&p[osz/sizeof(uint32_t)], 0, sz-osz);
+ }
+ return p;
+}
+
+uint32_t *
+bitvector_new(uint64_t n, int initzero)
+{
+ return bitvector_resize(nil, 0, n, initzero);
+}
+
+size_t
+bitvector_nwords(uint64_t nbits)
+{
+ return (nbits+31)>>5;
+}
+
+void
+bitvector_set(uint32_t *b, uint64_t n, uint32_t c)
+{
+ if(c)
+ b[n>>5] |= 1<<(n&31);
+ else
+ b[n>>5] &= ~(1<<(n&31));
+}
+
+uint32_t
+bitvector_get(uint32_t *b, uint64_t n)
+{
+ return b[n>>5] & (1<<(n&31));
+}
+
+static int
+ntz(uint32_t x)
+{
+ int n;
+
+ if(x == 0)
+ return 32;
+ n = 1;
+ if((x & 0x0000FFFF) == 0){
+ n = n +16;
+ x = x >>16;
+ }
+ if((x & 0x000000FF) == 0){
+ n = n + 8;
+ x = x >> 8;
+ }
+ if((x & 0x0000000F) == 0){
+ n = n + 4;
+ x = x >> 4;
+ }
+ if((x & 0x00000003) == 0){
+ n = n + 2;
+ x = x >> 2;
+ }
+ return n - (x & 1);
+}
+
+// given a bitvector of n bits, starting at bit n0 find the next
+// set bit, including n0.
+// returns n if no set bits.
+uint32_t
+bitvector_next(uint32_t *b, uint64_t n0, uint64_t n)
+{
+ if(n0 >= n)
+ return n;
+
+ uint32_t i = n0>>5;
+ uint32_t nb = n0&31;
+ uint32_t nw = (n+31)>>5;
+ uint32_t w;
+
+ if(i < nw-1 || (n&31) == 0)
+ w = b[i]>>nb;
+ else
+ w = (b[i]&lomask(n&31)) >> nb;
+ if(w != 0)
+ return ntz(w) + n0;
+ if(i == nw-1)
+ return n;
+ i++;
+ while(i < nw-1){
+ w = b[i];
+ if(w != 0)
+ return ntz(w) + (i<<5);
+ i++;
+ }
+ w = b[i];
+ nb = n&31;
+ i = ntz(w);
+ if(nb == 0)
+ return i + (n-32);
+ if(i >= nb)
+ return n;
+ return i + (n-nb);
+}
--- /dev/null
+++ b/bitvector.h
@@ -1,0 +1,23 @@
+uint32_t bitreverse(uint32_t x);
+uint32_t *bitvector_new(uint64_t n, int initzero);
+uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
+size_t bitvector_nwords(uint64_t nbits);
+void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
+uint32_t bitvector_get(uint32_t *b, uint64_t n);
+uint32_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n);
+void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
+void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
+void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
+void bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap);
+void bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits);
+void bitvector_copy(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
+void bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits);
+void bitvector_not_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
+void bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits);
+void bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits);
+void bitvector_and_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
+void bitvector_or_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
+void bitvector_xor_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
+uint64_t bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits);
+uint32_t bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits);
+uint32_t bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits);
--- a/builtins.c
+++ b/builtins.c
@@ -6,6 +6,8 @@
#include "flisp.h"
#include "operators.h"
#include "cvalues.h"
+#include "timefuncs.h"
+#include "random.h"
size_t
llength(value_t v)
--- /dev/null
+++ b/dump.c
@@ -1,0 +1,35 @@
+#include "llt.h"
+
+static char hexdig[] = "0123456789abcdef";
+
+/*
+ display a given number of bytes from a buffer, with the first
+ address label being startoffs
+*/
+void
+hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs)
+{
+ size_t offs = 0;
+ size_t i, pos;
+ char ch, linebuffer[16], hexc[4];
+ static const char *spc50 = " ";
+
+ hexc[2] = hexc[3] = ' ';
+ do{
+ ios_printf(dest, "%.8x ", offs+startoffs);
+ pos = 10;
+ for(i = 0; i < 16 && offs < len; i++, offs++){
+ ch = buffer[offs];
+ linebuffer[i] = (ch < 32 || ch >= 0x7f) ? '.' : ch;
+ hexc[0] = hexdig[((uint8_t)ch)>>4];
+ hexc[1] = hexdig[ch & 0x0f];
+ pos += ios_write(dest, hexc, (i == 7 || i == 15) ? 4 : 3);
+ }
+ for(; i < 16; i++)
+ linebuffer[i] = ' ';
+ ios_write(dest, spc50, 60-pos);
+ ios_putc('|', dest);
+ ios_write(dest, linebuffer, 16);
+ ios_write(dest, "|\n", 2);
+ }while(offs < len);
+}
--- a/equal.c
+++ b/equal.c
@@ -4,6 +4,7 @@
#include "opcodes.h"
#include "cvalues.h"
#include "equal.h"
+#include "hashing.h"
#define BOUNDED_COMPARE_BOUND 128
#define BOUNDED_HASH_BOUND 16384
--- a/flisp.c
+++ b/flisp.c
@@ -14,6 +14,7 @@
#include "print.h"
#include "read.h"
#include "equal.h"
+#include "hashing.h"
typedef struct {
char *name;
--- /dev/null
+++ b/hashing.c
@@ -1,0 +1,75 @@
+#include "llt.h"
+
+lltuint_t
+nextipow2(lltuint_t i)
+{
+ if(i == 0)
+ return 1;
+ i |= i >> 1;
+ i |= i >> 2;
+ i |= i >> 4;
+ i |= i >> 8;
+ i |= i >> 16;
+#ifdef BITS64
+ i |= i >> 32;
+#endif
+ i++;
+ return i ? i : TOP_BIT;
+}
+
+uint32_t
+int32hash(uint32_t a)
+{
+ a = (a+0x7ed55d16) + (a<<12);
+ a = (a^0xc761c23c) ^ (a>>19);
+ a = (a+0x165667b1) + (a<<5);
+ a = (a+0xd3a2646c) ^ (a<<9);
+ a = (a+0xfd7046c5) + (a<<3);
+ a = (a^0xb55a4f09) ^ (a>>16);
+ return a;
+}
+
+uint64_t
+int64hash(uint64_t key)
+{
+ key = (~key) + (key << 21); // key = (key << 21) - key - 1;
+ key = key ^ (key >> 24);
+ key = (key + (key << 3)) + (key << 8); // key * 265
+ key = key ^ (key >> 14);
+ key = (key + (key << 2)) + (key << 4); // key * 21
+ key = key ^ (key >> 28);
+ key = key + (key << 31);
+ return key;
+}
+
+uint32_t
+int64to32hash(uint64_t key)
+{
+ key = (~key) + (key << 18); // key = (key << 18) - key - 1;
+ key = key ^ (key >> 31);
+ key = key * 21; // key = (key + (key << 2)) + (key << 4);
+ key = key ^ (key >> 11);
+ key = key + (key << 6);
+ key = key ^ (key >> 22);
+ return (uint32_t)key;
+}
+
+#include "lookup3.c"
+
+uint64_t
+memhash(const char* buf, size_t n)
+{
+ uint32_t c = 0xcafe8881, b = 0x4d6a087c;
+
+ hashlittle2(buf, n, &c, &b);
+ return (uint64_t)c | (((uint64_t)b)<<32);
+}
+
+uint32_t
+memhash32(const char* buf, size_t n)
+{
+ uint32_t c = 0xcafe8881, b = 0x4d6a087c;
+
+ hashlittle2(buf, n, &c, &b);
+ return c;
+}
--- /dev/null
+++ b/hashing.h
@@ -1,0 +1,11 @@
+#ifndef HASHING_H_
+#define HASHING_H_
+
+lltuint_t nextipow2(lltuint_t i);
+uint32_t int32hash(uint32_t a);
+uint64_t int64hash(uint64_t key);
+uint32_t int64to32hash(uint64_t key);
+uint64_t memhash(const char* buf, size_t n);
+uint32_t memhash32(const char* buf, size_t n);
+
+#endif
--- /dev/null
+++ b/htable.c
@@ -1,0 +1,53 @@
+/*
+ functions common to all hash table instantiations
+*/
+
+#include "llt.h"
+#include "htable.h"
+#include "hashing.h"
+
+htable_t *
+htable_new(htable_t *h, size_t size)
+{
+ if(size <= HT_N_INLINE/2){
+ h->size = size = HT_N_INLINE;
+ h->table = &h->_space[0];
+ }else{
+ size = nextipow2(size);
+ size *= 2; // 2 pointers per key/value pair
+ size *= 2; // aim for 50% occupancy
+ h->size = size;
+ h->table = LLT_ALLOC(size*sizeof(void*));
+ }
+ if(h->table == nil)
+ return nil;
+ size_t i;
+ for(i = 0; i < size; i++)
+ h->table[i] = HT_NOTFOUND;
+ return h;
+}
+
+void
+htable_free(htable_t *h)
+{
+ if(h->table != &h->_space[0])
+ LLT_FREE(h->table);
+}
+
+// empty and reduce size
+void
+htable_reset(htable_t *h, size_t sz)
+{
+ sz = nextipow2(sz);
+ if(h->size > sz*4 && h->size > HT_N_INLINE){
+ size_t newsz = sz*4;
+ void **newtab = LLT_REALLOC(h->table, newsz*sizeof(void*));
+ if(newtab == nil)
+ return;
+ h->size = newsz;
+ h->table = newtab;
+ }
+ size_t i, hsz = h->size;
+ for(i = 0; i < hsz; i++)
+ h->table[i] = HT_NOTFOUND;
+}
--- /dev/null
+++ b/htable.h
@@ -1,0 +1,22 @@
+#ifndef __HTABLE_H_
+#define __HTABLE_H_
+
+#define HT_N_INLINE 32
+
+typedef struct {
+ size_t size;
+ void **table;
+ void *_space[HT_N_INLINE];
+}htable_t;
+
+// define this to be an invalid key/value
+#define HT_NOTFOUND ((void*)1)
+
+// initialize and free
+htable_t *htable_new(htable_t *h, size_t size);
+void htable_free(htable_t *h);
+
+// clear and (possibly) change size
+void htable_reset(htable_t *h, size_t sz);
+
+#endif
--- /dev/null
+++ b/htable.inc
@@ -1,0 +1,147 @@
+//-*- mode:c -*-
+
+/*
+ include this file and call HTIMPL to generate an implementation
+*/
+
+#define hash_size(h) ((h)->size/2)
+
+// compute empirical max-probe for a given size
+#define max_probe(size) ((size) <= (HT_N_INLINE*2) ? (HT_N_INLINE/2) : (size)>>3)
+
+#define HTIMPL(HTNAME, HFUNC, EQFUNC) \
+static void ** \
+HTNAME##_lookup_bp(htable_t *h, void *key) \
+{ \
+ lltuint_t hv; \
+ size_t i, orig, index, iter; \
+ size_t newsz, sz = hash_size(h); \
+ size_t maxprobe = max_probe(sz); \
+ void **tab = h->table; \
+ void **ol; \
+ \
+ hv = HFUNC((uintptr_t)key); \
+retry_bp: \
+ iter = 0; \
+ index = (hv & (sz-1)) * 2; \
+ sz *= 2; \
+ orig = index; \
+ \
+ do{ \
+ if(tab[index+1] == HT_NOTFOUND){ \
+ tab[index] = key; \
+ return &tab[index+1]; \
+ } \
+ if(EQFUNC(key, tab[index])) \
+ return &tab[index+1]; \
+ index = (index+2) & (sz-1); \
+ iter++; \
+ if(iter > maxprobe) \
+ break; \
+ }while(index != orig); \
+ \
+ /* table full */ \
+ /* quadruple size, rehash, retry the insert */ \
+ /* it's important to grow the table really fast; otherwise we waste */ \
+ /* lots of time rehashing all the keys over and over. */ \
+ sz = h->size; \
+ ol = h->table; \
+ if(sz >= (1<<19) || (sz <= (1<<8))) \
+ newsz = sz<<1; \
+ else if(sz <= HT_N_INLINE) \
+ newsz = HT_N_INLINE; \
+ else \
+ newsz = sz<<2; \
+ tab = (void**)LLT_ALLOC(newsz*sizeof(void*)); \
+ if(tab == nil) \
+ return nil; \
+ for(i = 0; i < newsz; i++) \
+ tab[i] = HT_NOTFOUND; \
+ h->table = tab; \
+ h->size = newsz; \
+ for(i = 0; i < sz; i += 2) { \
+ if(ol[i+1] != HT_NOTFOUND) \
+ (*HTNAME##_lookup_bp(h, ol[i])) = ol[i+1]; \
+ } \
+ if(ol != &h->_space[0]) \
+ LLT_FREE(ol); \
+ sz = hash_size(h); \
+ maxprobe = max_probe(sz); \
+ tab = h->table; \
+ goto retry_bp; \
+} \
+ \
+void \
+HTNAME##_put(htable_t *h, void *key, void *val) \
+{ \
+ void **bp = HTNAME##_lookup_bp(h, key); \
+ *bp = val; \
+} \
+ \
+void ** \
+HTNAME##_bp(htable_t *h, void *key) \
+{ \
+ return HTNAME##_lookup_bp(h, key); \
+} \
+ \
+/* returns bp if key is in hash, otherwise nil */ \
+/* if return is non-nil and *bp == HT_NOTFOUND then key was deleted */ \
+static void ** \
+HTNAME##_peek_bp(htable_t *h, void *key) \
+{ \
+ size_t sz = hash_size(h); \
+ size_t maxprobe = max_probe(sz); \
+ void **tab = h->table; \
+ size_t index = (HFUNC((uintptr_t)key) & (sz-1)) * 2; \
+ sz *= 2; \
+ size_t orig = index; \
+ size_t iter = 0; \
+ \
+ do { \
+ if(tab[index] == HT_NOTFOUND) \
+ return nil; \
+ if(EQFUNC(key, tab[index])) \
+ return &tab[index+1]; \
+ \
+ index = (index+2) & (sz-1); \
+ iter++; \
+ if(iter > maxprobe) \
+ break; \
+ }while(index != orig); \
+ \
+ return nil; \
+} \
+ \
+void *\
+HTNAME##_get(htable_t *h, void *key) \
+{ \
+ void **bp = HTNAME##_peek_bp(h, key); \
+ if(bp == nil) \
+ return HT_NOTFOUND; \
+ return *bp; \
+} \
+ \
+int \
+HTNAME##_has(htable_t *h, void *key) \
+{ \
+ return HTNAME##_get(h, key) != HT_NOTFOUND; \
+} \
+ \
+int \
+HTNAME##_remove(htable_t *h, void *key) \
+{ \
+ void **bp = HTNAME##_peek_bp(h, key); \
+ if(bp != nil){ \
+ *bp = HT_NOTFOUND; \
+ return 1; \
+ } \
+ return 0; \
+} \
+ \
+void \
+HTNAME##_adjoin(htable_t *h, void *key, void *val) \
+{ \
+ void **bp = HTNAME##_lookup_bp(h, key); \
+ if(*bp == HT_NOTFOUND) \
+ *bp = val; \
+}
--- /dev/null
+++ b/htableh.inc
@@ -1,0 +1,30 @@
+//-*- mode:c -*-
+
+#include "htable.h"
+
+#define HTPROT(HTNAME) \
+void *HTNAME##_get(htable_t *h, void *key); \
+void HTNAME##_put(htable_t *h, void *key, void *val); \
+void HTNAME##_adjoin(htable_t *h, void *key, void *val); \
+int HTNAME##_has(htable_t *h, void *key); \
+int HTNAME##_remove(htable_t *h, void *key); \
+void **HTNAME##_bp(htable_t *h, void *key);
+
+// return value, or HT_NOTFOUND if key not found
+
+// add key/value binding
+
+// add binding iff key is unbound
+
+// does key exist?
+
+// logically remove key
+
+// get a pointer to the location of the value for the given key.
+// creates the location if it doesn't exist. only returns nil
+// if memory allocation fails.
+// this should be used for updates, for example:
+// void **bp = ptrhash_bp(h, key);
+// *bp = f(*bp);
+// do not reuse bp if there might be intervening calls to ptrhash_put,
+// ptrhash_bp, ptrhash_reset, or ptrhash_free.
--- /dev/null
+++ b/ieee754.h
@@ -1,0 +1,66 @@
+#ifndef __IEEE754_H_
+#define __IEEE754_H_
+
+union ieee754_float {
+ float f;
+
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:8;
+ unsigned int mantissa:23;
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int mantissa:23;
+ unsigned int exponent:8;
+ unsigned int negative:1;
+#else
+#error which endian?
+#endif
+ }ieee;
+};
+
+#define IEEE754_FLOAT_BIAS 0x7f
+
+union ieee754_double {
+ double d;
+
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:11;
+ unsigned int mantissa0:20;
+ unsigned int mantissa1:32;
+#else
+ unsigned int mantissa1:32;
+ unsigned int mantissa0:20;
+ unsigned int exponent:11;
+ unsigned int negative:1;
+#endif
+ }ieee;
+};
+
+#define IEEE754_DOUBLE_BIAS 0x3ff
+
+union ieee854_long_double {
+ long double d;
+
+ struct {
+#if BYTE_ORDER == BIG_ENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:15;
+ unsigned int empty:16;
+ unsigned int mantissa0:32;
+ unsigned int mantissa1:32;
+#else
+ unsigned int mantissa1:32;
+ unsigned int mantissa0:32;
+ unsigned int exponent:15;
+ unsigned int negative:1;
+ unsigned int empty:16;
+#endif
+ }ieee;
+};
+
+#define IEEE854_LONG_DOUBLE_BIAS 0x3fff
+
+#endif
--- /dev/null
+++ b/ios.c
@@ -1,0 +1,1019 @@
+#include "llt.h"
+#include "timefuncs.h"
+
+#define MOST_OF(x) ((x) - ((x)>>4))
+
+ios_t *ios_stdin = nil;
+ios_t *ios_stdout = nil;
+ios_t *ios_stderr = nil;
+
+/* OS-level primitive wrappers */
+
+void *
+llt_memrchr(const void *s, int c, size_t n)
+{
+ const uint8_t *src = (const uint8_t*)s + n;
+ uint8_t uc = c;
+ while(--src >= (uint8_t*)s)
+ if(*src == uc)
+ return (void *)src;
+ return nil;
+}
+
+#if !defined(__plan9__)
+static int
+_enonfatal(int err)
+{
+ return err == EAGAIN || err == EINPROGRESS || err == EINTR || err == EWOULDBLOCK;
+}
+#define SLEEP_TIME 5//ms
+#endif
+
+// return error code, #bytes read in *nread
+// these wrappers retry operations until success or a fatal error
+static int
+_os_read(long fd, char *buf, size_t n, size_t *nread)
+{
+ ssize_t r;
+
+ while(1){
+ r = read((int)fd, buf, n);
+ if(r > -1){
+ *nread = (size_t)r;
+ break;
+ }
+#if defined(__plan9__)
+ return r;
+#else
+ if(!_enonfatal(errno)){
+ *nread = 0;
+ return errno;
+ }
+ sleep_ms(SLEEP_TIME);
+#endif
+ }
+ return 0;
+}
+
+static int
+_os_read_all(long fd, char *buf, size_t n, size_t *nread)
+{
+ size_t got;
+
+ *nread = 0;
+
+ while(n > 0){
+ int err = _os_read(fd, buf, n, &got);
+ n -= got;
+ *nread += got;
+ buf += got;
+ if(err || got == 0)
+ return err;
+ }
+ return 0;
+}
+
+static int
+_os_write(long fd, const void *buf, size_t n, size_t *nwritten)
+{
+ ssize_t r;
+
+ while(1){
+ r = write((int)fd, buf, n);
+ if(r > -1){
+ *nwritten = (size_t)r;
+ break;
+ }
+#if defined(__plan9__)
+ return r;
+#else
+ if(!_enonfatal(errno)){
+ *nwritten = 0;
+ return errno;
+ }
+ sleep_ms(SLEEP_TIME);
+#endif
+ }
+ return 0;
+}
+
+static int
+_os_write_all(long fd, const char *buf, size_t n, size_t *nwritten)
+{
+ size_t wrote;
+
+ *nwritten = 0;
+ while(n > 0){
+ int err = _os_write(fd, buf, n, &wrote);
+ n -= wrote;
+ *nwritten += wrote;
+ buf += wrote;
+ if(err)
+ return err;
+ }
+ return 0;
+}
+
+
+/* internal utility functions */
+
+static char *
+_buf_realloc(ios_t *s, size_t sz)
+{
+ char *temp;
+
+ if((s->buf == nil || s->buf == &s->local[0]) && sz <= IOS_INLSIZE){
+ /* TODO: if we want to allow shrinking, see if the buffer shrank
+ down to this size, in which case we need to copy. */
+ s->buf = &s->local[0];
+ s->maxsize = IOS_INLSIZE;
+ s->ownbuf = 1;
+ return s->buf;
+ }
+
+ if(sz <= s->maxsize)
+ return s->buf;
+
+ if(s->ownbuf && s->buf != &s->local[0]){
+ // if we own the buffer we're free to resize it
+ // always allocate 1 bigger in case user wants to add a NUL
+ // terminator after taking over the buffer
+ temp = LLT_REALLOC(s->buf, sz);
+ if(temp == nil)
+ return nil;
+ }else{
+ temp = LLT_ALLOC(sz);
+ if(temp == nil)
+ return nil;
+ s->ownbuf = 1;
+ if(s->size > 0)
+ memmove(temp, s->buf, s->size);
+ }
+
+ s->buf = temp;
+ s->maxsize = sz;
+ return s->buf;
+}
+
+// write a block of data into the buffer at the current position, resizing
+// if necessary. returns # written.
+static size_t
+_write_grow(ios_t *s, const char *data, size_t n)
+{
+ size_t amt;
+ size_t newsize;
+
+ if(n == 0)
+ return 0;
+
+ if(s->bpos + n > s->size){
+ if(s->bpos + n > s->maxsize){
+ /* TODO: here you might want to add a mechanism for limiting
+ the growth of the stream. */
+ newsize = s->maxsize ? s->maxsize * 2 : 8;
+ while(s->bpos + n > newsize)
+ newsize *= 2;
+ if(_buf_realloc(s, newsize) == nil){
+ /* no more space; write as much as we can */
+ amt = s->maxsize - s->bpos;
+ if(amt > 0){
+ memmove(&s->buf[s->bpos], data, amt);
+ }
+ s->bpos += amt;
+ s->size = s->maxsize;
+ return amt;
+ }
+ }
+ s->size = s->bpos + n;
+ }
+ memmove(s->buf + s->bpos, data, n);
+ s->bpos += n;
+
+ return n;
+}
+
+
+/* interface functions, low level */
+
+static size_t
+_ios_read(ios_t *s, char *dest, size_t n, int all)
+{
+ size_t tot = 0;
+ size_t got, avail;
+
+ while(n > 0){
+ avail = s->size - s->bpos;
+
+ if(avail > 0){
+ size_t ncopy = avail >= n ? n : avail;
+ memmove(dest, s->buf + s->bpos, ncopy);
+ s->bpos += ncopy;
+ if(ncopy >= n){
+ s->state = bst_rd;
+ return tot+ncopy;
+ }
+ }
+ if(s->bm == bm_mem || s->fd == -1){
+ // can't get any more data
+ s->state = bst_rd;
+ if(avail == 0)
+ s->_eof = 1;
+ return avail;
+ }
+
+ dest += avail;
+ n -= avail;
+ tot += avail;
+
+ ios_flush(s);
+ s->bpos = s->size = 0;
+ s->state = bst_rd;
+
+ s->fpos = -1;
+ if(n > MOST_OF(s->maxsize)){
+ // doesn't fit comfortably in buffer; go direct
+ if(all)
+ _os_read_all(s->fd, dest, n, &got);
+ else
+ _os_read(s->fd, dest, n, &got);
+ tot += got;
+ if(got == 0)
+ s->_eof = 1;
+ return tot;
+ }else{
+ // refill buffer
+ if(_os_read(s->fd, s->buf, s->maxsize, &got)){
+ s->_eof = 1;
+ return tot;
+ }
+ if(got == 0){
+ s->_eof = 1;
+ return tot;
+ }
+ s->size = got;
+ }
+ }
+
+ return tot;
+}
+
+size_t
+ios_read(ios_t *s, char *dest, size_t n)
+{
+ return _ios_read(s, dest, n, 0);
+}
+
+size_t
+ios_readall(ios_t *s, char *dest, size_t n)
+{
+ return _ios_read(s, dest, n, 1);
+}
+
+size_t
+ios_readprep(ios_t *s, size_t n)
+{
+ if(s->state == bst_wr && s->bm != bm_mem){
+ ios_flush(s);
+ s->bpos = s->size = 0;
+ }
+ size_t space = s->size - s->bpos;
+ s->state = bst_rd;
+ if(space >= n || s->bm == bm_mem || s->fd == -1)
+ return space;
+ if(s->maxsize < s->bpos+n){
+ // it won't fit. grow buffer or move data back.
+ if(n <= s->maxsize && space <= ((s->maxsize)>>2)){
+ if(space)
+ memmove(s->buf, s->buf+s->bpos, space);
+ s->size -= s->bpos;
+ s->bpos = 0;
+ }
+ else {
+ if(_buf_realloc(s, s->bpos + n) == nil)
+ return space;
+ }
+ }
+ size_t got;
+ int result = _os_read(s->fd, s->buf+s->size, s->maxsize - s->size, &got);
+ if(result)
+ return space;
+ s->size += got;
+ return s->size - s->bpos;
+}
+
+static void
+_write_update_pos(ios_t *s)
+{
+ if(s->bpos > s->ndirty)
+ s->ndirty = s->bpos;
+ if(s->bpos > s->size)
+ s->size = s->bpos;
+}
+
+size_t
+ios_write(ios_t *s, const char *data, size_t n)
+{
+ if(s->readonly)
+ return 0;
+ if(n == 0)
+ return 0;
+ size_t space;
+ size_t wrote = 0;
+
+ if(s->state == bst_none)
+ s->state = bst_wr;
+ if(s->state == bst_rd){
+ if(!s->rereadable){
+ s->size = 0;
+ s->bpos = 0;
+ }
+ space = s->size - s->bpos;
+ }else{
+ space = s->maxsize - s->bpos;
+ }
+
+ if(s->bm == bm_mem){
+ wrote = _write_grow(s, data, n);
+ }else if(s->bm == bm_none){
+ s->fpos = -1;
+ _os_write_all(s->fd, data, n, &wrote);
+ return wrote;
+ }else if(n <= space){
+ if(s->bm == bm_line){
+ char *nl;
+ if((nl = llt_memrchr(data, '\n', n)) != nil){
+ size_t linesz = nl-data+1;
+ s->bm = bm_block;
+ wrote += ios_write(s, data, linesz);
+ ios_flush(s);
+ s->bm = bm_line;
+ n -= linesz;
+ data += linesz;
+ }
+ }
+ memmove(s->buf + s->bpos, data, n);
+ s->bpos += n;
+ wrote += n;
+ }else{
+ s->state = bst_wr;
+ ios_flush(s);
+ if(n > MOST_OF(s->maxsize)){
+ _os_write_all(s->fd, data, n, &wrote);
+ return wrote;
+ }
+ return ios_write(s, data, n);
+ }
+ _write_update_pos(s);
+ return wrote;
+}
+
+off_t
+ios_seek(ios_t *s, off_t pos)
+{
+ s->_eof = 0;
+ if(s->bm == bm_mem){
+ if((size_t)pos > s->size)
+ return -1;
+ s->bpos = pos;
+ }else{
+ ios_flush(s);
+ off_t fdpos = lseek(s->fd, pos, SEEK_SET);
+ if(fdpos == (off_t)-1)
+ return fdpos;
+ s->bpos = s->size = 0;
+ }
+ return 0;
+}
+
+off_t
+ios_seek_end(ios_t *s)
+{
+ s->_eof = 1;
+ if(s->bm == bm_mem){
+ s->bpos = s->size;
+ }else{
+ ios_flush(s);
+ off_t fdpos = lseek(s->fd, 0, SEEK_END);
+ if(fdpos == (off_t)-1)
+ return fdpos;
+ s->bpos = s->size = 0;
+ }
+ return 0;
+}
+
+off_t
+ios_skip(ios_t *s, off_t offs)
+{
+ if(offs != 0){
+ if(offs > 0){
+ if(offs <= (off_t)(s->size-s->bpos)){
+ s->bpos += offs;
+ return 0;
+ }else if(s->bm == bm_mem){
+ // TODO: maybe grow buffer
+ return -1;
+ }
+ }else if(offs < 0){
+ if(-offs <= (off_t)s->bpos){
+ s->bpos += offs;
+ s->_eof = 0;
+ return 0;
+ }else if(s->bm == bm_mem){
+ return -1;
+ }
+ }
+ ios_flush(s);
+ if(s->state == bst_wr)
+ offs += s->bpos;
+ else if(s->state == bst_rd)
+ offs -= s->size - s->bpos;
+ off_t fdpos = lseek(s->fd, offs, SEEK_CUR);
+ if(fdpos == (off_t)-1)
+ return fdpos;
+ s->bpos = s->size = 0;
+ s->_eof = 0;
+ }
+ return 0;
+}
+
+off_t
+ios_pos(ios_t *s)
+{
+ if(s->bm == bm_mem)
+ return (off_t)s->bpos;
+
+ off_t fdpos = s->fpos;
+ if(fdpos == (off_t)-1){
+ fdpos = lseek(s->fd, 0, SEEK_CUR);
+ if(fdpos == (off_t)-1)
+ return fdpos;
+ s->fpos = fdpos;
+ }
+
+ if(s->state == bst_wr)
+ fdpos += s->bpos;
+ else if(s->state == bst_rd)
+ fdpos -= s->size - s->bpos;
+ return fdpos;
+}
+
+size_t
+ios_trunc(ios_t *s, size_t size)
+{
+ if(s->bm == bm_mem){
+ if(size == s->size)
+ return s->size;
+ if(size < s->size){
+ if(s->bpos > size)
+ s->bpos = size;
+ }else if(_buf_realloc(s, size) == nil)
+ return s->size;
+ s->size = size;
+ return size;
+ }
+ //todo
+ return 0;
+}
+
+int
+ios_eof(ios_t *s)
+{
+ if(s->bm == bm_mem)
+ return s->_eof;
+ return s->fd == -1 || s->_eof;
+}
+
+int
+ios_flush(ios_t *s)
+{
+ if(s->ndirty == 0 || s->bm == bm_mem || s->buf == nil)
+ return 0;
+ if(s->fd == -1)
+ return -1;
+
+ if(s->state == bst_rd){
+ if(lseek(s->fd, -(off_t)s->size, SEEK_CUR) == (off_t)-1){
+ // FIXME eh?
+ }
+ }
+
+ size_t nw, ntowrite = s->ndirty;
+ s->fpos = -1;
+ int err = _os_write_all(s->fd, s->buf, ntowrite, &nw);
+ // todo: try recovering from some kinds of errors (e.g. retry)
+
+ if(s->state == bst_rd){
+ if(lseek(s->fd, s->size - nw, SEEK_CUR) == (off_t)-1){
+ // FIXME eh?
+ }
+ }else if(s->state == bst_wr){
+ if(s->bpos != nw && lseek(s->fd, (off_t)s->bpos - (off_t)nw, SEEK_CUR) == (off_t)-1){
+ // FIXME eh?
+ }
+ // now preserve the invariant that data to write
+ // begins at the beginning of the buffer, and s->size refers
+ // to how much valid file data is stored in the buffer.
+ if(s->size > s->ndirty){
+ size_t delta = s->size - s->ndirty;
+ memmove(s->buf, s->buf + s->ndirty, delta);
+ }
+ s->size -= s->ndirty;
+ s->bpos = 0;
+ }
+
+ s->ndirty = 0;
+
+ if(err)
+ return err;
+ if(nw < ntowrite)
+ return -1;
+ return 0;
+}
+
+void
+ios_close(ios_t *s)
+{
+ ios_flush(s);
+ if(s->fd != -1 && s->ownfd)
+ close(s->fd);
+ s->fd = -1;
+ if(s->buf != nil && s->ownbuf && s->buf != &s->local[0])
+ LLT_FREE(s->buf);
+ s->buf = nil;
+ s->size = s->maxsize = s->bpos = 0;
+}
+
+static void
+_buf_init(ios_t *s, bufmode_t bm)
+{
+ s->bm = bm;
+ if(s->bm == bm_mem || s->bm == bm_none){
+ s->buf = &s->local[0];
+ s->maxsize = IOS_INLSIZE;
+ }else{
+ s->buf = nil;
+ _buf_realloc(s, IOS_BUFSIZE);
+ }
+ s->size = s->bpos = 0;
+}
+
+char *
+ios_takebuf(ios_t *s, size_t *psize)
+{
+ char *buf;
+
+ ios_flush(s);
+
+ if(s->buf == &s->local[0] || s->buf == nil || (!s->ownbuf && s->size == s->maxsize)){
+ buf = LLT_ALLOC(s->size+1);
+ if(buf == nil)
+ return nil;
+ if(s->size)
+ memmove(buf, s->buf, s->size);
+ }else if(s->size == s->maxsize){
+ buf = LLT_REALLOC(s->buf, s->size + 1);
+ if(buf == nil)
+ return nil;
+ }else{
+ buf = s->buf;
+ }
+ buf[s->size] = '\0';
+ *psize = s->size + 1;
+
+ /* empty stream and reinitialize */
+ _buf_init(s, s->bm);
+
+ return buf;
+}
+
+int
+ios_setbuf(ios_t *s, char *buf, size_t size, int own)
+{
+ ios_flush(s);
+ size_t nvalid;
+
+ nvalid = size < s->size ? size : s->size;
+ if(nvalid > 0)
+ memmove(buf, s->buf, nvalid);
+ if(s->bpos > nvalid){
+ // truncated
+ s->bpos = nvalid;
+ }
+ s->size = nvalid;
+
+ if(s->buf != nil && s->ownbuf && s->buf != &s->local[0])
+ LLT_FREE(s->buf);
+ s->buf = buf;
+ s->maxsize = size;
+ s->ownbuf = own;
+ return 0;
+}
+
+int
+ios_bufmode(ios_t *s, bufmode_t mode)
+{
+ // no fd; can only do mem-only buffering
+ if(s->fd == -1 && mode != bm_mem)
+ return -1;
+ s->bm = mode;
+ return 0;
+}
+
+void
+ios_set_readonly(ios_t *s)
+{
+ if(s->readonly)
+ return;
+ ios_flush(s);
+ s->state = bst_none;
+ s->readonly = 1;
+}
+
+static size_t
+ios_copy_(ios_t *to, ios_t *from, size_t nbytes, int all)
+{
+ size_t total = 0, avail;
+ if(!ios_eof(from)){
+ do{
+ avail = ios_readprep(from, IOS_BUFSIZE/2);
+ if(avail == 0){
+ from->_eof = 1;
+ break;
+ }
+ size_t written, ntowrite;
+ ntowrite = (avail <= nbytes || all) ? avail : nbytes;
+ written = ios_write(to, from->buf+from->bpos, ntowrite);
+ // TODO: should this be +=written instead?
+ from->bpos += ntowrite;
+ total += written;
+ if(!all){
+ nbytes -= written;
+ if(nbytes == 0)
+ break;
+ }
+ if(written < ntowrite)
+ break;
+ }while(!ios_eof(from));
+ }
+ return total;
+}
+
+size_t
+ios_copy(ios_t *to, ios_t *from, size_t nbytes)
+{
+ return ios_copy_(to, from, nbytes, 0);
+}
+
+size_t
+ios_copyall(ios_t *to, ios_t *from)
+{
+ return ios_copy_(to, from, 0, 1);
+}
+
+#define LINE_CHUNK_SIZE 160
+
+size_t
+ios_copyuntil(ios_t *to, ios_t *from, char delim)
+{
+ size_t total = 0, avail = from->size - from->bpos;
+ int first = 1;
+ if(!ios_eof(from)){
+ do{
+ if(avail == 0){
+ first = 0;
+ avail = ios_readprep(from, LINE_CHUNK_SIZE);
+ }
+ size_t written;
+ char *pd = memchr(from->buf+from->bpos, delim, avail);
+ if(pd == nil){
+ written = ios_write(to, from->buf+from->bpos, avail);
+ from->bpos += avail;
+ total += written;
+ avail = 0;
+ }else{
+ size_t ntowrite = pd - (from->buf+from->bpos) + 1;
+ written = ios_write(to, from->buf+from->bpos, ntowrite);
+ from->bpos += ntowrite;
+ total += written;
+ return total;
+ }
+ }while(!ios_eof(from) && (first || avail >= LINE_CHUNK_SIZE));
+ }
+ from->_eof = 1;
+ return total;
+}
+
+static void
+_ios_init(ios_t *s)
+{
+ // put all fields in a sane initial state
+ memset(s, 0, sizeof(*s));
+ s->bm = bm_block;
+ s->state = bst_none;
+ s->fpos = -1;
+ s->lineno = 1;
+ s->fd = -1;
+ s->ownbuf = 1;
+}
+
+/* stream object initializers. we do no allocation. */
+
+ios_t *
+ios_file(ios_t *s, char *fname, int rd, int wr, int creat, int trunc)
+{
+ int fd;
+ if(!(rd || wr)) // must specify read and/or write
+ goto open_file_err;
+ int flags = wr ? (rd ? O_RDWR : O_WRONLY) : O_RDONLY;
+ if(trunc)
+ flags |= O_TRUNC;
+#if defined(__plan9__)
+ fd = creat ? create(fname, flags, 0644) : open(fname, flags);
+#else
+ if(creat)
+ flags |= O_CREAT;
+ fd = open(fname, flags, 0644);
+#endif
+ s = ios_fd(s, fd, 1, 1);
+ if(fd < 0)
+ goto open_file_err;
+ if(!wr)
+ s->readonly = 1;
+ return s;
+open_file_err:
+ s->fd = -1;
+ return nil;
+}
+
+ios_t *
+ios_mem(ios_t *s, size_t initsize)
+{
+ _ios_init(s);
+ s->bm = bm_mem;
+ _buf_realloc(s, initsize);
+ return s;
+}
+
+ios_t *
+ios_str(ios_t *s, char *str)
+{
+ size_t n = strlen(str);
+ if(ios_mem(s, n+1) == nil)
+ return nil;
+ ios_write(s, str, n+1);
+ ios_seek(s, 0);
+ return s;
+}
+
+ios_t *
+ios_static_buffer(ios_t *s, const char *buf, size_t sz)
+{
+ ios_mem(s, 0);
+ ios_setbuf(s, (char*)buf, sz, 0);
+ s->size = sz;
+ ios_set_readonly(s);
+ return s;
+}
+
+ios_t *
+ios_fd(ios_t *s, long fd, int isfile, int own)
+{
+ _ios_init(s);
+ s->fd = fd;
+ if(isfile)
+ s->rereadable = 1;
+ _buf_init(s, bm_block);
+ s->ownfd = own;
+ if(fd == STDERR_FILENO)
+ s->bm = bm_none;
+ return s;
+}
+
+void
+ios_init_stdstreams(void)
+{
+ ios_stdin = LLT_ALLOC(sizeof(ios_t));
+ ios_fd(ios_stdin, STDIN_FILENO, 0, 0);
+
+ ios_stdout = LLT_ALLOC(sizeof(ios_t));
+ ios_fd(ios_stdout, STDOUT_FILENO, 0, 0);
+ ios_stdout->bm = bm_line;
+
+ ios_stderr = LLT_ALLOC(sizeof(ios_t));
+ ios_fd(ios_stderr, STDERR_FILENO, 0, 0);
+ ios_stderr->bm = bm_none;
+}
+
+/* higher level interface */
+
+int
+ios_putc(int c, ios_t *s)
+{
+ char ch = c;
+
+ if(s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none){
+ s->buf[s->bpos++] = ch;
+ _write_update_pos(s);
+ if(s->bm == bm_line && ch == '\n')
+ ios_flush(s);
+ return 1;
+ }
+ return ios_write(s, &ch, 1);
+}
+
+int
+ios_getc(ios_t *s)
+{
+ char ch;
+ if(s->state == bst_rd && s->bpos < s->size)
+ ch = s->buf[s->bpos++];
+ else if(s->_eof)
+ return IOS_EOF;
+ else if(ios_read(s, &ch, 1) < 1)
+ return IOS_EOF;
+ if(ch == '\n')
+ s->lineno++;
+ return (uint8_t)ch;
+}
+
+int
+ios_peekc(ios_t *s)
+{
+ if(s->bpos < s->size)
+ return (uint8_t)s->buf[s->bpos];
+ if(s->_eof)
+ return IOS_EOF;
+ size_t n = ios_readprep(s, 1);
+ if(n == 0)
+ return IOS_EOF;
+ return (uint8_t)s->buf[s->bpos];
+}
+
+int
+ios_ungetc(int c, ios_t *s)
+{
+ if(s->state == bst_wr)
+ return IOS_EOF;
+ if(c == '\n')
+ s->lineno--;
+ if(s->bpos > 0){
+ s->bpos--;
+ if(s->buf[s->bpos] != (char)c)
+ s->buf[s->bpos] = (char)c;
+ s->_eof = 0;
+ return c;
+ }
+ if(s->size == s->maxsize && _buf_realloc(s, s->maxsize*2) == nil)
+ return IOS_EOF;
+ memmove(s->buf + 1, s->buf, s->size);
+ s->buf[0] = (char)c;
+ s->size++;
+ s->_eof = 0;
+ return c;
+}
+
+int
+ios_getutf8(ios_t *s, uint32_t *pwc)
+{
+ int c;
+ size_t sz;
+ char c0;
+ char buf[8];
+
+ c = ios_peekc(s);
+ if(c == IOS_EOF){
+ s->_eof = 1;
+ return IOS_EOF;
+ }
+ c0 = (char)c;
+ if((uint8_t)c0 < 0x80){
+ ios_getc(s);
+ *pwc = (uint32_t)(uint8_t)c0;
+ return 1;
+ }
+ sz = u8_seqlen(&c0)-1;
+ if(!isutf(c0) || sz > 3)
+ return 0;
+ if(ios_readprep(s, sz) < sz){
+ // NOTE: this returns EOF even though some bytes are available
+ // so we do not set s->_eof on this code path
+ return IOS_EOF;
+ }
+ if(u8_isvalid(&s->buf[s->bpos], sz+1)){
+ size_t i = s->bpos;
+ *pwc = u8_nextchar(s->buf, &i);
+ ios_read(s, buf, sz+1);
+ return 1;
+ }
+ return 0;
+}
+
+int
+ios_peekutf8(ios_t *s, uint32_t *pwc)
+{
+ int c;
+ size_t sz;
+ char c0;
+
+ c = ios_peekc(s);
+ if(c == IOS_EOF)
+ return IOS_EOF;
+ c0 = (char)c;
+ if((uint8_t)c0 < 0x80){
+ *pwc = (uint32_t)(uint8_t)c0;
+ return 1;
+ }
+ sz = u8_seqlen(&c0)-1;
+ if(!isutf(c0) || sz > 3)
+ return 0;
+ if(ios_readprep(s, sz) < sz)
+ return IOS_EOF;
+ if(u8_isvalid(&s->buf[s->bpos], sz+1)){
+ size_t i = s->bpos;
+ *pwc = u8_nextchar(s->buf, &i);
+ return 1;
+ }
+ return 0;
+}
+
+int
+ios_pututf8(ios_t *s, uint32_t wc)
+{
+ char buf[8];
+ if(wc < 0x80)
+ return ios_putc((int)wc, s);
+ size_t n = u8_toutf8(buf, 8, &wc, 1);
+ return ios_write(s, buf, n);
+}
+
+void
+ios_purge(ios_t *s)
+{
+ if(s->state == bst_rd)
+ s->bpos = s->size;
+}
+
+char *
+ios_readline(ios_t *s)
+{
+ ios_t dest;
+ ios_mem(&dest, 0);
+ ios_copyuntil(&dest, s, '\n');
+ size_t n;
+ return ios_takebuf(&dest, &n);
+}
+
+int
+ios_vprintf(ios_t *s, const char *format, va_list args)
+{
+ char *str;
+ int c;
+
+#if defined(__plan9__)
+ str = vsmprint(format, args);
+ if((c = strlen(str)) >= 0)
+ ios_write(s, str, c);
+ free(str);
+#else
+ va_list al;
+ va_copy(al, args);
+
+ if(s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none){
+ int avail = s->maxsize - s->bpos;
+ char *start = s->buf + s->bpos;
+ c = vsnprintf(start, avail, format, args);
+ if(c < 0){
+ va_end(al);
+ return c;
+ }
+ if(c < avail){
+ s->bpos += (size_t)c;
+ _write_update_pos(s);
+ // TODO: only works right if newline is at end
+ if(s->bm == bm_line && llt_memrchr(start, '\n', (size_t)c))
+ ios_flush(s);
+ va_end(al);
+ return c;
+ }
+ }
+ c = vasprintf(&str, format, al);
+ if(c >= 0){
+ ios_write(s, str, c);
+ LLT_FREE(str);
+ }
+ va_end(al);
+#endif
+ return c;
+}
+
+int
+ios_printf(ios_t *s, const char *format, ...)
+{
+ va_list args;
+ int c;
+
+ va_start(args, format);
+ c = ios_vprintf(s, format, args);
+ va_end(args);
+ return c;
+}
--- /dev/null
+++ b/ios.h
@@ -1,0 +1,207 @@
+// this flag controls when data actually moves out to the underlying I/O
+// channel. memory streams are a special case of this where the data
+// never moves out.
+typedef enum {
+ bm_none,
+ bm_line,
+ bm_block,
+ bm_mem,
+}bufmode_t;
+
+typedef enum {
+ bst_none,
+ bst_rd,
+ bst_wr,
+}bufstate_t;
+
+#define IOS_INLSIZE 54
+#define IOS_BUFSIZE 131072
+
+typedef struct {
+ bufmode_t bm;
+
+ // the state only indicates where the underlying file position is relative
+ // to the buffer. reading: at the end. writing: at the beginning.
+ // in general, you can do any operation in any state.
+ bufstate_t state;
+
+ int errcode;
+
+ char *buf; // start of buffer
+ size_t maxsize; // space allocated to buffer
+ size_t size; // length of valid data in buf, >=ndirty
+ size_t bpos; // current position in buffer
+ size_t ndirty; // # bytes at &buf[0] that need to be written
+
+ off_t fpos; // cached file pos
+ size_t lineno; // current line number
+
+ int fd;
+
+ uint8_t readonly:1;
+ uint8_t ownbuf:1;
+ uint8_t ownfd:1;
+ uint8_t _eof:1;
+
+ // this means you can read, seek back, then read the same data
+ // again any number of times. usually only true for files and strings.
+ uint8_t rereadable:1;
+
+ // this enables "stenciled writes". you can alternately write and
+ // seek without flushing in between. this performs read-before-write
+ // to populate the buffer, so "rereadable" capability is required.
+ // this is off by default.
+ //uint8_t stenciled:1;
+
+ // request durable writes (fsync)
+ // uint8_t durable:1;
+
+ // todo: mutex
+ char local[IOS_INLSIZE];
+}ios_t;
+
+/* low-level interface functions */
+size_t ios_read(ios_t *s, char *dest, size_t n);
+size_t ios_readall(ios_t *s, char *dest, size_t n);
+size_t ios_write(ios_t *s, const char *data, size_t n);
+off_t ios_seek(ios_t *s, off_t pos); // absolute seek
+off_t ios_seek_end(ios_t *s);
+off_t ios_skip(ios_t *s, off_t offs); // relative seek
+off_t ios_pos(ios_t *s); // get current position
+size_t ios_trunc(ios_t *s, size_t size);
+int ios_eof(ios_t *s);
+int ios_flush(ios_t *s);
+void ios_close(ios_t *s);
+char *ios_takebuf(ios_t *s, size_t *psize); // null-terminate and release buffer to caller
+// set buffer space to use
+int ios_setbuf(ios_t *s, char *buf, size_t size, int own);
+int ios_bufmode(ios_t *s, bufmode_t mode);
+void ios_set_readonly(ios_t *s);
+size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes);
+size_t ios_copyall(ios_t *to, ios_t *from);
+size_t ios_copyuntil(ios_t *to, ios_t *from, char delim);
+// ensure at least n bytes are buffered if possible. returns # available.
+size_t ios_readprep(ios_t *from, size_t n);
+//void ios_lock(ios_t *s);
+//int ios_trylock(ios_t *s);
+//int ios_unlock(ios_t *s);
+
+/* stream creation */
+ios_t *ios_file(ios_t *s, char *fname, int rd, int wr, int create, int trunc);
+ios_t *ios_mem(ios_t *s, size_t initsize);
+ios_t *ios_str(ios_t *s, char *str);
+ios_t *ios_static_buffer(ios_t *s, const char *buf, size_t sz);
+ios_t *ios_fd(ios_t *s, long fd, int isfile, int own);
+// todo: ios_socket
+extern ios_t *ios_stdin;
+extern ios_t *ios_stdout;
+extern ios_t *ios_stderr;
+void ios_init_stdstreams(void);
+
+/* high-level functions - output */
+int ios_putnum(ios_t *s, char *data, uint32_t type);
+int ios_putint(ios_t *s, int n);
+int ios_pututf8(ios_t *s, uint32_t wc);
+int ios_putstringz(ios_t *s, char *str, int do_write_nulterm);
+int ios_printf(ios_t *s, const char *format, ...);
+int ios_vprintf(ios_t *s, const char *format, va_list args);
+
+void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs);
+
+/* high-level stream functions - input */
+int ios_getnum(ios_t *s, char *data, uint32_t type);
+int ios_getutf8(ios_t *s, uint32_t *pwc);
+int ios_peekutf8(ios_t *s, uint32_t *pwc);
+int ios_ungetutf8(ios_t *s, uint32_t wc);
+int ios_getstringz(ios_t *dest, ios_t *src);
+int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
+int ios_getline(ios_t *s, char **pbuf, size_t *psz);
+char *ios_readline(ios_t *s);
+
+// discard data buffered for reading
+void ios_purge(ios_t *s);
+
+// seek by utf8 sequence increments
+int ios_nextutf8(ios_t *s);
+int ios_prevutf8(ios_t *s);
+
+/* stdio-style functions */
+#define IOS_EOF (-1)
+int ios_putc(int c, ios_t *s);
+//wint_t ios_putwc(ios_t *s, wchar_t wc);
+int ios_getc(ios_t *s);
+int ios_peekc(ios_t *s);
+//wint_t ios_getwc(ios_t *s);
+int ios_ungetc(int c, ios_t *s);
+//wint_t ios_ungetwc(ios_t *s, wint_t wc);
+#define ios_puts(str, s) ios_write(s, str, strlen(str))
+
+/*
+ With memory streams, mixed reads and writes are equivalent to performing
+ sequences of *p++, as either an lvalue or rvalue. File streams behave
+ similarly, but other streams might not support this. Using unbuffered
+ mode makes this more predictable.
+
+ Note on "unget" functions:
+ There are two kinds of functions here: those that operate on sized
+ blocks of bytes and those that operate on logical units like "character"
+ or "integer". The "unget" functions only work on logical units. There
+ is no "unget n bytes". You can only do an unget after a matching get.
+ However, data pushed back by an unget is available to all read operations.
+ The reason for this is that unget is defined in terms of its effect on
+ the underlying buffer (namely, it rebuffers data as if it had been
+ buffered but not read yet). IOS reserves the right to perform large block
+ operations directly, bypassing the buffer. In such a case data was
+ never buffered, so "rebuffering" has no meaning (i.e. there is no
+ correspondence between the buffer and the physical stream).
+
+ Single-bit I/O is able to write partial bytes ONLY IF the stream supports
+ seeking. Also, line buffering is not well-defined in the context of
+ single-bit I/O, so it might not do what you expect.
+
+ implementation notes:
+ in order to know where we are in a file, we must ensure the buffer
+ is only populated from the underlying stream starting with p==buf.
+
+ to switch from writing to reading: flush, set p=buf, cnt=0
+ to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
+
+ when writing: buf starts at curr. physical stream pos, p - buf is how
+ many bytes we've written logically. cnt==0
+
+ dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
+ reading to writing, where we might be in the middle of a byte without
+ having changed it.
+
+ to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
+ seek back by the same amount (undo it). write onto those bits. now set
+ the dirty bit. in this state, we can bit-read up to the end of the byte,
+ then formally switch to the read state using flush.
+
+ design points:
+ - data-source independence, including memory streams
+ - expose buffer to user, allow user-owned buffers
+ - allow direct I/O, don't always go through buffer
+ - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
+ and makes it possible for sockets where it otherwise wouldn't be
+ - tries to allow switching between reading and writing
+ - support 64-bit and large files
+ - efficient, low-latency buffering
+ - special support for utf8
+ - type-aware functions with byte-order swapping service
+ - position counter for meaningful data offsets with sockets
+
+ theory of operation:
+
+ the buffer is a view of part of a file/stream. you can seek, read, and
+ write around in it as much as you like, as if it were just a string.
+
+ we keep track of the part of the buffer that's invalid (written to).
+ we remember whether the position of the underlying stream is aligned
+ with the end of the buffer (reading mode) or the beginning (writing mode).
+
+ based on this info, we might have to seek back before doing a flush.
+
+ as optimizations, we do no writing if the buffer isn't "dirty", and we
+ do no reading if the data will only be overwritten.
+*/
--- /dev/null
+++ b/llt.c
@@ -1,0 +1,51 @@
+#include "llt.h"
+#include "random.h"
+
+double D_PNAN, D_NNAN, D_PINF, D_NINF;
+float F_PNAN, F_NNAN, F_PINF, F_NINF;
+
+void
+llt_init(void)
+{
+ D_PNAN = strtod("+NaN", nil);
+ D_NNAN = strtod("-NaN", nil);
+ D_PINF = strtod("+Inf", nil);
+ D_NINF = strtod("-Inf", nil);
+
+ *(uint32_t*)&F_PNAN = 0x7fc00000;
+ *(uint32_t*)&F_NNAN = 0xffc00000;
+ *(uint32_t*)&F_PINF = 0x7f800000;
+ *(uint32_t*)&F_NINF = 0xff800000;
+
+ randomize();
+ ios_init_stdstreams();
+}
+
+char *
+uint2str(char *dest, size_t len, uint64_t num, uint32_t base)
+{
+ int i = len-1;
+ uint64_t b = (uint64_t)base;
+ char ch;
+ dest[i--] = '\0';
+ while(i >= 0){
+ ch = (char)(num % b);
+ if(ch < 10)
+ ch += '0';
+ else
+ ch = ch-10+'a';
+ dest[i--] = ch;
+ num /= b;
+ if(num == 0)
+ break;
+ }
+ return &dest[i+1];
+}
+
+int
+isdigit_base(char c, int base)
+{
+ if(base < 11)
+ return c >= '0' && c < '0'+base;
+ return (c >= '0' && c <= '9') || (c >= 'a' && c < 'a'+base-10) || (c >= 'A' && c < 'A'+base-10);
+}
--- /dev/null
+++ b/llt.h
@@ -1,0 +1,72 @@
+#ifndef __LLT_H_
+#define __LLT_H_
+
+#include "platform.h"
+#include "utf8.h"
+#include "ios.h"
+#include "bitvector.h"
+
+#include "htableh.inc"
+HTPROT(ptrhash)
+
+#ifdef __GNUC__
+#define __unlikely(x) __builtin_expect(!!(x), 0)
+#define __likely(x) __builtin_expect(!!(x), 1)
+#else
+#define __unlikely(x) (x)
+#define __likely(x) (x)
+#endif
+
+#ifdef BOEHM_GC /* boehm GC allocator */
+#include <gc.h>
+#define LLT_ALLOC(n) GC_MALLOC(n)
+#define LLT_REALLOC(p, n) GC_REALLOC((p), (n))
+#define LLT_FREE(x) USED(x)
+#else /* standard allocator */
+#define LLT_ALLOC(n) malloc(n)
+#define LLT_REALLOC(p, n) realloc((p), (n))
+#define LLT_FREE(x) free(x)
+#endif
+
+#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
+#define bswap_32(x) \
+ ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
+ (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+#define bswap_64(x) \
+ (uint64_t)bswap_32((x) & 0xffffffffULL)<<32 | \
+ (uint64_t)bswap_32(((x)>>32) & 0xffffffffULL)
+
+#define DBL_MAXINT (1LL<<53)
+#define FLT_MAXINT (1<<24)
+#define BIT63 0x8000000000000000ULL
+#define BIT31 0x80000000UL
+
+#ifdef BITS64
+#define NBITS 64
+#define TOP_BIT BIT63
+typedef uint64_t lltuint_t;
+typedef int64_t lltint_t;
+#else
+#define NBITS 32
+#define TOP_BIT BIT31
+typedef uint32_t lltuint_t;
+typedef int32_t lltint_t;
+#endif
+
+#define LOG2_10 3.3219280948873626
+#define rel_zero(a, b) (fabs((a)/(b)) < DBL_EPSILON)
+#define LABS(n) (((n)^((n)>>(NBITS-1))) - ((n)>>(NBITS-1)))
+#define NBABS(n, nb) (((n)^((n)>>((nb)-1))) - ((n)>>((nb)-1)))
+#define LLT_ALIGN(x, sz) (((x) + (sz-1)) & (-sz))
+
+// a mask with n set lo or hi bits
+#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
+
+extern double D_PNAN, D_NNAN, D_PINF, D_NINF;
+extern float F_PNAN, F_NNAN, F_PINF, F_NINF;
+
+char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
+int isdigit_base(char c, int base);
+void llt_init(void);
+
+#endif
--- a/llt/bitvector-ops.c
+++ /dev/null
@@ -1,477 +1,0 @@
-#include "llt.h"
-
-#define ONES32 ((uint32_t)0xffffffffUL)
-
-// greater than this # of words we use malloc instead of alloca
-#define MALLOC_CUTOFF 2000
-
-static inline
-uint32_t count_bits(uint32_t b)
-{
- b = b - ((b>>1)&0x55555555);
- b = ((b>>2)&0x33333333) + (b&0x33333333);
- b = ((b>>4)+b)&0x0f0f0f0f;
- b += (b>>8);
- b += (b>>16);
- return b & 0x3f;
-}
-
-uint32_t
-bitreverse(uint32_t x)
-{
- uint32_t m;
-
-#ifdef __INTEL_COMPILER
- x = _bswap(x);
-#else
- x = (x >> 16) | (x << 16); m = 0xff00ff00;
- x = ((x & m) >> 8) | ((x & ~m) << 8);
-#endif
- m = 0xf0f0f0f0;
- x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc;
- x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa;
- x = ((x & m) >> 1) | ((x & ~m) << 1);
-
- return x;
-}
-
-// shift all bits in a long bit vector
-// n is # of int32s to consider, s is shift distance
-// lowest bit-index is bit 0 of word 0
-// TODO: handle boundary case of shift distance >= data size?
-void
-bitvector_shr(uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i;
- if(s == 0 || n == 0)
- return;
- i = s >> 5;
- if(i){
- n -= i;
- memmove(b, &b[i], n*4);
- memset(&b[n], 0, i*4);
- s &= 31;
- }
- for(i = 0; i < n-1; i++)
- b[i] = (b[i] >> s) | (b[i+1] << (32-s));
- b[i] >>= s;
-}
-
-// out-of-place version, good for re-aligning a strided submatrix to
-// linear representation when a copy is needed
-// assumes that dest has the same amount of space as source, even if it
-// wouldn't have been necessary to hold the shifted bits
-void
-bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i, j;
- if(n == 0)
- return;
- if(s == 0){
- memmove(dest, b, n*4);
- return;
- }
- j = s >> 5;
- if(j){
- n -= j;
- memset(&dest[n], 0, j*4);
- s &= 31;
- b = &b[j];
- }
- for(i = 0; i < n-1; i++)
- dest[i] = (b[i] >> s) | (b[i+1] << (32-s));
- dest[i] = b[i]>>s;
-}
-
-void
-bitvector_shl(uint32_t *b, size_t n, uint32_t s)
-{
- uint32_t i, scrap = 0, temp;
- if(s == 0 || n == 0)
- return;
- i = s >> 5;
- if(i){
- n -= i;
- memmove(&b[i], b, n*4);
- memset(b, 0, i*4);
- s &= 31;
- b = &b[i];
- }
- for(i = 0; i < n; i++){
- temp = (b[i] << s) | scrap;
- scrap = b[i] >> (32-s);
- b[i] = temp;
- }
-}
-
-// if dest has more space than source, set scrap to true to keep the
-// top bits that would otherwise be shifted out
-void
-bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap)
-{
- uint32_t i, j, sc = 0;
- if(n == 0)
- return;
- if(s == 0){
- memmove(dest, b, n*4);
- return;
- }
- j = s >> 5;
- if(j){
- n -= j;
- memset(dest, 0, j*4);
- s &= 31;
- dest = &dest[j];
- }
- for(i = 0; i < n; i++){
- dest[i] = (b[i] << s) | sc;
- sc = b[i] >> (32-s);
- }
- if(scrap)
- dest[i] = sc;
-}
-
-// set nbits to c, starting at given bit offset
-// assumes offs < 32
-void
-bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = (lomask(nbits)<<offs);
- if(c)
- b[0] |= mask;
- else
- b[0] &= ~mask;
- return;
- }
-
- mask = lomask(offs);
- if(c)
- b[0] |= ~mask;
- else
- b[0] &= mask;
-
- mask = c ? ONES32 : 0;
-
- for(i = 1; i < nw-1; i++)
- b[i] = mask;
-
- tail = (offs+nbits) & 31;
- if(tail == 0)
- b[i] = mask;
- else{
- mask = lomask(tail);
- if(c)
- b[i] |= mask;
- else
- b[i] &= ~mask;
- }
-}
-
-void
-bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = lomask(nbits)<<offs;
- b[0] ^= mask;
- return;
- }
-
- mask = ~lomask(offs);
- b[0] ^= mask;
-
- for(i = 1; i < nw-1; i++)
- b[i] = ~b[i];
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- b[i] = ~b[i];
- else{
- mask = lomask(tail);
- b[i] ^= mask;
- }
-}
-
-// constant-space bit vector copy in a single pass, with arbitrary
-// offsets and lengths. to get this right, there are 16 cases to handle!
-#define BITVECTOR_COPY_OP(name, OP) \
-void \
-bitvector_##name(uint32_t *dest, uint32_t doffs, uint32_t *src, uint32_t soffs, uint32_t nbits) \
-{ \
- uint32_t i, s, nw, tail, snw, mask, scrap; \
- if(nbits == 0) \
- return; \
- nw = (doffs+nbits+31)>>5; \
- if(soffs == doffs){ \
- if(nw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | (OP(src[0]) & mask); \
- for(i = 1; i < nw-1; i++) \
- dest[i] = OP(src[i]); \
- tail = (doffs+nbits)&31; \
- if(tail == 0) \
- dest[i] = src[i]; \
- else { \
- mask = lomask(tail); \
- dest[i] = (dest[i] & ~mask) | (OP(src[i]) & mask); \
- } \
- return; \
- } \
- snw = (soffs+nbits+31)>>5; \
- if(soffs < doffs){ \
- s = doffs-soffs; \
- if(nw == 1){ \
- mask = lomask(nbits) << doffs; \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])<<s) & mask); \
- scrap = OP(src[0])>>(32-s); \
- for(i = 1; i < snw-1; i++){ \
- dest[i] = (OP(src[i])<<s) | scrap; \
- scrap = OP(src[i])>>(32-s); \
- } \
- tail = (doffs+nbits)&31; \
- mask = tail ? lomask(tail) : ONES32; \
- if(snw == nw) \
- dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s)|scrap) & mask); \
- else{ /* snw < nw */ \
- if(snw == 1) \
- dest[i] = (dest[i] & ~mask) | (((OP(src[i])<<s) | scrap) & mask); \
- else{ \
- dest[i] = (OP(src[i])<<s) | scrap; \
- scrap = OP(src[i])>>(32-s); \
- i++; \
- dest[i] = (dest[i] & ~mask) | (scrap & mask); \
- } \
- } \
- }else{ \
- s = soffs-doffs; \
- if(snw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | ((OP(src[0])>>s) & mask); \
- return; \
- } \
- if(nw == 1){ \
- mask = (lomask(nbits)<<doffs); \
- dest[0] = (dest[0] & ~mask) | \
- (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
- return; \
- } \
- mask = ~lomask(doffs); \
- dest[0] = (dest[0] & ~mask) | (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \
- for(i = 1; i < nw-1; i++) \
- dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \
- tail = (doffs+nbits)&31; \
- mask = tail ? lomask(tail) : ONES32; \
- if(snw == nw){ \
- dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \
- } \
- else /* snw > nw */ { \
- dest[i] = (dest[i] & ~mask) | \
- (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \
- } \
- } \
-}
-
-#define BV_COPY(a) (a)
-#define BV_NOT(a) (~(a))
-BITVECTOR_COPY_OP(copy, BV_COPY)
-BITVECTOR_COPY_OP(not_to, BV_NOT)
-
-// copy from source to dest while reversing bit-order
-// assumes dest offset == 0
-// assumes source and dest don't overlap
-// assumes offset < 32
-void
-bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits)
-{
- uint32_t i, nw, tail;
-
- if(nbits == 0)
- return;
-
- nw = (soffs+nbits+31)>>5;
- // first, reverse the words while reversing bit order within each word
- for(i = 0; i < nw/2; i++){
- dest[i] = bitreverse(src[nw-i-1]);
- dest[nw-i-1] = bitreverse(src[i]);
- }
- if(nw&0x1)
- dest[i] = bitreverse(src[i]);
-
- tail = (soffs+nbits)&31;
- if(tail)
- bitvector_shr(dest, nw, 32-tail);
-}
-
-void
-bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, *temp, a[MALLOC_CUTOFF];
-
- if(nbits == 0)
- return;
-
- nw = (offs+nbits+31)>>5;
- temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : a;
- for(i = 0; i < nw/2; i++){
- temp[i] = bitreverse(b[nw-i-1]);
- temp[nw-i-1] = bitreverse(b[i]);
- }
- if(nw & 1)
- temp[i] = bitreverse(b[i]);
-
- tail = (offs+nbits)&31;
- bitvector_copy(b, offs, temp, (32-tail)&31, nbits);
- if(nw > MALLOC_CUTOFF)
- free(temp);
-}
-
-uint64_t
-bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits)
-{
- size_t i, nw;
- uint32_t ntail;
- uint64_t ans;
-
- if(nbits == 0)
- return 0;
- nw = ((uint64_t)offs+nbits+31)>>5;
-
- if(nw == 1)
- return count_bits(b[0] & (lomask(nbits)<<offs));
-
- ans = count_bits(b[0]>>offs); // first end cap
-
- for(i = 1; i < nw-1; i++)
- ans += count_bits(b[i]);
-
- ntail = (offs + (uint32_t)nbits) & 31;
- ans += count_bits(b[i] & (ntail > 0 ? lomask(ntail) : ONES32)); // last end cap
-
- return ans;
-}
-
-uint32_t
-bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return 0;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = (lomask(nbits)<<offs);
- if((b[0] & mask) != mask)
- return 1;
- return 0;
- }
-
- mask = ~lomask(offs);
- if((b[0] & mask) != mask)
- return 1;
-
- for(i = 1; i < nw-1; i++)
- if(b[i] != ONES32)
- return 1;
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- return b[i] != ONES32;
- mask = lomask(tail);
- return (b[i] & mask) != mask;
-}
-
-uint32_t
-bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits)
-{
- uint32_t i, nw, tail, mask;
-
- if(nbits == 0)
- return 0;
- nw = (offs+nbits+31)>>5;
-
- if(nw == 1){
- mask = lomask(nbits)<<offs;
- return (b[0] & mask) != 0;
- }
-
- mask = ~lomask(offs);
- if((b[0] & mask) != 0)
- return 1;
-
- for(i = 1; i < nw-1; i++){
- if(b[i] != 0)
- return 1;
- }
-
- tail = (offs+nbits)&31;
- if(tail == 0)
- return b[i] != 0;
- return (b[i] & lomask(tail)) != 0;
-}
-
-static void
-adjust_offset_to(uint32_t *dest, uint32_t *src, uint32_t nw, uint32_t soffs, uint32_t newoffs)
-{
- if(newoffs > soffs)
- bitvector_shl_to(dest, src, nw, newoffs-soffs, 1);
- else
- bitvector_shr_to(dest, src, nw, soffs-newoffs);
-}
-
-#define BITVECTOR_BINARY_OP_TO(opname, OP) \
-void \
-bitvector_##opname##_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits) \
-{ \
- uint32_t nw = (doffs+nbits+31)>>5; \
- uint32_t atmp[MALLOC_CUTOFF+1]; \
- uint32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : atmp; \
- uint32_t i, anw, bnw; \
- if(aoffs == boffs){ \
- anw = (aoffs+nbits+31)>>5; \
- }else if(aoffs == doffs){ \
- bnw = (boffs+nbits+31)>>5; \
- adjust_offset_to(temp, b, bnw, boffs, aoffs); \
- b = temp; \
- anw = nw; \
- }else{ \
- anw = (aoffs+nbits+31)>>5; \
- bnw = (boffs+nbits+31)>>5; \
- adjust_offset_to(temp, a, anw, aoffs, boffs); \
- a = temp; \
- aoffs = boffs; \
- anw = bnw; \
- } \
- for(i = 0; i < anw; i++) \
- temp[i] = OP(a[i], b[i]); \
- bitvector_copy(dest, doffs, temp, aoffs, nbits); \
- if(nw>MALLOC_CUTOFF) \
- free(temp); \
-}
-
-#define BV_AND(a, b) ((a)&(b))
-#define BV_OR(a, b) ((a)|(b))
-#define BV_XOR(a, b) ((a)^(b))
-BITVECTOR_BINARY_OP_TO(and, BV_AND)
-BITVECTOR_BINARY_OP_TO(or, BV_OR)
-BITVECTOR_BINARY_OP_TO(xor, BV_XOR)
--- a/llt/bitvector.c
+++ /dev/null
@@ -1,140 +1,0 @@
-/*
- bit vector primitives
-
- todo:
- * reverse
- * nreverse
- (- rotate left/right)
- * shl_to
- * not
- - shr_row, shl_row
-
- These routines are the back end supporting bit matrices. Many operations
- on bit matrices are slow (such as accessing or setting a single element!)
- but certain operations are privileged and lend themselves to extremely
- efficient implementation due to the bit-vector nature of machine integers.
- These are:
- done:
- & | $ ~ copy reverse fill sum prod
- todo:
- shift trans rowswap
- would be nice:
- channel interleave
-
- Important note:
- Out-of-place functions always assume dest and source have the same amount
- of space available.
-
- shr_to, shl_to, not_to, and reverse_to assume source and dest don't overlap
- and_to, or_to, and xor_to allow overlap.
-*/
-
-#include "llt.h"
-
-uint32_t *
-bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero)
-{
- uint32_t *p;
- size_t sz = ((newsz+31)>>5) * sizeof(uint32_t);
- p = LLT_REALLOC(b, sz);
- if(p == nil)
- return nil;
- if(initzero && newsz>oldsz){
- size_t osz = ((oldsz+31)>>5) * sizeof(uint32_t);
- memset(&p[osz/sizeof(uint32_t)], 0, sz-osz);
- }
- return p;
-}
-
-uint32_t *
-bitvector_new(uint64_t n, int initzero)
-{
- return bitvector_resize(nil, 0, n, initzero);
-}
-
-size_t
-bitvector_nwords(uint64_t nbits)
-{
- return (nbits+31)>>5;
-}
-
-void
-bitvector_set(uint32_t *b, uint64_t n, uint32_t c)
-{
- if(c)
- b[n>>5] |= 1<<(n&31);
- else
- b[n>>5] &= ~(1<<(n&31));
-}
-
-uint32_t
-bitvector_get(uint32_t *b, uint64_t n)
-{
- return b[n>>5] & (1<<(n&31));
-}
-
-static int
-ntz(uint32_t x)
-{
- int n;
-
- if(x == 0)
- return 32;
- n = 1;
- if((x & 0x0000FFFF) == 0){
- n = n +16;
- x = x >>16;
- }
- if((x & 0x000000FF) == 0){
- n = n + 8;
- x = x >> 8;
- }
- if((x & 0x0000000F) == 0){
- n = n + 4;
- x = x >> 4;
- }
- if((x & 0x00000003) == 0){
- n = n + 2;
- x = x >> 2;
- }
- return n - (x & 1);
-}
-
-// given a bitvector of n bits, starting at bit n0 find the next
-// set bit, including n0.
-// returns n if no set bits.
-uint32_t
-bitvector_next(uint32_t *b, uint64_t n0, uint64_t n)
-{
- if(n0 >= n)
- return n;
-
- uint32_t i = n0>>5;
- uint32_t nb = n0&31;
- uint32_t nw = (n+31)>>5;
- uint32_t w;
-
- if(i < nw-1 || (n&31) == 0)
- w = b[i]>>nb;
- else
- w = (b[i]&lomask(n&31)) >> nb;
- if(w != 0)
- return ntz(w) + n0;
- if(i == nw-1)
- return n;
- i++;
- while(i < nw-1){
- w = b[i];
- if(w != 0)
- return ntz(w) + (i<<5);
- i++;
- }
- w = b[i];
- nb = n&31;
- i = ntz(w);
- if(nb == 0)
- return i + (n-32);
- if(i >= nb)
- return n;
- return i + (n-nb);
-}
--- a/llt/bitvector.h
+++ /dev/null
@@ -1,23 +1,0 @@
-uint32_t bitreverse(uint32_t x);
-uint32_t *bitvector_new(uint64_t n, int initzero);
-uint32_t *bitvector_resize(uint32_t *b, uint64_t oldsz, uint64_t newsz, int initzero);
-size_t bitvector_nwords(uint64_t nbits);
-void bitvector_set(uint32_t *b, uint64_t n, uint32_t c);
-uint32_t bitvector_get(uint32_t *b, uint64_t n);
-uint32_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n);
-void bitvector_shr(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shr_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl(uint32_t *b, size_t n, uint32_t s);
-void bitvector_shl_to(uint32_t *dest, uint32_t *b, size_t n, uint32_t s, int scrap);
-void bitvector_fill(uint32_t *b, uint32_t offs, uint32_t c, uint32_t nbits);
-void bitvector_copy(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_not(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_not_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t nbits);
-void bitvector_reverse(uint32_t *b, uint32_t offs, uint32_t nbits);
-void bitvector_reverse_to(uint32_t *dest, uint32_t *src, uint32_t soffs, uint32_t nbits);
-void bitvector_and_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_or_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-void bitvector_xor_to(uint32_t *dest, uint32_t doffs, uint32_t *a, uint32_t aoffs, uint32_t *b, uint32_t boffs, uint32_t nbits);
-uint64_t bitvector_count(uint32_t *b, uint32_t offs, uint64_t nbits);
-uint32_t bitvector_any0(uint32_t *b, uint32_t offs, uint32_t nbits);
-uint32_t bitvector_any1(uint32_t *b, uint32_t offs, uint32_t nbits);
--- a/llt/dump.c
+++ /dev/null
@@ -1,35 +1,0 @@
-#include "llt.h"
-
-static char hexdig[] = "0123456789abcdef";
-
-/*
- display a given number of bytes from a buffer, with the first
- address label being startoffs
-*/
-void
-hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs)
-{
- size_t offs = 0;
- size_t i, pos;
- char ch, linebuffer[16], hexc[4];
- static const char *spc50 = " ";
-
- hexc[2] = hexc[3] = ' ';
- do{
- ios_printf(dest, "%.8x ", offs+startoffs);
- pos = 10;
- for(i = 0; i < 16 && offs < len; i++, offs++){
- ch = buffer[offs];
- linebuffer[i] = (ch < 32 || ch >= 0x7f) ? '.' : ch;
- hexc[0] = hexdig[((uint8_t)ch)>>4];
- hexc[1] = hexdig[ch & 0x0f];
- pos += ios_write(dest, hexc, (i == 7 || i == 15) ? 4 : 3);
- }
- for(; i < 16; i++)
- linebuffer[i] = ' ';
- ios_write(dest, spc50, 60-pos);
- ios_putc('|', dest);
- ios_write(dest, linebuffer, 16);
- ios_write(dest, "|\n", 2);
- }while(offs < len);
-}
--- a/llt/hashing.c
+++ /dev/null
@@ -1,75 +1,0 @@
-#include "llt.h"
-
-lltuint_t
-nextipow2(lltuint_t i)
-{
- if(i == 0)
- return 1;
- i |= i >> 1;
- i |= i >> 2;
- i |= i >> 4;
- i |= i >> 8;
- i |= i >> 16;
-#ifdef BITS64
- i |= i >> 32;
-#endif
- i++;
- return i ? i : TOP_BIT;
-}
-
-uint32_t
-int32hash(uint32_t a)
-{
- a = (a+0x7ed55d16) + (a<<12);
- a = (a^0xc761c23c) ^ (a>>19);
- a = (a+0x165667b1) + (a<<5);
- a = (a+0xd3a2646c) ^ (a<<9);
- a = (a+0xfd7046c5) + (a<<3);
- a = (a^0xb55a4f09) ^ (a>>16);
- return a;
-}
-
-uint64_t
-int64hash(uint64_t key)
-{
- key = (~key) + (key << 21); // key = (key << 21) - key - 1;
- key = key ^ (key >> 24);
- key = (key + (key << 3)) + (key << 8); // key * 265
- key = key ^ (key >> 14);
- key = (key + (key << 2)) + (key << 4); // key * 21
- key = key ^ (key >> 28);
- key = key + (key << 31);
- return key;
-}
-
-uint32_t
-int64to32hash(uint64_t key)
-{
- key = (~key) + (key << 18); // key = (key << 18) - key - 1;
- key = key ^ (key >> 31);
- key = key * 21; // key = (key + (key << 2)) + (key << 4);
- key = key ^ (key >> 11);
- key = key + (key << 6);
- key = key ^ (key >> 22);
- return (uint32_t)key;
-}
-
-#include "lookup3.c"
-
-uint64_t
-memhash(const char* buf, size_t n)
-{
- uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
- hashlittle2(buf, n, &c, &b);
- return (uint64_t)c | (((uint64_t)b)<<32);
-}
-
-uint32_t
-memhash32(const char* buf, size_t n)
-{
- uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
- hashlittle2(buf, n, &c, &b);
- return c;
-}
--- a/llt/htable.c
+++ /dev/null
@@ -1,52 +1,0 @@
-/*
- functions common to all hash table instantiations
-*/
-
-#include "llt.h"
-#include "htable.h"
-
-htable_t *
-htable_new(htable_t *h, size_t size)
-{
- if(size <= HT_N_INLINE/2){
- h->size = size = HT_N_INLINE;
- h->table = &h->_space[0];
- }else{
- size = nextipow2(size);
- size *= 2; // 2 pointers per key/value pair
- size *= 2; // aim for 50% occupancy
- h->size = size;
- h->table = LLT_ALLOC(size*sizeof(void*));
- }
- if(h->table == nil)
- return nil;
- size_t i;
- for(i = 0; i < size; i++)
- h->table[i] = HT_NOTFOUND;
- return h;
-}
-
-void
-htable_free(htable_t *h)
-{
- if(h->table != &h->_space[0])
- LLT_FREE(h->table);
-}
-
-// empty and reduce size
-void
-htable_reset(htable_t *h, size_t sz)
-{
- sz = nextipow2(sz);
- if(h->size > sz*4 && h->size > HT_N_INLINE){
- size_t newsz = sz*4;
- void **newtab = LLT_REALLOC(h->table, newsz*sizeof(void*));
- if(newtab == nil)
- return;
- h->size = newsz;
- h->table = newtab;
- }
- size_t i, hsz = h->size;
- for(i = 0; i < hsz; i++)
- h->table[i] = HT_NOTFOUND;
-}
--- a/llt/htable.h
+++ /dev/null
@@ -1,22 +1,0 @@
-#ifndef __HTABLE_H_
-#define __HTABLE_H_
-
-#define HT_N_INLINE 32
-
-typedef struct {
- size_t size;
- void **table;
- void *_space[HT_N_INLINE];
-}htable_t;
-
-// define this to be an invalid key/value
-#define HT_NOTFOUND ((void*)1)
-
-// initialize and free
-htable_t *htable_new(htable_t *h, size_t size);
-void htable_free(htable_t *h);
-
-// clear and (possibly) change size
-void htable_reset(htable_t *h, size_t sz);
-
-#endif
--- a/llt/htable.inc
+++ /dev/null
@@ -1,147 +1,0 @@
-//-*- mode:c -*-
-
-/*
- include this file and call HTIMPL to generate an implementation
-*/
-
-#define hash_size(h) ((h)->size/2)
-
-// compute empirical max-probe for a given size
-#define max_probe(size) ((size) <= (HT_N_INLINE*2) ? (HT_N_INLINE/2) : (size)>>3)
-
-#define HTIMPL(HTNAME, HFUNC, EQFUNC) \
-static void ** \
-HTNAME##_lookup_bp(htable_t *h, void *key) \
-{ \
- lltuint_t hv; \
- size_t i, orig, index, iter; \
- size_t newsz, sz = hash_size(h); \
- size_t maxprobe = max_probe(sz); \
- void **tab = h->table; \
- void **ol; \
- \
- hv = HFUNC((uintptr_t)key); \
-retry_bp: \
- iter = 0; \
- index = (hv & (sz-1)) * 2; \
- sz *= 2; \
- orig = index; \
- \
- do{ \
- if(tab[index+1] == HT_NOTFOUND){ \
- tab[index] = key; \
- return &tab[index+1]; \
- } \
- if(EQFUNC(key, tab[index])) \
- return &tab[index+1]; \
- index = (index+2) & (sz-1); \
- iter++; \
- if(iter > maxprobe) \
- break; \
- }while(index != orig); \
- \
- /* table full */ \
- /* quadruple size, rehash, retry the insert */ \
- /* it's important to grow the table really fast; otherwise we waste */ \
- /* lots of time rehashing all the keys over and over. */ \
- sz = h->size; \
- ol = h->table; \
- if(sz >= (1<<19) || (sz <= (1<<8))) \
- newsz = sz<<1; \
- else if(sz <= HT_N_INLINE) \
- newsz = HT_N_INLINE; \
- else \
- newsz = sz<<2; \
- tab = (void**)LLT_ALLOC(newsz*sizeof(void*)); \
- if(tab == nil) \
- return nil; \
- for(i = 0; i < newsz; i++) \
- tab[i] = HT_NOTFOUND; \
- h->table = tab; \
- h->size = newsz; \
- for(i = 0; i < sz; i += 2) { \
- if(ol[i+1] != HT_NOTFOUND) \
- (*HTNAME##_lookup_bp(h, ol[i])) = ol[i+1]; \
- } \
- if(ol != &h->_space[0]) \
- LLT_FREE(ol); \
- sz = hash_size(h); \
- maxprobe = max_probe(sz); \
- tab = h->table; \
- goto retry_bp; \
-} \
- \
-void \
-HTNAME##_put(htable_t *h, void *key, void *val) \
-{ \
- void **bp = HTNAME##_lookup_bp(h, key); \
- *bp = val; \
-} \
- \
-void ** \
-HTNAME##_bp(htable_t *h, void *key) \
-{ \
- return HTNAME##_lookup_bp(h, key); \
-} \
- \
-/* returns bp if key is in hash, otherwise nil */ \
-/* if return is non-nil and *bp == HT_NOTFOUND then key was deleted */ \
-static void ** \
-HTNAME##_peek_bp(htable_t *h, void *key) \
-{ \
- size_t sz = hash_size(h); \
- size_t maxprobe = max_probe(sz); \
- void **tab = h->table; \
- size_t index = (HFUNC((uintptr_t)key) & (sz-1)) * 2; \
- sz *= 2; \
- size_t orig = index; \
- size_t iter = 0; \
- \
- do { \
- if(tab[index] == HT_NOTFOUND) \
- return nil; \
- if(EQFUNC(key, tab[index])) \
- return &tab[index+1]; \
- \
- index = (index+2) & (sz-1); \
- iter++; \
- if(iter > maxprobe) \
- break; \
- }while(index != orig); \
- \
- return nil; \
-} \
- \
-void *\
-HTNAME##_get(htable_t *h, void *key) \
-{ \
- void **bp = HTNAME##_peek_bp(h, key); \
- if(bp == nil) \
- return HT_NOTFOUND; \
- return *bp; \
-} \
- \
-int \
-HTNAME##_has(htable_t *h, void *key) \
-{ \
- return HTNAME##_get(h, key) != HT_NOTFOUND; \
-} \
- \
-int \
-HTNAME##_remove(htable_t *h, void *key) \
-{ \
- void **bp = HTNAME##_peek_bp(h, key); \
- if(bp != nil){ \
- *bp = HT_NOTFOUND; \
- return 1; \
- } \
- return 0; \
-} \
- \
-void \
-HTNAME##_adjoin(htable_t *h, void *key, void *val) \
-{ \
- void **bp = HTNAME##_lookup_bp(h, key); \
- if(*bp == HT_NOTFOUND) \
- *bp = val; \
-}
--- a/llt/htableh.inc
+++ /dev/null
@@ -1,30 +1,0 @@
-//-*- mode:c -*-
-
-#include "htable.h"
-
-#define HTPROT(HTNAME) \
-void *HTNAME##_get(htable_t *h, void *key); \
-void HTNAME##_put(htable_t *h, void *key, void *val); \
-void HTNAME##_adjoin(htable_t *h, void *key, void *val); \
-int HTNAME##_has(htable_t *h, void *key); \
-int HTNAME##_remove(htable_t *h, void *key); \
-void **HTNAME##_bp(htable_t *h, void *key);
-
-// return value, or HT_NOTFOUND if key not found
-
-// add key/value binding
-
-// add binding iff key is unbound
-
-// does key exist?
-
-// logically remove key
-
-// get a pointer to the location of the value for the given key.
-// creates the location if it doesn't exist. only returns nil
-// if memory allocation fails.
-// this should be used for updates, for example:
-// void **bp = ptrhash_bp(h, key);
-// *bp = f(*bp);
-// do not reuse bp if there might be intervening calls to ptrhash_put,
-// ptrhash_bp, ptrhash_reset, or ptrhash_free.
--- a/llt/ieee754.h
+++ /dev/null
@@ -1,66 +1,0 @@
-#ifndef __IEEE754_H_
-#define __IEEE754_H_
-
-union ieee754_float {
- float f;
-
- struct {
-#if BYTE_ORDER == BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:8;
- unsigned int mantissa:23;
-#elif BYTE_ORDER == LITTLE_ENDIAN
- unsigned int mantissa:23;
- unsigned int exponent:8;
- unsigned int negative:1;
-#else
-#error which endian?
-#endif
- }ieee;
-};
-
-#define IEEE754_FLOAT_BIAS 0x7f
-
-union ieee754_double {
- double d;
-
- struct {
-#if BYTE_ORDER == BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:11;
- unsigned int mantissa0:20;
- unsigned int mantissa1:32;
-#else
- unsigned int mantissa1:32;
- unsigned int mantissa0:20;
- unsigned int exponent:11;
- unsigned int negative:1;
-#endif
- }ieee;
-};
-
-#define IEEE754_DOUBLE_BIAS 0x3ff
-
-union ieee854_long_double {
- long double d;
-
- struct {
-#if BYTE_ORDER == BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- unsigned int empty:16;
- unsigned int mantissa0:32;
- unsigned int mantissa1:32;
-#else
- unsigned int mantissa1:32;
- unsigned int mantissa0:32;
- unsigned int exponent:15;
- unsigned int negative:1;
- unsigned int empty:16;
-#endif
- }ieee;
-};
-
-#define IEEE854_LONG_DOUBLE_BIAS 0x3fff
-
-#endif
--- a/llt/int2str.c
+++ /dev/null
@@ -1,30 +1,0 @@
-#include "llt.h"
-
-char *
-uint2str(char *dest, size_t len, uint64_t num, uint32_t base)
-{
- int i = len-1;
- uint64_t b = (uint64_t)base;
- char ch;
- dest[i--] = '\0';
- while(i >= 0){
- ch = (char)(num % b);
- if(ch < 10)
- ch += '0';
- else
- ch = ch-10+'a';
- dest[i--] = ch;
- num /= b;
- if(num == 0)
- break;
- }
- return &dest[i+1];
-}
-
-int
-isdigit_base(char c, int base)
-{
- if(base < 11)
- return c >= '0' && c < '0'+base;
- return (c >= '0' && c <= '9') || (c >= 'a' && c < 'a'+base-10) || (c >= 'A' && c < 'A'+base-10);
-}
--- a/llt/ios.c
+++ /dev/null
@@ -1,1018 +1,0 @@
-#include "llt.h"
-
-#define MOST_OF(x) ((x) - ((x)>>4))
-
-ios_t *ios_stdin = nil;
-ios_t *ios_stdout = nil;
-ios_t *ios_stderr = nil;
-
-/* OS-level primitive wrappers */
-
-void *
-llt_memrchr(const void *s, int c, size_t n)
-{
- const uint8_t *src = (const uint8_t*)s + n;
- uint8_t uc = c;
- while(--src >= (uint8_t*)s)
- if(*src == uc)
- return (void *)src;
- return nil;
-}
-
-#if !defined(__plan9__)
-static int
-_enonfatal(int err)
-{
- return err == EAGAIN || err == EINPROGRESS || err == EINTR || err == EWOULDBLOCK;
-}
-#define SLEEP_TIME 5//ms
-#endif
-
-// return error code, #bytes read in *nread
-// these wrappers retry operations until success or a fatal error
-static int
-_os_read(long fd, char *buf, size_t n, size_t *nread)
-{
- ssize_t r;
-
- while(1){
- r = read((int)fd, buf, n);
- if(r > -1){
- *nread = (size_t)r;
- break;
- }
-#if defined(__plan9__)
- return r;
-#else
- if(!_enonfatal(errno)){
- *nread = 0;
- return errno;
- }
- sleep_ms(SLEEP_TIME);
-#endif
- }
- return 0;
-}
-
-static int
-_os_read_all(long fd, char *buf, size_t n, size_t *nread)
-{
- size_t got;
-
- *nread = 0;
-
- while(n > 0){
- int err = _os_read(fd, buf, n, &got);
- n -= got;
- *nread += got;
- buf += got;
- if(err || got == 0)
- return err;
- }
- return 0;
-}
-
-static int
-_os_write(long fd, const void *buf, size_t n, size_t *nwritten)
-{
- ssize_t r;
-
- while(1){
- r = write((int)fd, buf, n);
- if(r > -1){
- *nwritten = (size_t)r;
- break;
- }
-#if defined(__plan9__)
- return r;
-#else
- if(!_enonfatal(errno)){
- *nwritten = 0;
- return errno;
- }
- sleep_ms(SLEEP_TIME);
-#endif
- }
- return 0;
-}
-
-static int
-_os_write_all(long fd, const char *buf, size_t n, size_t *nwritten)
-{
- size_t wrote;
-
- *nwritten = 0;
- while(n > 0){
- int err = _os_write(fd, buf, n, &wrote);
- n -= wrote;
- *nwritten += wrote;
- buf += wrote;
- if(err)
- return err;
- }
- return 0;
-}
-
-
-/* internal utility functions */
-
-static char *
-_buf_realloc(ios_t *s, size_t sz)
-{
- char *temp;
-
- if((s->buf == nil || s->buf == &s->local[0]) && sz <= IOS_INLSIZE){
- /* TODO: if we want to allow shrinking, see if the buffer shrank
- down to this size, in which case we need to copy. */
- s->buf = &s->local[0];
- s->maxsize = IOS_INLSIZE;
- s->ownbuf = 1;
- return s->buf;
- }
-
- if(sz <= s->maxsize)
- return s->buf;
-
- if(s->ownbuf && s->buf != &s->local[0]){
- // if we own the buffer we're free to resize it
- // always allocate 1 bigger in case user wants to add a NUL
- // terminator after taking over the buffer
- temp = LLT_REALLOC(s->buf, sz);
- if(temp == nil)
- return nil;
- }else{
- temp = LLT_ALLOC(sz);
- if(temp == nil)
- return nil;
- s->ownbuf = 1;
- if(s->size > 0)
- memmove(temp, s->buf, s->size);
- }
-
- s->buf = temp;
- s->maxsize = sz;
- return s->buf;
-}
-
-// write a block of data into the buffer at the current position, resizing
-// if necessary. returns # written.
-static size_t
-_write_grow(ios_t *s, const char *data, size_t n)
-{
- size_t amt;
- size_t newsize;
-
- if(n == 0)
- return 0;
-
- if(s->bpos + n > s->size){
- if(s->bpos + n > s->maxsize){
- /* TODO: here you might want to add a mechanism for limiting
- the growth of the stream. */
- newsize = s->maxsize ? s->maxsize * 2 : 8;
- while(s->bpos + n > newsize)
- newsize *= 2;
- if(_buf_realloc(s, newsize) == nil){
- /* no more space; write as much as we can */
- amt = s->maxsize - s->bpos;
- if(amt > 0){
- memmove(&s->buf[s->bpos], data, amt);
- }
- s->bpos += amt;
- s->size = s->maxsize;
- return amt;
- }
- }
- s->size = s->bpos + n;
- }
- memmove(s->buf + s->bpos, data, n);
- s->bpos += n;
-
- return n;
-}
-
-
-/* interface functions, low level */
-
-static size_t
-_ios_read(ios_t *s, char *dest, size_t n, int all)
-{
- size_t tot = 0;
- size_t got, avail;
-
- while(n > 0){
- avail = s->size - s->bpos;
-
- if(avail > 0){
- size_t ncopy = avail >= n ? n : avail;
- memmove(dest, s->buf + s->bpos, ncopy);
- s->bpos += ncopy;
- if(ncopy >= n){
- s->state = bst_rd;
- return tot+ncopy;
- }
- }
- if(s->bm == bm_mem || s->fd == -1){
- // can't get any more data
- s->state = bst_rd;
- if(avail == 0)
- s->_eof = 1;
- return avail;
- }
-
- dest += avail;
- n -= avail;
- tot += avail;
-
- ios_flush(s);
- s->bpos = s->size = 0;
- s->state = bst_rd;
-
- s->fpos = -1;
- if(n > MOST_OF(s->maxsize)){
- // doesn't fit comfortably in buffer; go direct
- if(all)
- _os_read_all(s->fd, dest, n, &got);
- else
- _os_read(s->fd, dest, n, &got);
- tot += got;
- if(got == 0)
- s->_eof = 1;
- return tot;
- }else{
- // refill buffer
- if(_os_read(s->fd, s->buf, s->maxsize, &got)){
- s->_eof = 1;
- return tot;
- }
- if(got == 0){
- s->_eof = 1;
- return tot;
- }
- s->size = got;
- }
- }
-
- return tot;
-}
-
-size_t
-ios_read(ios_t *s, char *dest, size_t n)
-{
- return _ios_read(s, dest, n, 0);
-}
-
-size_t
-ios_readall(ios_t *s, char *dest, size_t n)
-{
- return _ios_read(s, dest, n, 1);
-}
-
-size_t
-ios_readprep(ios_t *s, size_t n)
-{
- if(s->state == bst_wr && s->bm != bm_mem){
- ios_flush(s);
- s->bpos = s->size = 0;
- }
- size_t space = s->size - s->bpos;
- s->state = bst_rd;
- if(space >= n || s->bm == bm_mem || s->fd == -1)
- return space;
- if(s->maxsize < s->bpos+n){
- // it won't fit. grow buffer or move data back.
- if(n <= s->maxsize && space <= ((s->maxsize)>>2)){
- if(space)
- memmove(s->buf, s->buf+s->bpos, space);
- s->size -= s->bpos;
- s->bpos = 0;
- }
- else {
- if(_buf_realloc(s, s->bpos + n) == nil)
- return space;
- }
- }
- size_t got;
- int result = _os_read(s->fd, s->buf+s->size, s->maxsize - s->size, &got);
- if(result)
- return space;
- s->size += got;
- return s->size - s->bpos;
-}
-
-static void
-_write_update_pos(ios_t *s)
-{
- if(s->bpos > s->ndirty)
- s->ndirty = s->bpos;
- if(s->bpos > s->size)
- s->size = s->bpos;
-}
-
-size_t
-ios_write(ios_t *s, const char *data, size_t n)
-{
- if(s->readonly)
- return 0;
- if(n == 0)
- return 0;
- size_t space;
- size_t wrote = 0;
-
- if(s->state == bst_none)
- s->state = bst_wr;
- if(s->state == bst_rd){
- if(!s->rereadable){
- s->size = 0;
- s->bpos = 0;
- }
- space = s->size - s->bpos;
- }else{
- space = s->maxsize - s->bpos;
- }
-
- if(s->bm == bm_mem){
- wrote = _write_grow(s, data, n);
- }else if(s->bm == bm_none){
- s->fpos = -1;
- _os_write_all(s->fd, data, n, &wrote);
- return wrote;
- }else if(n <= space){
- if(s->bm == bm_line){
- char *nl;
- if((nl = llt_memrchr(data, '\n', n)) != nil){
- size_t linesz = nl-data+1;
- s->bm = bm_block;
- wrote += ios_write(s, data, linesz);
- ios_flush(s);
- s->bm = bm_line;
- n -= linesz;
- data += linesz;
- }
- }
- memmove(s->buf + s->bpos, data, n);
- s->bpos += n;
- wrote += n;
- }else{
- s->state = bst_wr;
- ios_flush(s);
- if(n > MOST_OF(s->maxsize)){
- _os_write_all(s->fd, data, n, &wrote);
- return wrote;
- }
- return ios_write(s, data, n);
- }
- _write_update_pos(s);
- return wrote;
-}
-
-off_t
-ios_seek(ios_t *s, off_t pos)
-{
- s->_eof = 0;
- if(s->bm == bm_mem){
- if((size_t)pos > s->size)
- return -1;
- s->bpos = pos;
- }else{
- ios_flush(s);
- off_t fdpos = lseek(s->fd, pos, SEEK_SET);
- if(fdpos == (off_t)-1)
- return fdpos;
- s->bpos = s->size = 0;
- }
- return 0;
-}
-
-off_t
-ios_seek_end(ios_t *s)
-{
- s->_eof = 1;
- if(s->bm == bm_mem){
- s->bpos = s->size;
- }else{
- ios_flush(s);
- off_t fdpos = lseek(s->fd, 0, SEEK_END);
- if(fdpos == (off_t)-1)
- return fdpos;
- s->bpos = s->size = 0;
- }
- return 0;
-}
-
-off_t
-ios_skip(ios_t *s, off_t offs)
-{
- if(offs != 0){
- if(offs > 0){
- if(offs <= (off_t)(s->size-s->bpos)){
- s->bpos += offs;
- return 0;
- }else if(s->bm == bm_mem){
- // TODO: maybe grow buffer
- return -1;
- }
- }else if(offs < 0){
- if(-offs <= (off_t)s->bpos){
- s->bpos += offs;
- s->_eof = 0;
- return 0;
- }else if(s->bm == bm_mem){
- return -1;
- }
- }
- ios_flush(s);
- if(s->state == bst_wr)
- offs += s->bpos;
- else if(s->state == bst_rd)
- offs -= s->size - s->bpos;
- off_t fdpos = lseek(s->fd, offs, SEEK_CUR);
- if(fdpos == (off_t)-1)
- return fdpos;
- s->bpos = s->size = 0;
- s->_eof = 0;
- }
- return 0;
-}
-
-off_t
-ios_pos(ios_t *s)
-{
- if(s->bm == bm_mem)
- return (off_t)s->bpos;
-
- off_t fdpos = s->fpos;
- if(fdpos == (off_t)-1){
- fdpos = lseek(s->fd, 0, SEEK_CUR);
- if(fdpos == (off_t)-1)
- return fdpos;
- s->fpos = fdpos;
- }
-
- if(s->state == bst_wr)
- fdpos += s->bpos;
- else if(s->state == bst_rd)
- fdpos -= s->size - s->bpos;
- return fdpos;
-}
-
-size_t
-ios_trunc(ios_t *s, size_t size)
-{
- if(s->bm == bm_mem){
- if(size == s->size)
- return s->size;
- if(size < s->size){
- if(s->bpos > size)
- s->bpos = size;
- }else if(_buf_realloc(s, size) == nil)
- return s->size;
- s->size = size;
- return size;
- }
- //todo
- return 0;
-}
-
-int
-ios_eof(ios_t *s)
-{
- if(s->bm == bm_mem)
- return s->_eof;
- return s->fd == -1 || s->_eof;
-}
-
-int
-ios_flush(ios_t *s)
-{
- if(s->ndirty == 0 || s->bm == bm_mem || s->buf == nil)
- return 0;
- if(s->fd == -1)
- return -1;
-
- if(s->state == bst_rd){
- if(lseek(s->fd, -(off_t)s->size, SEEK_CUR) == (off_t)-1){
- // FIXME eh?
- }
- }
-
- size_t nw, ntowrite = s->ndirty;
- s->fpos = -1;
- int err = _os_write_all(s->fd, s->buf, ntowrite, &nw);
- // todo: try recovering from some kinds of errors (e.g. retry)
-
- if(s->state == bst_rd){
- if(lseek(s->fd, s->size - nw, SEEK_CUR) == (off_t)-1){
- // FIXME eh?
- }
- }else if(s->state == bst_wr){
- if(s->bpos != nw && lseek(s->fd, (off_t)s->bpos - (off_t)nw, SEEK_CUR) == (off_t)-1){
- // FIXME eh?
- }
- // now preserve the invariant that data to write
- // begins at the beginning of the buffer, and s->size refers
- // to how much valid file data is stored in the buffer.
- if(s->size > s->ndirty){
- size_t delta = s->size - s->ndirty;
- memmove(s->buf, s->buf + s->ndirty, delta);
- }
- s->size -= s->ndirty;
- s->bpos = 0;
- }
-
- s->ndirty = 0;
-
- if(err)
- return err;
- if(nw < ntowrite)
- return -1;
- return 0;
-}
-
-void
-ios_close(ios_t *s)
-{
- ios_flush(s);
- if(s->fd != -1 && s->ownfd)
- close(s->fd);
- s->fd = -1;
- if(s->buf != nil && s->ownbuf && s->buf != &s->local[0])
- LLT_FREE(s->buf);
- s->buf = nil;
- s->size = s->maxsize = s->bpos = 0;
-}
-
-static void
-_buf_init(ios_t *s, bufmode_t bm)
-{
- s->bm = bm;
- if(s->bm == bm_mem || s->bm == bm_none){
- s->buf = &s->local[0];
- s->maxsize = IOS_INLSIZE;
- }else{
- s->buf = nil;
- _buf_realloc(s, IOS_BUFSIZE);
- }
- s->size = s->bpos = 0;
-}
-
-char *
-ios_takebuf(ios_t *s, size_t *psize)
-{
- char *buf;
-
- ios_flush(s);
-
- if(s->buf == &s->local[0] || s->buf == nil || (!s->ownbuf && s->size == s->maxsize)){
- buf = LLT_ALLOC(s->size+1);
- if(buf == nil)
- return nil;
- if(s->size)
- memmove(buf, s->buf, s->size);
- }else if(s->size == s->maxsize){
- buf = LLT_REALLOC(s->buf, s->size + 1);
- if(buf == nil)
- return nil;
- }else{
- buf = s->buf;
- }
- buf[s->size] = '\0';
- *psize = s->size + 1;
-
- /* empty stream and reinitialize */
- _buf_init(s, s->bm);
-
- return buf;
-}
-
-int
-ios_setbuf(ios_t *s, char *buf, size_t size, int own)
-{
- ios_flush(s);
- size_t nvalid;
-
- nvalid = size < s->size ? size : s->size;
- if(nvalid > 0)
- memmove(buf, s->buf, nvalid);
- if(s->bpos > nvalid){
- // truncated
- s->bpos = nvalid;
- }
- s->size = nvalid;
-
- if(s->buf != nil && s->ownbuf && s->buf != &s->local[0])
- LLT_FREE(s->buf);
- s->buf = buf;
- s->maxsize = size;
- s->ownbuf = own;
- return 0;
-}
-
-int
-ios_bufmode(ios_t *s, bufmode_t mode)
-{
- // no fd; can only do mem-only buffering
- if(s->fd == -1 && mode != bm_mem)
- return -1;
- s->bm = mode;
- return 0;
-}
-
-void
-ios_set_readonly(ios_t *s)
-{
- if(s->readonly)
- return;
- ios_flush(s);
- s->state = bst_none;
- s->readonly = 1;
-}
-
-static size_t
-ios_copy_(ios_t *to, ios_t *from, size_t nbytes, int all)
-{
- size_t total = 0, avail;
- if(!ios_eof(from)){
- do{
- avail = ios_readprep(from, IOS_BUFSIZE/2);
- if(avail == 0){
- from->_eof = 1;
- break;
- }
- size_t written, ntowrite;
- ntowrite = (avail <= nbytes || all) ? avail : nbytes;
- written = ios_write(to, from->buf+from->bpos, ntowrite);
- // TODO: should this be +=written instead?
- from->bpos += ntowrite;
- total += written;
- if(!all){
- nbytes -= written;
- if(nbytes == 0)
- break;
- }
- if(written < ntowrite)
- break;
- }while(!ios_eof(from));
- }
- return total;
-}
-
-size_t
-ios_copy(ios_t *to, ios_t *from, size_t nbytes)
-{
- return ios_copy_(to, from, nbytes, 0);
-}
-
-size_t
-ios_copyall(ios_t *to, ios_t *from)
-{
- return ios_copy_(to, from, 0, 1);
-}
-
-#define LINE_CHUNK_SIZE 160
-
-size_t
-ios_copyuntil(ios_t *to, ios_t *from, char delim)
-{
- size_t total = 0, avail = from->size - from->bpos;
- int first = 1;
- if(!ios_eof(from)){
- do{
- if(avail == 0){
- first = 0;
- avail = ios_readprep(from, LINE_CHUNK_SIZE);
- }
- size_t written;
- char *pd = memchr(from->buf+from->bpos, delim, avail);
- if(pd == nil){
- written = ios_write(to, from->buf+from->bpos, avail);
- from->bpos += avail;
- total += written;
- avail = 0;
- }else{
- size_t ntowrite = pd - (from->buf+from->bpos) + 1;
- written = ios_write(to, from->buf+from->bpos, ntowrite);
- from->bpos += ntowrite;
- total += written;
- return total;
- }
- }while(!ios_eof(from) && (first || avail >= LINE_CHUNK_SIZE));
- }
- from->_eof = 1;
- return total;
-}
-
-static void
-_ios_init(ios_t *s)
-{
- // put all fields in a sane initial state
- memset(s, 0, sizeof(*s));
- s->bm = bm_block;
- s->state = bst_none;
- s->fpos = -1;
- s->lineno = 1;
- s->fd = -1;
- s->ownbuf = 1;
-}
-
-/* stream object initializers. we do no allocation. */
-
-ios_t *
-ios_file(ios_t *s, char *fname, int rd, int wr, int creat, int trunc)
-{
- int fd;
- if(!(rd || wr)) // must specify read and/or write
- goto open_file_err;
- int flags = wr ? (rd ? O_RDWR : O_WRONLY) : O_RDONLY;
- if(trunc)
- flags |= O_TRUNC;
-#if defined(__plan9__)
- fd = creat ? create(fname, flags, 0644) : open(fname, flags);
-#else
- if(creat)
- flags |= O_CREAT;
- fd = open(fname, flags, 0644);
-#endif
- s = ios_fd(s, fd, 1, 1);
- if(fd < 0)
- goto open_file_err;
- if(!wr)
- s->readonly = 1;
- return s;
-open_file_err:
- s->fd = -1;
- return nil;
-}
-
-ios_t *
-ios_mem(ios_t *s, size_t initsize)
-{
- _ios_init(s);
- s->bm = bm_mem;
- _buf_realloc(s, initsize);
- return s;
-}
-
-ios_t *
-ios_str(ios_t *s, char *str)
-{
- size_t n = strlen(str);
- if(ios_mem(s, n+1) == nil)
- return nil;
- ios_write(s, str, n+1);
- ios_seek(s, 0);
- return s;
-}
-
-ios_t *
-ios_static_buffer(ios_t *s, const char *buf, size_t sz)
-{
- ios_mem(s, 0);
- ios_setbuf(s, (char*)buf, sz, 0);
- s->size = sz;
- ios_set_readonly(s);
- return s;
-}
-
-ios_t *
-ios_fd(ios_t *s, long fd, int isfile, int own)
-{
- _ios_init(s);
- s->fd = fd;
- if(isfile)
- s->rereadable = 1;
- _buf_init(s, bm_block);
- s->ownfd = own;
- if(fd == STDERR_FILENO)
- s->bm = bm_none;
- return s;
-}
-
-void
-ios_init_stdstreams(void)
-{
- ios_stdin = LLT_ALLOC(sizeof(ios_t));
- ios_fd(ios_stdin, STDIN_FILENO, 0, 0);
-
- ios_stdout = LLT_ALLOC(sizeof(ios_t));
- ios_fd(ios_stdout, STDOUT_FILENO, 0, 0);
- ios_stdout->bm = bm_line;
-
- ios_stderr = LLT_ALLOC(sizeof(ios_t));
- ios_fd(ios_stderr, STDERR_FILENO, 0, 0);
- ios_stderr->bm = bm_none;
-}
-
-/* higher level interface */
-
-int
-ios_putc(int c, ios_t *s)
-{
- char ch = c;
-
- if(s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none){
- s->buf[s->bpos++] = ch;
- _write_update_pos(s);
- if(s->bm == bm_line && ch == '\n')
- ios_flush(s);
- return 1;
- }
- return ios_write(s, &ch, 1);
-}
-
-int
-ios_getc(ios_t *s)
-{
- char ch;
- if(s->state == bst_rd && s->bpos < s->size)
- ch = s->buf[s->bpos++];
- else if(s->_eof)
- return IOS_EOF;
- else if(ios_read(s, &ch, 1) < 1)
- return IOS_EOF;
- if(ch == '\n')
- s->lineno++;
- return (uint8_t)ch;
-}
-
-int
-ios_peekc(ios_t *s)
-{
- if(s->bpos < s->size)
- return (uint8_t)s->buf[s->bpos];
- if(s->_eof)
- return IOS_EOF;
- size_t n = ios_readprep(s, 1);
- if(n == 0)
- return IOS_EOF;
- return (uint8_t)s->buf[s->bpos];
-}
-
-int
-ios_ungetc(int c, ios_t *s)
-{
- if(s->state == bst_wr)
- return IOS_EOF;
- if(c == '\n')
- s->lineno--;
- if(s->bpos > 0){
- s->bpos--;
- if(s->buf[s->bpos] != (char)c)
- s->buf[s->bpos] = (char)c;
- s->_eof = 0;
- return c;
- }
- if(s->size == s->maxsize && _buf_realloc(s, s->maxsize*2) == nil)
- return IOS_EOF;
- memmove(s->buf + 1, s->buf, s->size);
- s->buf[0] = (char)c;
- s->size++;
- s->_eof = 0;
- return c;
-}
-
-int
-ios_getutf8(ios_t *s, uint32_t *pwc)
-{
- int c;
- size_t sz;
- char c0;
- char buf[8];
-
- c = ios_peekc(s);
- if(c == IOS_EOF){
- s->_eof = 1;
- return IOS_EOF;
- }
- c0 = (char)c;
- if((uint8_t)c0 < 0x80){
- ios_getc(s);
- *pwc = (uint32_t)(uint8_t)c0;
- return 1;
- }
- sz = u8_seqlen(&c0)-1;
- if(!isutf(c0) || sz > 3)
- return 0;
- if(ios_readprep(s, sz) < sz){
- // NOTE: this returns EOF even though some bytes are available
- // so we do not set s->_eof on this code path
- return IOS_EOF;
- }
- if(u8_isvalid(&s->buf[s->bpos], sz+1)){
- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- ios_read(s, buf, sz+1);
- return 1;
- }
- return 0;
-}
-
-int
-ios_peekutf8(ios_t *s, uint32_t *pwc)
-{
- int c;
- size_t sz;
- char c0;
-
- c = ios_peekc(s);
- if(c == IOS_EOF)
- return IOS_EOF;
- c0 = (char)c;
- if((uint8_t)c0 < 0x80){
- *pwc = (uint32_t)(uint8_t)c0;
- return 1;
- }
- sz = u8_seqlen(&c0)-1;
- if(!isutf(c0) || sz > 3)
- return 0;
- if(ios_readprep(s, sz) < sz)
- return IOS_EOF;
- if(u8_isvalid(&s->buf[s->bpos], sz+1)){
- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- return 1;
- }
- return 0;
-}
-
-int
-ios_pututf8(ios_t *s, uint32_t wc)
-{
- char buf[8];
- if(wc < 0x80)
- return ios_putc((int)wc, s);
- size_t n = u8_toutf8(buf, 8, &wc, 1);
- return ios_write(s, buf, n);
-}
-
-void
-ios_purge(ios_t *s)
-{
- if(s->state == bst_rd)
- s->bpos = s->size;
-}
-
-char *
-ios_readline(ios_t *s)
-{
- ios_t dest;
- ios_mem(&dest, 0);
- ios_copyuntil(&dest, s, '\n');
- size_t n;
- return ios_takebuf(&dest, &n);
-}
-
-int
-ios_vprintf(ios_t *s, const char *format, va_list args)
-{
- char *str;
- int c;
-
-#if defined(__plan9__)
- str = vsmprint(format, args);
- if((c = strlen(str)) >= 0)
- ios_write(s, str, c);
- free(str);
-#else
- va_list al;
- va_copy(al, args);
-
- if(s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none){
- int avail = s->maxsize - s->bpos;
- char *start = s->buf + s->bpos;
- c = vsnprintf(start, avail, format, args);
- if(c < 0){
- va_end(al);
- return c;
- }
- if(c < avail){
- s->bpos += (size_t)c;
- _write_update_pos(s);
- // TODO: only works right if newline is at end
- if(s->bm == bm_line && llt_memrchr(start, '\n', (size_t)c))
- ios_flush(s);
- va_end(al);
- return c;
- }
- }
- c = vasprintf(&str, format, al);
- if(c >= 0){
- ios_write(s, str, c);
- LLT_FREE(str);
- }
- va_end(al);
-#endif
- return c;
-}
-
-int
-ios_printf(ios_t *s, const char *format, ...)
-{
- va_list args;
- int c;
-
- va_start(args, format);
- c = ios_vprintf(s, format, args);
- va_end(args);
- return c;
-}
--- a/llt/ios.h
+++ /dev/null
@@ -1,207 +1,0 @@
-// this flag controls when data actually moves out to the underlying I/O
-// channel. memory streams are a special case of this where the data
-// never moves out.
-typedef enum {
- bm_none,
- bm_line,
- bm_block,
- bm_mem,
-}bufmode_t;
-
-typedef enum {
- bst_none,
- bst_rd,
- bst_wr,
-}bufstate_t;
-
-#define IOS_INLSIZE 54
-#define IOS_BUFSIZE 131072
-
-typedef struct {
- bufmode_t bm;
-
- // the state only indicates where the underlying file position is relative
- // to the buffer. reading: at the end. writing: at the beginning.
- // in general, you can do any operation in any state.
- bufstate_t state;
-
- int errcode;
-
- char *buf; // start of buffer
- size_t maxsize; // space allocated to buffer
- size_t size; // length of valid data in buf, >=ndirty
- size_t bpos; // current position in buffer
- size_t ndirty; // # bytes at &buf[0] that need to be written
-
- off_t fpos; // cached file pos
- size_t lineno; // current line number
-
- int fd;
-
- uint8_t readonly:1;
- uint8_t ownbuf:1;
- uint8_t ownfd:1;
- uint8_t _eof:1;
-
- // this means you can read, seek back, then read the same data
- // again any number of times. usually only true for files and strings.
- uint8_t rereadable:1;
-
- // this enables "stenciled writes". you can alternately write and
- // seek without flushing in between. this performs read-before-write
- // to populate the buffer, so "rereadable" capability is required.
- // this is off by default.
- //uint8_t stenciled:1;
-
- // request durable writes (fsync)
- // uint8_t durable:1;
-
- // todo: mutex
- char local[IOS_INLSIZE];
-}ios_t;
-
-/* low-level interface functions */
-size_t ios_read(ios_t *s, char *dest, size_t n);
-size_t ios_readall(ios_t *s, char *dest, size_t n);
-size_t ios_write(ios_t *s, const char *data, size_t n);
-off_t ios_seek(ios_t *s, off_t pos); // absolute seek
-off_t ios_seek_end(ios_t *s);
-off_t ios_skip(ios_t *s, off_t offs); // relative seek
-off_t ios_pos(ios_t *s); // get current position
-size_t ios_trunc(ios_t *s, size_t size);
-int ios_eof(ios_t *s);
-int ios_flush(ios_t *s);
-void ios_close(ios_t *s);
-char *ios_takebuf(ios_t *s, size_t *psize); // null-terminate and release buffer to caller
-// set buffer space to use
-int ios_setbuf(ios_t *s, char *buf, size_t size, int own);
-int ios_bufmode(ios_t *s, bufmode_t mode);
-void ios_set_readonly(ios_t *s);
-size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes);
-size_t ios_copyall(ios_t *to, ios_t *from);
-size_t ios_copyuntil(ios_t *to, ios_t *from, char delim);
-// ensure at least n bytes are buffered if possible. returns # available.
-size_t ios_readprep(ios_t *from, size_t n);
-//void ios_lock(ios_t *s);
-//int ios_trylock(ios_t *s);
-//int ios_unlock(ios_t *s);
-
-/* stream creation */
-ios_t *ios_file(ios_t *s, char *fname, int rd, int wr, int create, int trunc);
-ios_t *ios_mem(ios_t *s, size_t initsize);
-ios_t *ios_str(ios_t *s, char *str);
-ios_t *ios_static_buffer(ios_t *s, const char *buf, size_t sz);
-ios_t *ios_fd(ios_t *s, long fd, int isfile, int own);
-// todo: ios_socket
-extern ios_t *ios_stdin;
-extern ios_t *ios_stdout;
-extern ios_t *ios_stderr;
-void ios_init_stdstreams(void);
-
-/* high-level functions - output */
-int ios_putnum(ios_t *s, char *data, uint32_t type);
-int ios_putint(ios_t *s, int n);
-int ios_pututf8(ios_t *s, uint32_t wc);
-int ios_putstringz(ios_t *s, char *str, int do_write_nulterm);
-int ios_printf(ios_t *s, const char *format, ...);
-int ios_vprintf(ios_t *s, const char *format, va_list args);
-
-void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs);
-
-/* high-level stream functions - input */
-int ios_getnum(ios_t *s, char *data, uint32_t type);
-int ios_getutf8(ios_t *s, uint32_t *pwc);
-int ios_peekutf8(ios_t *s, uint32_t *pwc);
-int ios_ungetutf8(ios_t *s, uint32_t wc);
-int ios_getstringz(ios_t *dest, ios_t *src);
-int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
-int ios_getline(ios_t *s, char **pbuf, size_t *psz);
-char *ios_readline(ios_t *s);
-
-// discard data buffered for reading
-void ios_purge(ios_t *s);
-
-// seek by utf8 sequence increments
-int ios_nextutf8(ios_t *s);
-int ios_prevutf8(ios_t *s);
-
-/* stdio-style functions */
-#define IOS_EOF (-1)
-int ios_putc(int c, ios_t *s);
-//wint_t ios_putwc(ios_t *s, wchar_t wc);
-int ios_getc(ios_t *s);
-int ios_peekc(ios_t *s);
-//wint_t ios_getwc(ios_t *s);
-int ios_ungetc(int c, ios_t *s);
-//wint_t ios_ungetwc(ios_t *s, wint_t wc);
-#define ios_puts(str, s) ios_write(s, str, strlen(str))
-
-/*
- With memory streams, mixed reads and writes are equivalent to performing
- sequences of *p++, as either an lvalue or rvalue. File streams behave
- similarly, but other streams might not support this. Using unbuffered
- mode makes this more predictable.
-
- Note on "unget" functions:
- There are two kinds of functions here: those that operate on sized
- blocks of bytes and those that operate on logical units like "character"
- or "integer". The "unget" functions only work on logical units. There
- is no "unget n bytes". You can only do an unget after a matching get.
- However, data pushed back by an unget is available to all read operations.
- The reason for this is that unget is defined in terms of its effect on
- the underlying buffer (namely, it rebuffers data as if it had been
- buffered but not read yet). IOS reserves the right to perform large block
- operations directly, bypassing the buffer. In such a case data was
- never buffered, so "rebuffering" has no meaning (i.e. there is no
- correspondence between the buffer and the physical stream).
-
- Single-bit I/O is able to write partial bytes ONLY IF the stream supports
- seeking. Also, line buffering is not well-defined in the context of
- single-bit I/O, so it might not do what you expect.
-
- implementation notes:
- in order to know where we are in a file, we must ensure the buffer
- is only populated from the underlying stream starting with p==buf.
-
- to switch from writing to reading: flush, set p=buf, cnt=0
- to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
-
- when writing: buf starts at curr. physical stream pos, p - buf is how
- many bytes we've written logically. cnt==0
-
- dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
- reading to writing, where we might be in the middle of a byte without
- having changed it.
-
- to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
- seek back by the same amount (undo it). write onto those bits. now set
- the dirty bit. in this state, we can bit-read up to the end of the byte,
- then formally switch to the read state using flush.
-
- design points:
- - data-source independence, including memory streams
- - expose buffer to user, allow user-owned buffers
- - allow direct I/O, don't always go through buffer
- - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
- and makes it possible for sockets where it otherwise wouldn't be
- - tries to allow switching between reading and writing
- - support 64-bit and large files
- - efficient, low-latency buffering
- - special support for utf8
- - type-aware functions with byte-order swapping service
- - position counter for meaningful data offsets with sockets
-
- theory of operation:
-
- the buffer is a view of part of a file/stream. you can seek, read, and
- write around in it as much as you like, as if it were just a string.
-
- we keep track of the part of the buffer that's invalid (written to).
- we remember whether the position of the underlying stream is aligned
- with the end of the buffer (reading mode) or the beginning (writing mode).
-
- based on this info, we might have to seek back before doing a flush.
-
- as optimizations, we do no writing if the buffer isn't "dirty", and we
- do no reading if the data will only be overwritten.
-*/
--- a/llt/llt.h
+++ /dev/null
@@ -1,96 +1,0 @@
-#ifndef __LLT_H_
-#define __LLT_H_
-
-#include "platform.h"
-#include "utf8.h"
-#include "ios.h"
-#include "bitvector.h"
-
-#include "htableh.inc"
-HTPROT(ptrhash)
-
-#ifdef __GNUC__
-#define __unlikely(x) __builtin_expect(!!(x), 0)
-#define __likely(x) __builtin_expect(!!(x), 1)
-#else
-#define __unlikely(x) (x)
-#define __likely(x) (x)
-#endif
-
-#ifdef BOEHM_GC /* boehm GC allocator */
-#include <gc.h>
-#define LLT_ALLOC(n) GC_MALLOC(n)
-#define LLT_REALLOC(p, n) GC_REALLOC((p), (n))
-#define LLT_FREE(x) USED(x)
-#else /* standard allocator */
-#define LLT_ALLOC(n) malloc(n)
-#define LLT_REALLOC(p, n) realloc((p), (n))
-#define LLT_FREE(x) free(x)
-#endif
-
-#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
-#define bswap_32(x) \
- ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
- (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
-#define bswap_64(x) \
- (uint64_t)bswap_32((x) & 0xffffffffULL)<<32 | \
- (uint64_t)bswap_32(((x)>>32) & 0xffffffffULL)
-
-#define DBL_MAXINT (1LL<<53)
-#define FLT_MAXINT (1<<24)
-#define BIT63 0x8000000000000000ULL
-#define BIT31 0x80000000UL
-
-#ifdef BITS64
-#define NBITS 64
-#define TOP_BIT BIT63
-typedef uint64_t lltuint_t;
-typedef int64_t lltint_t;
-#else
-#define NBITS 32
-#define TOP_BIT BIT31
-typedef uint32_t lltuint_t;
-typedef int32_t lltint_t;
-#endif
-
-#define LOG2_10 3.3219280948873626
-#define rel_zero(a, b) (fabs((a)/(b)) < DBL_EPSILON)
-#define LABS(n) (((n)^((n)>>(NBITS-1))) - ((n)>>(NBITS-1)))
-#define NBABS(n, nb) (((n)^((n)>>((nb)-1))) - ((n)>>((nb)-1)))
-#define LLT_ALIGN(x, sz) (((x) + (sz-1)) & (-sz))
-
-// a mask with n set lo or hi bits
-#define lomask(n) (uint32_t)((((uint32_t)1)<<(n))-1)
-
-extern double D_PNAN, D_NNAN, D_PINF, D_NINF;
-extern float F_PNAN, F_NNAN, F_PINF, F_NINF;
-
-/* timefuncs.c */
-uint64_t i64time(void);
-double clock_now(void);
-void timestring(double seconds, char *buffer, size_t len);
-double parsetime(const char *str);
-void sleep_ms(int ms);
-
-/* hashing.c */
-lltuint_t nextipow2(lltuint_t i);
-uint32_t int32hash(uint32_t a);
-uint64_t int64hash(uint64_t key);
-uint32_t int64to32hash(uint64_t key);
-uint64_t memhash(const char* buf, size_t n);
-uint32_t memhash32(const char* buf, size_t n);
-
-/* random.c */
-void randomize(void);
-double genrand_double(void);
-uint64_t genrand_uint64(void);
-uint32_t genrand_uint32(void);
-int64_t genrand_int63(void);
-
-/* utils.c */
-char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
-int isdigit_base(char c, int base);
-
-void llt_init(void);
-
-#endif
--- a/llt/lltinit.c
+++ /dev/null
@@ -1,21 +1,0 @@
-#include "llt.h"
-
-double D_PNAN, D_NNAN, D_PINF, D_NINF;
-float F_PNAN, F_NNAN, F_PINF, F_NINF;
-
-void
-llt_init(void)
-{
- D_PNAN = strtod("+NaN", nil);
- D_NNAN = strtod("-NaN", nil);
- D_PINF = strtod("+Inf", nil);
- D_NINF = strtod("-Inf", nil);
-
- *(uint32_t*)&F_PNAN = 0x7fc00000;
- *(uint32_t*)&F_NNAN = 0xffc00000;
- *(uint32_t*)&F_PINF = 0x7f800000;
- *(uint32_t*)&F_NINF = 0xff800000;
-
- randomize();
- ios_init_stdstreams();
-}
--- a/llt/ptrhash.c
+++ /dev/null
@@ -1,39 +1,0 @@
-/*
- pointer hash table
- optimized for storing info about particular values
-*/
-
-#include "llt.h"
-
-#define OP_EQ(x, y) ((x) == (y))
-
-#ifdef BITS64
-static uint64_t
-_pinthash(uint64_t key)
-{
- key = (~key) + (key << 21); // key = (key << 21) - key - 1;
- key = key ^ (key >> 24);
- key = (key + (key << 3)) + (key << 8); // key * 265
- key = key ^ (key >> 14);
- key = (key + (key << 2)) + (key << 4); // key * 21
- key = key ^ (key >> 28);
- key = key + (key << 31);
- return key;
-}
-#else
-static uint32_t
-_pinthash(uint32_t a)
-{
- a = (a+0x7ed55d16) + (a<<12);
- a = (a^0xc761c23c) ^ (a>>19);
- a = (a+0x165667b1) + (a<<5);
- a = (a+0xd3a2646c) ^ (a<<9);
- a = (a+0xfd7046c5) + (a<<3);
- a = (a^0xb55a4f09) ^ (a>>16);
- return a;
-}
-#endif
-
-#include "htable.inc"
-
-HTIMPL(ptrhash, _pinthash, OP_EQ)
--- a/llt/random.c
+++ /dev/null
@@ -1,38 +1,0 @@
-/*
- random numbers
-*/
-#include "llt.h"
-#include "mt19937-64.h"
-
-static mt19937_64 ctx;
-
-uint64_t
-genrand_uint64(void)
-{
- return genrand64_int64(&ctx);
-}
-
-uint32_t
-genrand_uint32(void)
-{
- return genrand64_int64(&ctx) >> 32;
-}
-
-int64_t
-genrand_int63(void)
-{
- return genrand64_int63(&ctx);
-}
-
-double
-genrand_double(void)
-{
- return genrand64_real1(&ctx);
-}
-
-void
-randomize(void)
-{
- unsigned long long tm = i64time();
- init_by_array64(&ctx, &tm, 1);
-}
--- a/llt/timefuncs.c
+++ /dev/null
@@ -1,116 +1,0 @@
-#include "platform.h"
-
-#if defined(__plan9__)
-double
-floattime(void)
-{
- return (double)nsec() / 1.0e9;
-}
-#else
-double
-tv2float(struct timeval *tv)
-{
- return (double)tv->tv_sec + (double)tv->tv_usec/1.0e6;
-}
-
-double
-diff_time(struct timeval *tv1, struct timeval *tv2)
-{
- return tv2float(tv1) - tv2float(tv2);
-}
-#endif
-
-// return as many bits of system randomness as we can get our hands on
-uint64_t
-i64time(void)
-{
- uint64_t a;
-#if defined(__plan9__)
- a = nsec();
-#else
- struct timeval now;
- gettimeofday(&now, nil);
- a = (((uint64_t)now.tv_sec)<<32) + (uint64_t)now.tv_usec;
-#endif
-
- return a;
-}
-
-double
-clock_now(void)
-{
-#if defined(__plan9__)
- return floattime();
-#else
- struct timeval now;
- gettimeofday(&now, nil);
- return tv2float(&now);
-#endif
-}
-
-void
-timestring(double seconds, char *buffer, size_t len)
-{
-#if defined(__plan9__)
- Tm tm;
- snprint(buffer, len, "%τ", tmfmt(tmtime(&tm, seconds, tzload("local")), nil));
-#else
- time_t tme = (time_t)seconds;
-
- char *fmt = "%c"; /* needed to suppress GCC warning */
- struct tm tm;
-
- localtime_r(&tme, &tm);
- strftime(buffer, len, fmt, &tm);
-#endif
-}
-
-#if defined(__plan9__)
-double
-parsetime(const char *str)
-{
- Tm tm;
- if(tmparse(&tm, "?WWW, ?MM ?DD hh:mm:ss ?Z YYYY", str, tzload("local"), nil) == nil)
- return -1;
- return tmnorm(&tm);
-}
-#else
-double
-parsetime(const char *str)
-{
- char *fmt = "%c"; /* needed to suppress GCC warning */
- char *res;
- time_t t;
- struct tm tm;
-
- res = strptime(str, fmt, &tm);
- if(res != nil){
- /* Not set by strptime(); tells mktime() to determine
- * whether daylight saving time is in effect
- */
- tm.tm_isdst = -1;
- t = mktime(&tm);
- if(t == (time_t)-1)
- return -1;
- return (double)t;
- }
- return -1;
-}
-#endif
-
-void
-sleep_ms(int ms)
-{
- if(ms == 0)
- return;
-
-#if defined(__plan9__)
- sleep(ms);
-#else
- struct timeval timeout;
-
- timeout.tv_sec = ms/1000;
- timeout.tv_usec = (ms % 1000) * 1000;
- select(0, nil, nil, nil, &timeout);
-#endif
-}
--- a/llt/utf8.c
+++ /dev/null
@@ -1,707 +1,0 @@
-/*
- Basic UTF-8 manipulation routines
- by Jeff Bezanson
- placed in the public domain Fall 2005
-
- This code is designed to provide the utilities you need to manipulate
- UTF-8 as an internal string encoding. These functions do not perform the
- error checking normally needed when handling UTF-8 data, so if you happen
- to be from the Unicode Consortium you will want to flay me alive.
- I do this because error checking can be performed at the boundaries (I/O),
- with these routines reserved for higher performance on data known to be
- valid.
- A UTF-8 validation routine is included.
-*/
-
-#include "llt.h"
-
-static const uint32_t offsetsFromUTF8[6] = {
- 0x00000000UL, 0x00003080UL, 0x000E2080UL,
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL
-};
-
-static const char trailingBytesForUTF8[256] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-// straight from musl
-int
-u8_iswprint(uint32_t c)
-{
- if(c < 0xff)
- return ((c+1) & 0x7f) >= 0x21;
- if(c < 0x2028 || c-0x202a < 0xd800-0x202a || c-0xe000 < 0xfff9-0xe000)
- return 1;
- return !(c-0xfffc > 0x10ffff-0xfffc || (c&0xfffe) == 0xfffe);
-}
-
-/* returns length of next utf-8 sequence */
-size_t
-u8_seqlen(const char *s)
-{
- return trailingBytesForUTF8[(unsigned int)(uint8_t)s[0]] + 1;
-}
-
-/* returns the # of bytes needed to encode a certain character
- 0 means the character cannot (or should not) be encoded. */
-size_t
-u8_charlen(uint32_t ch)
-{
- if(ch < 0x80)
- return 1;
- if(ch < 0x800)
- return 2;
- if(ch < 0x10000)
- return 3;
- if(ch < 0x110000)
- return 4;
- return 0;
-}
-
-size_t
-u8_codingsize(uint32_t *wcstr, size_t n)
-{
- size_t i, c = 0;
-
- for(i = 0; i < n; i++)
- c += u8_charlen(wcstr[i]);
- return c;
-}
-
-/* conversions without error checking
- only works for valid UTF-8, i.e. no 5- or 6-byte sequences
- srcsz = source size in bytes
- sz = dest size in # of wide characters
-
- returns # characters converted
- if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
-*/
-size_t
-u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz)
-{
- uint32_t ch;
- const char *src_end = src + srcsz;
- size_t nb, i = 0;
-
- if(sz == 0 || srcsz == 0)
- return 0;
-
- while(i < sz){
- if(!isutf(*src)){ // invalid sequence
- dest[i++] = 0xFFFD;
- src++;
- if(src >= src_end)
- break;
- continue;
- }
- nb = trailingBytesForUTF8[(uint8_t)*src];
- if(src + nb >= src_end)
- break;
- ch = 0;
- switch(nb){
- case 5: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 4: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 3: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 2: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 1: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 0: ch += (uint8_t)*src++;
- }
- ch -= offsetsFromUTF8[nb];
- dest[i++] = ch;
- }
- return i;
-}
-
-/*
- * srcsz = number of source characters
- * sz = size of dest buffer in bytes
- * returns # bytes stored in dest
- * the destination string will never be bigger than the source string.
-*/
-size_t
-u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
-{
- uint32_t ch;
- size_t i = 0;
- char *dest0 = dest;
- char *dest_end = dest + sz;
-
- while(i < srcsz){
- ch = src[i];
- if(ch < 0x80){
- if(dest >= dest_end)
- break;
- *dest++ = (char)ch;
- }else if(ch < 0x800){
- if(dest >= dest_end-1)
- break;
- *dest++ = (ch>>6) | 0xC0;
- *dest++ = (ch & 0x3F) | 0x80;
- }else if(ch < 0x10000){
- if(dest >= dest_end-2)
- break;
- *dest++ = (ch>>12) | 0xE0;
- *dest++ = ((ch>>6) & 0x3F) | 0x80;
- *dest++ = (ch & 0x3F) | 0x80;
- }else if(ch < 0x110000){
- if(dest >= dest_end-3)
- break;
- *dest++ = (ch>>18) | 0xF0;
- *dest++ = ((ch>>12) & 0x3F) | 0x80;
- *dest++ = ((ch>>6) & 0x3F) | 0x80;
- *dest++ = (ch & 0x3F) | 0x80;
- }
- i++;
- }
- return dest-dest0;
-}
-
-size_t
-u8_wc_toutf8(char *dest, uint32_t ch)
-{
- if(ch < 0x80){
- dest[0] = (char)ch;
- return 1;
- }
- if(ch < 0x800){
- dest[0] = (ch>>6) | 0xC0;
- dest[1] = (ch & 0x3F) | 0x80;
- return 2;
- }
- if(ch < 0x10000){
- dest[0] = (ch>>12) | 0xE0;
- dest[1] = ((ch>>6) & 0x3F) | 0x80;
- dest[2] = (ch & 0x3F) | 0x80;
- return 3;
- }
- if(ch < 0x110000){
- dest[0] = (ch>>18) | 0xF0;
- dest[1] = ((ch>>12) & 0x3F) | 0x80;
- dest[2] = ((ch>>6) & 0x3F) | 0x80;
- dest[3] = (ch & 0x3F) | 0x80;
- return 4;
- }
- return 0;
-}
-
-/* charnum => byte offset */
-size_t
-u8_offset(const char *s, size_t charnum)
-{
- size_t i = 0;
-
- while(charnum > 0){
- if(s[i++] & 0x80)
- (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
- charnum--;
- }
- return i;
-}
-
-/* byte offset => charnum */
-size_t
-u8_charnum(const char *s, size_t offset)
-{
- size_t charnum = 0, i = 0;
-
- while(i < offset){
- if((s[i++] & 0x80) != 0 && !isutf(s[++i]) && !isutf(s[++i]))
- i++;
- charnum++;
- }
- return charnum;
-}
-
-/* number of characters in NUL-terminated string */
-size_t
-u8_strlen(const char *s)
-{
- size_t count = 0;
- size_t i = 0, lasti;
-
- while(1) {
- lasti = i;
- while(s[i] > 0)
- i++;
- count += (i-lasti);
- if(s[i++] == 0)
- break;
- (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
- count++;
- }
- return count;
-}
-
-size_t
-u8_strwidth(const char *s)
-{
- uint32_t ch;
- size_t nb, tot = 0;
- int w;
- signed char sc;
-
- while((sc = (signed char)*s) != 0){
- if(sc >= 0){
- s++;
- if(sc)
- tot++;
- }else{
- if(!isutf(sc)){
- tot++;
- s++;
- continue;
- }
- nb = trailingBytesForUTF8[(uint8_t)sc];
- ch = 0;
- switch(nb){
- case 5: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 4: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 3: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 2: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 1: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 0: ch += (uint8_t)*s++;
- }
- ch -= offsetsFromUTF8[nb];
- w = wcwidth(ch); // might return -1
- if(w > 0)
- tot += w;
- }
- }
- return tot;
-}
-
-/* reads the next utf-8 sequence out of a string, updating an index */
-uint32_t
-u8_nextchar(const char *s, size_t *i)
-{
- uint32_t ch = 0;
- size_t sz = 0;
-
- do{
- ch <<= 6;
- ch += (uint8_t)s[(*i)];
- sz++;
- }while(s[*i] && (++(*i)) && !isutf(s[*i]));
- return ch - offsetsFromUTF8[sz-1];
-}
-
-/* next character without NUL character terminator */
-uint32_t
-u8_nextmemchar(const char *s, size_t *i)
-{
- uint32_t ch = 0;
- size_t sz = 0;
-
- do{
- ch <<= 6;
- ch += (uint8_t)s[(*i)++];
- sz++;
- }while(!isutf(s[*i]));
- return ch - offsetsFromUTF8[sz-1];
-}
-
-void
-u8_inc(const char *s, size_t *i)
-{
- (void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || isutf(s[++(*i)]) || ++(*i));
-}
-
-void
-u8_dec(const char *s, size_t *i)
-{
- (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
-}
-
-int
-octal_digit(char c)
-{
- return (c >= '0' && c <= '7');
-}
-
-int
-hex_digit(char c)
-{
- return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
-}
-
-char
-read_escape_control_char(char c)
-{
- switch(c){
- case 'n': return '\n';
- case 't': return '\t';
- case 'a': return '\a';
- case 'b': return '\b';
- case 'e': return '\e';
- case 'f': return '\f';
- case 'r': return '\r';
- case 'v': return '\v';
- }
- return c;
-}
-
-/* assumes that src points to the character after a backslash
- returns number of input characters processed, 0 if error */
-size_t
-u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
-{
- assert(ssz > 0);
- uint32_t ch;
- char digs[10];
- int dno = 0, ndig;
- size_t i = 1;
- char c0 = str[0];
-
- if(octal_digit(c0)){
- i = 0;
- do{
- digs[dno++] = str[i++];
- }while(i < ssz && octal_digit(str[i]) && dno < 3);
- digs[dno] = '\0';
- ch = strtol(digs, nil, 8);
- }else if((c0 == 'x' && (ndig = 2)) || (c0 == 'u' && (ndig = 4)) || (c0 == 'U' && (ndig = 8))){
- while(i<ssz && hex_digit(str[i]) && dno < ndig)
- digs[dno++] = str[i++];
- if(dno == 0)
- return 0;
- digs[dno] = '\0';
- ch = strtol(digs, nil, 16);
- }else{
- ch = (uint32_t)read_escape_control_char(c0);
- }
- *dest = ch;
-
- return i;
-}
-
-/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
- example: u8_unescape(mybuf, 256, "hello\\u220e")
- note the double backslash is needed if called on a C string literal */
-size_t
-u8_unescape(char *buf, size_t sz, const char *src)
-{
- size_t c = 0, amt;
- uint32_t ch;
- char temp[4];
-
- while(*src && c < sz){
- if(*src == '\\'){
- src++;
- amt = u8_read_escape_sequence(src, 1000, &ch);
- }else{
- ch = (uint32_t)*src;
- amt = 1;
- }
- src += amt;
- amt = u8_wc_toutf8(temp, ch);
- if(amt > sz-c)
- break;
- memmove(&buf[c], temp, amt);
- c += amt;
- }
- if(c < sz)
- buf[c] = '\0';
- return c;
-}
-
-static inline int
-buf_put2c(char *buf, const char *src)
-{
- buf[0] = src[0];
- buf[1] = src[1];
- buf[2] = '\0';
- return 2;
-}
-
-int
-u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
-{
- assert(sz > 2);
- if(ch >= 0x20 && ch < 0x7f){
- buf[0] = ch;
- buf[1] = '\0';
- return 1;
- }
- if(ch > 0xffff)
- return snprintf(buf, sz, "\\U%.8x", ch);
- if(ch >= 0x80)
- return snprintf(buf, sz, "\\u%04x", ch);
- switch(ch){
- case '\n': return buf_put2c(buf, "\\n");
- case '\t': return buf_put2c(buf, "\\t");
- case '\\': return buf_put2c(buf, "\\\\");
- case '\a': return buf_put2c(buf, "\\a");
- case '\b': return buf_put2c(buf, "\\b");
- case '\e': return buf_put2c(buf, "\\e");
- case '\f': return buf_put2c(buf, "\\f");
- case '\r': return buf_put2c(buf, "\\r");
- case '\v': return buf_put2c(buf, "\\v");
- }
- return snprintf(buf, sz, "\\x%02x", ch);
-}
-
-size_t
-u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, int escape_quotes, int ascii)
-{
- size_t i = *pi, i0;
- uint32_t ch;
- char *start = buf;
- char *blim = start + sz-11;
- assert(sz > 11);
-
- while(i < end && buf < blim){
- // sz-11: leaves room for longest escape sequence
- if(escape_quotes && src[i] == '"'){
- buf += buf_put2c(buf, "\\\"");
- i++;
- }else if(src[i] == '\\'){
- buf += buf_put2c(buf, "\\\\");
- i++;
- }else{
- i0 = i;
- ch = u8_nextmemchar(src, &i);
- if(ascii || !u8_iswprint(ch)){
- buf += u8_escape_wchar(buf, sz - (buf-start), ch);
- }else{
- i = i0;
- do{
- *buf++ = src[i++];
- }while(!isutf(src[i]));
- }
- }
- }
- *buf++ = '\0';
- *pi = i;
- return (buf-start);
-}
-
-char *
-u8_strchr(const char *s, uint32_t ch, size_t *charn)
-{
- size_t i = 0, lasti = 0;
- uint32_t c;
-
- *charn = 0;
- while(s[i]){
- c = u8_nextchar(s, &i);
- if(c == ch){
- /* it's const for us, but not necessarily the caller */
- return (char*)&s[lasti];
- }
- lasti = i;
- (*charn)++;
- }
- return nil;
-}
-
-char *
-u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn)
-{
- size_t i = 0, lasti = 0;
- uint32_t c;
- int csz;
-
- *charn = 0;
- while(i < sz){
- c = csz = 0;
- do{
- c <<= 6;
- c += (uint8_t)s[i++];
- csz++;
- }while(i < sz && !isutf(s[i]));
- c -= offsetsFromUTF8[csz-1];
-
- if(c == ch)
- return (char*)&s[lasti];
- lasti = i;
- (*charn)++;
- }
- return nil;
-}
-
-char *
-u8_memrchr(const char *s, uint32_t ch, size_t sz)
-{
- size_t i = sz-1, tempi = 0;
- uint32_t c;
-
- if(sz == 0)
- return nil;
-
- while(i && !isutf(s[i]))
- i--;
-
- while(1){
- tempi = i;
- c = u8_nextmemchar(s, &tempi);
- if(c == ch)
- return (char*)&s[i];
- if(i == 0)
- break;
- tempi = i;
- u8_dec(s, &i);
- if(i > tempi)
- break;
- }
- return nil;
-}
-
-size_t
-u8_vprintf(const char *fmt, va_list ap)
-{
- size_t cnt, sz, nc, needfree = 0;
- char *buf, tmp[512];
- uint32_t *wcs;
-
- sz = 512;
- buf = tmp;
- cnt = vsnprintf(buf, sz, fmt, ap);
- if((ssize_t)cnt < 0)
- return 0;
- if(cnt >= sz){
- buf = (char*)malloc(cnt + 1);
- needfree = 1;
- vsnprintf(buf, cnt+1, fmt, ap);
- }
- wcs = (uint32_t*)malloc((cnt+1) * sizeof(uint32_t));
- nc = u8_toucs(wcs, cnt+1, buf, cnt);
- wcs[nc] = 0;
-#if defined(__plan9__)
- print("%S", (Rune*)wcs);
-#else
- printf("%ls", (wchar_t*)wcs);
-#endif
- free(wcs);
- if(needfree)
- free(buf);
- return nc;
-}
-
-size_t
-u8_printf(const char *fmt, ...)
-{
- size_t cnt;
- va_list args;
-
- va_start(args, fmt);
- cnt = u8_vprintf(fmt, args);
-
- va_end(args);
- return cnt;
-}
-
-/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
-
- length is in bytes, since without knowing whether the string is valid
- it's hard to know how many characters there are! */
-int
-u8_isvalid(const char *str, int length)
-{
- const uint8_t *p, *pend = (uint8_t*)str + length;
- uint8_t c;
- int ab;
-
- for(p = (uint8_t*)str; p < pend; p++){
- c = *p;
- if(c < 128)
- continue;
- if((c & 0xc0) != 0xc0)
- return 0;
- ab = trailingBytesForUTF8[c];
- if(length < ab)
- return 0;
- length -= ab;
-
- p++;
- /* Check top bits in the second byte */
- if((*p & 0xc0) != 0x80)
- return 0;
-
- /* Check for overlong sequences for each different length */
- switch(ab){
- /* Check for xx00 000x */
- case 1:
- if((c & 0x3e) == 0)
- return 0;
- continue; /* We know there aren't any more bytes to check */
-
- /* Check for 1110 0000, xx0x xxxx */
- case 2:
- if(c == 0xe0 && (*p & 0x20) == 0)
- return 0;
- break;
-
- /* Check for 1111 0000, xx00 xxxx */
- case 3:
- if(c == 0xf0 && (*p & 0x30) == 0)
- return 0;
- break;
-
- /* Check for 1111 1000, xx00 0xxx */
- case 4:
- if(c == 0xf8 && (*p & 0x38) == 0)
- return 0;
- break;
-
- /* Check for leading 0xfe or 0xff and then for 1111 1100, xx00 00xx */
- case 5:
- if(c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
- return 0;
- break;
- }
-
- /* Check for valid bytes after the 2nd, if any; all must start 10 */
- while(--ab > 0)
- if((*(++p) & 0xc0) != 0x80)
- return 0;
- }
-
- return 1;
-}
-
-int
-u8_reverse(char *dest, char * src, size_t len)
-{
- size_t si = 0, di = len;
- uint8_t c;
-
- dest[di] = '\0';
- while(si < len){
- c = (uint8_t)src[si];
- if((~c) & 0x80){
- di--;
- dest[di] = c;
- si++;
- }else{
- switch(c>>4){
- case 0xc:
- case 0xd:
- di -= 2;
- *((int16_t*)&dest[di]) = *((int16_t*)&src[si]);
- si += 2;
- break;
- case 0xe:
- di -= 3;
- dest[di] = src[si];
- *((int16_t*)&dest[di+1]) = *((int16_t*)&src[si+1]);
- si += 3;
- break;
- case 0xf:
- di -= 4;
- *((int32_t*)&dest[di]) = *((int32_t*)&src[si]);
- si += 4;
- break;
- default:
- return 1;
- }
- }
- }
- return 0;
-}
--- a/llt/utf8.h
+++ /dev/null
@@ -1,111 +1,0 @@
-#ifndef __UTF8_H_
-#define __UTF8_H_
-
-/* is c the start of a utf8 sequence? */
-#define isutf(c) (((c)&0xC0) != 0x80)
-
-int u8_iswprint(uint32_t c);
-
-/* convert UTF-8 data to wide character */
-size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
-
-/* the opposite conversion */
-size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
-
-/* single character to UTF-8, returns # bytes written */
-size_t u8_wc_toutf8(char *dest, uint32_t ch);
-
-/* character number to byte offset */
-size_t u8_offset(const char *str, size_t charnum);
-
-/* byte offset to character number */
-size_t u8_charnum(const char *s, size_t offset);
-
-/* return next character, updating an index variable */
-uint32_t u8_nextchar(const char *s, size_t *i);
-
-/* next character without NUL character terminator */
-uint32_t u8_nextmemchar(const char *s, size_t *i);
-
-/* move to next character */
-void u8_inc(const char *s, size_t *i);
-
-/* move to previous character */
-void u8_dec(const char *s, size_t *i);
-
-/* returns length of next utf-8 sequence */
-size_t u8_seqlen(const char *s);
-
-/* returns the # of bytes needed to encode a certain character */
-size_t u8_charlen(uint32_t ch);
-
-/* computes the # of bytes needed to encode a WC string as UTF-8 */
-size_t u8_codingsize(uint32_t *wcstr, size_t n);
-
-char read_escape_control_char(char c);
-
-/* assuming src points to the character after a backslash, read an
- escape sequence, storing the result in dest and returning the number of
- input characters processed */
-size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
-
-/* given a wide character, convert it to an ASCII escape sequence stored in
- buf, where buf is "sz" bytes. returns the number of characters output.
- sz must be at least 3. */
-int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
-
-/* convert a string "src" containing escape sequences to UTF-8 */
-size_t u8_unescape(char *buf, size_t sz, const char *src);
-
-/* convert UTF-8 "src" to escape sequences.
-
- sz is buf size in bytes. must be at least 12.
-
- if escape_quotes is nonzero, quote characters will be escaped.
-
- if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
-
- starts at src[*pi], updates *pi to point to the first unprocessed
- byte of the input.
-
- end is one more than the last allowable value of *pi.
-
- returns number of bytes placed in buf, including a NUL terminator.
-*/
-size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
- int escape_quotes, int ascii);
-
-/* utility predicates used by the above */
-int octal_digit(char c);
-int hex_digit(char c);
-
-/* return a pointer to the first occurrence of ch in s, or nil if not
- found. character index of found character returned in *charn. */
-char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
-
-/* same as the above, but searches a buffer of a given size instead of
- a NUL-terminated string. */
-char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
-
-char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
-
-/* count the number of characters in a UTF-8 string */
-size_t u8_strlen(const char *s);
-
-/* number of columns occupied by a string */
-size_t u8_strwidth(const char *s);
-
-/* printf where the format string and arguments may be in UTF-8.
- you can avoid this function and just use ordinary printf() if the current
- locale is UTF-8. */
-size_t u8_vprintf(const char *fmt, va_list ap);
-size_t u8_printf(const char *fmt, ...);
-
-/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
-int u8_isvalid(const char *str, int length);
-
-/* reverse a UTF-8 string. len is length in bytes. dest and src must both
- be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
-int u8_reverse(char *dest, char *src, size_t len);
-
-#endif
--- a/mkfile
+++ b/mkfile
@@ -2,7 +2,7 @@
BIN=/$objtype/bin
TARG=flisp
-CFLAGS=$CFLAGS -p -D__plan9__ -D__${objtype}__ -I3rd -Illt -Iplan9
+CFLAGS=$CFLAGS -p -D__plan9__ -D__${objtype}__ -I3rd -Iplan9
CLEANFILES=boot.h builtin_fns.h
HFILES=\
@@ -9,15 +9,6 @@
equalhash.h\
flisp.h\
-#CHFILES=\
-# cvalues.c\
-# equal.c\
-# operators.c\
-# print.c\
-# read.c\
-# types.c\
-
-
OFILES=\
builtins.$O\
equalhash.$O\
@@ -32,20 +23,19 @@
print.$O\
equal.$O\
types.$O\
+ bitvector-ops.$O\
+ bitvector.$O\
+ dump.$O\
+ hashing.$O\
+ htable.$O\
+ ios.$O\
+ llt.$O\
+ ptrhash.$O\
+ random.$O\
+ timefuncs.$O\
+ utf8.$O\
3rd/mt19937-64.$O\
3rd/wcwidth.$O\
- llt/bitvector-ops.$O\
- llt/bitvector.$O\
- llt/dump.$O\
- llt/hashing.$O\
- llt/htable.$O\
- llt/int2str.$O\
- llt/ios.$O\
- llt/lltinit.$O\
- llt/ptrhash.$O\
- llt/random.$O\
- llt/timefuncs.$O\
- llt/utf8.$O\
default:V: all
@@ -57,9 +47,7 @@
builtin_fns.h:
sed -n 's/^BUILTIN[_]?(\(".*)/BUILTIN_FN\1/gp' *.c >$target
-flmain.$O: boot.h
-
-#flisp.$O: maxstack.inc opcodes.h builtin_fns.h $CHFILES
+flmain.$O: boot.h builtin_fns.h
flisp.$O: maxstack.inc opcodes.h builtin_fns.h
%.$O: %.c
--- /dev/null
+++ b/ptrhash.c
@@ -1,0 +1,39 @@
+/*
+ pointer hash table
+ optimized for storing info about particular values
+*/
+
+#include "llt.h"
+
+#define OP_EQ(x, y) ((x) == (y))
+
+#ifdef BITS64
+static uint64_t
+_pinthash(uint64_t key)
+{
+ key = (~key) + (key << 21); // key = (key << 21) - key - 1;
+ key = key ^ (key >> 24);
+ key = (key + (key << 3)) + (key << 8); // key * 265
+ key = key ^ (key >> 14);
+ key = (key + (key << 2)) + (key << 4); // key * 21
+ key = key ^ (key >> 28);
+ key = key + (key << 31);
+ return key;
+}
+#else
+static uint32_t
+_pinthash(uint32_t a)
+{
+ a = (a+0x7ed55d16) + (a<<12);
+ a = (a^0xc761c23c) ^ (a>>19);
+ a = (a+0x165667b1) + (a<<5);
+ a = (a+0xd3a2646c) ^ (a<<9);
+ a = (a+0xfd7046c5) + (a<<3);
+ a = (a^0xb55a4f09) ^ (a>>16);
+ return a;
+}
+#endif
+
+#include "htable.inc"
+
+HTIMPL(ptrhash, _pinthash, OP_EQ)
--- /dev/null
+++ b/random.c
@@ -1,0 +1,39 @@
+/*
+ random numbers
+*/
+#include "llt.h"
+#include "mt19937-64.h"
+#include "timefuncs.h"
+
+static mt19937_64 ctx;
+
+uint64_t
+genrand_uint64(void)
+{
+ return genrand64_int64(&ctx);
+}
+
+uint32_t
+genrand_uint32(void)
+{
+ return genrand64_int64(&ctx) >> 32;
+}
+
+int64_t
+genrand_int63(void)
+{
+ return genrand64_int63(&ctx);
+}
+
+double
+genrand_double(void)
+{
+ return genrand64_real1(&ctx);
+}
+
+void
+randomize(void)
+{
+ unsigned long long tm = i64time();
+ init_by_array64(&ctx, &tm, 1);
+}
--- /dev/null
+++ b/random.h
@@ -1,0 +1,10 @@
+#ifndef RANDOM_H_
+#define RANDOM_H_
+
+void randomize(void);
+double genrand_double(void);
+uint64_t genrand_uint64(void);
+uint32_t genrand_uint32(void);
+int64_t genrand_int63(void);
+
+#endif
--- /dev/null
+++ b/timefuncs.c
@@ -1,0 +1,116 @@
+#include "platform.h"
+
+#if defined(__plan9__)
+double
+floattime(void)
+{
+ return (double)nsec() / 1.0e9;
+}
+#else
+double
+tv2float(struct timeval *tv)
+{
+ return (double)tv->tv_sec + (double)tv->tv_usec/1.0e6;
+}
+
+double
+diff_time(struct timeval *tv1, struct timeval *tv2)
+{
+ return tv2float(tv1) - tv2float(tv2);
+}
+#endif
+
+// return as many bits of system randomness as we can get our hands on
+uint64_t
+i64time(void)
+{
+ uint64_t a;
+#if defined(__plan9__)
+ a = nsec();
+#else
+ struct timeval now;
+ gettimeofday(&now, nil);
+ a = (((uint64_t)now.tv_sec)<<32) + (uint64_t)now.tv_usec;
+#endif
+
+ return a;
+}
+
+double
+clock_now(void)
+{
+#if defined(__plan9__)
+ return floattime();
+#else
+ struct timeval now;
+ gettimeofday(&now, nil);
+ return tv2float(&now);
+#endif
+}
+
+void
+timestring(double seconds, char *buffer, size_t len)
+{
+#if defined(__plan9__)
+ Tm tm;
+ snprint(buffer, len, "%τ", tmfmt(tmtime(&tm, seconds, tzload("local")), nil));
+#else
+ time_t tme = (time_t)seconds;
+
+ char *fmt = "%c"; /* needed to suppress GCC warning */
+ struct tm tm;
+
+ localtime_r(&tme, &tm);
+ strftime(buffer, len, fmt, &tm);
+#endif
+}
+
+#if defined(__plan9__)
+double
+parsetime(const char *str)
+{
+ Tm tm;
+ if(tmparse(&tm, "?WWW, ?MM ?DD hh:mm:ss ?Z YYYY", str, tzload("local"), nil) == nil)
+ return -1;
+ return tmnorm(&tm);
+}
+#else
+double
+parsetime(const char *str)
+{
+ char *fmt = "%c"; /* needed to suppress GCC warning */
+ char *res;
+ time_t t;
+ struct tm tm;
+
+ res = strptime(str, fmt, &tm);
+ if(res != nil){
+ /* Not set by strptime(); tells mktime() to determine
+ * whether daylight saving time is in effect
+ */
+ tm.tm_isdst = -1;
+ t = mktime(&tm);
+ if(t == (time_t)-1)
+ return -1;
+ return (double)t;
+ }
+ return -1;
+}
+#endif
+
+void
+sleep_ms(int ms)
+{
+ if(ms == 0)
+ return;
+
+#if defined(__plan9__)
+ sleep(ms);
+#else
+ struct timeval timeout;
+
+ timeout.tv_sec = ms/1000;
+ timeout.tv_usec = (ms % 1000) * 1000;
+ select(0, nil, nil, nil, &timeout);
+#endif
+}
--- /dev/null
+++ b/timefuncs.h
@@ -1,0 +1,10 @@
+#ifndef TIMEFUNCS_H_
+#define TIMEFUNCS_H_
+
+uint64_t i64time(void);
+double clock_now(void);
+void timestring(double seconds, char *buffer, size_t len);
+double parsetime(const char *str);
+void sleep_ms(int ms);
+
+#endif
--- /dev/null
+++ b/utf8.c
@@ -1,0 +1,707 @@
+/*
+ Basic UTF-8 manipulation routines
+ by Jeff Bezanson
+ placed in the public domain Fall 2005
+
+ This code is designed to provide the utilities you need to manipulate
+ UTF-8 as an internal string encoding. These functions do not perform the
+ error checking normally needed when handling UTF-8 data, so if you happen
+ to be from the Unicode Consortium you will want to flay me alive.
+ I do this because error checking can be performed at the boundaries (I/O),
+ with these routines reserved for higher performance on data known to be
+ valid.
+ A UTF-8 validation routine is included.
+*/
+
+#include "llt.h"
+
+static const uint32_t offsetsFromUTF8[6] = {
+ 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL
+};
+
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+// straight from musl
+int
+u8_iswprint(uint32_t c)
+{
+ if(c < 0xff)
+ return ((c+1) & 0x7f) >= 0x21;
+ if(c < 0x2028 || c-0x202a < 0xd800-0x202a || c-0xe000 < 0xfff9-0xe000)
+ return 1;
+ return !(c-0xfffc > 0x10ffff-0xfffc || (c&0xfffe) == 0xfffe);
+}
+
+/* returns length of next utf-8 sequence */
+size_t
+u8_seqlen(const char *s)
+{
+ return trailingBytesForUTF8[(unsigned int)(uint8_t)s[0]] + 1;
+}
+
+/* returns the # of bytes needed to encode a certain character
+ 0 means the character cannot (or should not) be encoded. */
+size_t
+u8_charlen(uint32_t ch)
+{
+ if(ch < 0x80)
+ return 1;
+ if(ch < 0x800)
+ return 2;
+ if(ch < 0x10000)
+ return 3;
+ if(ch < 0x110000)
+ return 4;
+ return 0;
+}
+
+size_t
+u8_codingsize(uint32_t *wcstr, size_t n)
+{
+ size_t i, c = 0;
+
+ for(i = 0; i < n; i++)
+ c += u8_charlen(wcstr[i]);
+ return c;
+}
+
+/* conversions without error checking
+ only works for valid UTF-8, i.e. no 5- or 6-byte sequences
+ srcsz = source size in bytes
+ sz = dest size in # of wide characters
+
+ returns # characters converted
+ if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
+*/
+size_t
+u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz)
+{
+ uint32_t ch;
+ const char *src_end = src + srcsz;
+ size_t nb, i = 0;
+
+ if(sz == 0 || srcsz == 0)
+ return 0;
+
+ while(i < sz){
+ if(!isutf(*src)){ // invalid sequence
+ dest[i++] = 0xFFFD;
+ src++;
+ if(src >= src_end)
+ break;
+ continue;
+ }
+ nb = trailingBytesForUTF8[(uint8_t)*src];
+ if(src + nb >= src_end)
+ break;
+ ch = 0;
+ switch(nb){
+ case 5: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
+ case 4: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
+ case 3: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
+ case 2: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
+ case 1: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
+ case 0: ch += (uint8_t)*src++;
+ }
+ ch -= offsetsFromUTF8[nb];
+ dest[i++] = ch;
+ }
+ return i;
+}
+
+/*
+ * srcsz = number of source characters
+ * sz = size of dest buffer in bytes
+ * returns # bytes stored in dest
+ * the destination string will never be bigger than the source string.
+*/
+size_t
+u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
+{
+ uint32_t ch;
+ size_t i = 0;
+ char *dest0 = dest;
+ char *dest_end = dest + sz;
+
+ while(i < srcsz){
+ ch = src[i];
+ if(ch < 0x80){
+ if(dest >= dest_end)
+ break;
+ *dest++ = (char)ch;
+ }else if(ch < 0x800){
+ if(dest >= dest_end-1)
+ break;
+ *dest++ = (ch>>6) | 0xC0;
+ *dest++ = (ch & 0x3F) | 0x80;
+ }else if(ch < 0x10000){
+ if(dest >= dest_end-2)
+ break;
+ *dest++ = (ch>>12) | 0xE0;
+ *dest++ = ((ch>>6) & 0x3F) | 0x80;
+ *dest++ = (ch & 0x3F) | 0x80;
+ }else if(ch < 0x110000){
+ if(dest >= dest_end-3)
+ break;
+ *dest++ = (ch>>18) | 0xF0;
+ *dest++ = ((ch>>12) & 0x3F) | 0x80;
+ *dest++ = ((ch>>6) & 0x3F) | 0x80;
+ *dest++ = (ch & 0x3F) | 0x80;
+ }
+ i++;
+ }
+ return dest-dest0;
+}
+
+size_t
+u8_wc_toutf8(char *dest, uint32_t ch)
+{
+ if(ch < 0x80){
+ dest[0] = (char)ch;
+ return 1;
+ }
+ if(ch < 0x800){
+ dest[0] = (ch>>6) | 0xC0;
+ dest[1] = (ch & 0x3F) | 0x80;
+ return 2;
+ }
+ if(ch < 0x10000){
+ dest[0] = (ch>>12) | 0xE0;
+ dest[1] = ((ch>>6) & 0x3F) | 0x80;
+ dest[2] = (ch & 0x3F) | 0x80;
+ return 3;
+ }
+ if(ch < 0x110000){
+ dest[0] = (ch>>18) | 0xF0;
+ dest[1] = ((ch>>12) & 0x3F) | 0x80;
+ dest[2] = ((ch>>6) & 0x3F) | 0x80;
+ dest[3] = (ch & 0x3F) | 0x80;
+ return 4;
+ }
+ return 0;
+}
+
+/* charnum => byte offset */
+size_t
+u8_offset(const char *s, size_t charnum)
+{
+ size_t i = 0;
+
+ while(charnum > 0){
+ if(s[i++] & 0x80)
+ (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
+ charnum--;
+ }
+ return i;
+}
+
+/* byte offset => charnum */
+size_t
+u8_charnum(const char *s, size_t offset)
+{
+ size_t charnum = 0, i = 0;
+
+ while(i < offset){
+ if((s[i++] & 0x80) != 0 && !isutf(s[++i]) && !isutf(s[++i]))
+ i++;
+ charnum++;
+ }
+ return charnum;
+}
+
+/* number of characters in NUL-terminated string */
+size_t
+u8_strlen(const char *s)
+{
+ size_t count = 0;
+ size_t i = 0, lasti;
+
+ while(1) {
+ lasti = i;
+ while(s[i] > 0)
+ i++;
+ count += (i-lasti);
+ if(s[i++] == 0)
+ break;
+ (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
+ count++;
+ }
+ return count;
+}
+
+size_t
+u8_strwidth(const char *s)
+{
+ uint32_t ch;
+ size_t nb, tot = 0;
+ int w;
+ signed char sc;
+
+ while((sc = (signed char)*s) != 0){
+ if(sc >= 0){
+ s++;
+ if(sc)
+ tot++;
+ }else{
+ if(!isutf(sc)){
+ tot++;
+ s++;
+ continue;
+ }
+ nb = trailingBytesForUTF8[(uint8_t)sc];
+ ch = 0;
+ switch(nb){
+ case 5: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
+ case 4: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
+ case 3: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
+ case 2: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
+ case 1: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
+ case 0: ch += (uint8_t)*s++;
+ }
+ ch -= offsetsFromUTF8[nb];
+ w = wcwidth(ch); // might return -1
+ if(w > 0)
+ tot += w;
+ }
+ }
+ return tot;
+}
+
+/* reads the next utf-8 sequence out of a string, updating an index */
+uint32_t
+u8_nextchar(const char *s, size_t *i)
+{
+ uint32_t ch = 0;
+ size_t sz = 0;
+
+ do{
+ ch <<= 6;
+ ch += (uint8_t)s[(*i)];
+ sz++;
+ }while(s[*i] && (++(*i)) && !isutf(s[*i]));
+ return ch - offsetsFromUTF8[sz-1];
+}
+
+/* next character without NUL character terminator */
+uint32_t
+u8_nextmemchar(const char *s, size_t *i)
+{
+ uint32_t ch = 0;
+ size_t sz = 0;
+
+ do{
+ ch <<= 6;
+ ch += (uint8_t)s[(*i)++];
+ sz++;
+ }while(!isutf(s[*i]));
+ return ch - offsetsFromUTF8[sz-1];
+}
+
+void
+u8_inc(const char *s, size_t *i)
+{
+ (void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || isutf(s[++(*i)]) || ++(*i));
+}
+
+void
+u8_dec(const char *s, size_t *i)
+{
+ (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
+}
+
+int
+octal_digit(char c)
+{
+ return (c >= '0' && c <= '7');
+}
+
+int
+hex_digit(char c)
+{
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
+
+char
+read_escape_control_char(char c)
+{
+ switch(c){
+ case 'n': return '\n';
+ case 't': return '\t';
+ case 'a': return '\a';
+ case 'b': return '\b';
+ case 'e': return '\e';
+ case 'f': return '\f';
+ case 'r': return '\r';
+ case 'v': return '\v';
+ }
+ return c;
+}
+
+/* assumes that src points to the character after a backslash
+ returns number of input characters processed, 0 if error */
+size_t
+u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
+{
+ assert(ssz > 0);
+ uint32_t ch;
+ char digs[10];
+ int dno = 0, ndig;
+ size_t i = 1;
+ char c0 = str[0];
+
+ if(octal_digit(c0)){
+ i = 0;
+ do{
+ digs[dno++] = str[i++];
+ }while(i < ssz && octal_digit(str[i]) && dno < 3);
+ digs[dno] = '\0';
+ ch = strtol(digs, nil, 8);
+ }else if((c0 == 'x' && (ndig = 2)) || (c0 == 'u' && (ndig = 4)) || (c0 == 'U' && (ndig = 8))){
+ while(i<ssz && hex_digit(str[i]) && dno < ndig)
+ digs[dno++] = str[i++];
+ if(dno == 0)
+ return 0;
+ digs[dno] = '\0';
+ ch = strtol(digs, nil, 16);
+ }else{
+ ch = (uint32_t)read_escape_control_char(c0);
+ }
+ *dest = ch;
+
+ return i;
+}
+
+/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
+ example: u8_unescape(mybuf, 256, "hello\\u220e")
+ note the double backslash is needed if called on a C string literal */
+size_t
+u8_unescape(char *buf, size_t sz, const char *src)
+{
+ size_t c = 0, amt;
+ uint32_t ch;
+ char temp[4];
+
+ while(*src && c < sz){
+ if(*src == '\\'){
+ src++;
+ amt = u8_read_escape_sequence(src, 1000, &ch);
+ }else{
+ ch = (uint32_t)*src;
+ amt = 1;
+ }
+ src += amt;
+ amt = u8_wc_toutf8(temp, ch);
+ if(amt > sz-c)
+ break;
+ memmove(&buf[c], temp, amt);
+ c += amt;
+ }
+ if(c < sz)
+ buf[c] = '\0';
+ return c;
+}
+
+static inline int
+buf_put2c(char *buf, const char *src)
+{
+ buf[0] = src[0];
+ buf[1] = src[1];
+ buf[2] = '\0';
+ return 2;
+}
+
+int
+u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
+{
+ assert(sz > 2);
+ if(ch >= 0x20 && ch < 0x7f){
+ buf[0] = ch;
+ buf[1] = '\0';
+ return 1;
+ }
+ if(ch > 0xffff)
+ return snprintf(buf, sz, "\\U%.8x", ch);
+ if(ch >= 0x80)
+ return snprintf(buf, sz, "\\u%04x", ch);
+ switch(ch){
+ case '\n': return buf_put2c(buf, "\\n");
+ case '\t': return buf_put2c(buf, "\\t");
+ case '\\': return buf_put2c(buf, "\\\\");
+ case '\a': return buf_put2c(buf, "\\a");
+ case '\b': return buf_put2c(buf, "\\b");
+ case '\e': return buf_put2c(buf, "\\e");
+ case '\f': return buf_put2c(buf, "\\f");
+ case '\r': return buf_put2c(buf, "\\r");
+ case '\v': return buf_put2c(buf, "\\v");
+ }
+ return snprintf(buf, sz, "\\x%02x", ch);
+}
+
+size_t
+u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, int escape_quotes, int ascii)
+{
+ size_t i = *pi, i0;
+ uint32_t ch;
+ char *start = buf;
+ char *blim = start + sz-11;
+ assert(sz > 11);
+
+ while(i < end && buf < blim){
+ // sz-11: leaves room for longest escape sequence
+ if(escape_quotes && src[i] == '"'){
+ buf += buf_put2c(buf, "\\\"");
+ i++;
+ }else if(src[i] == '\\'){
+ buf += buf_put2c(buf, "\\\\");
+ i++;
+ }else{
+ i0 = i;
+ ch = u8_nextmemchar(src, &i);
+ if(ascii || !u8_iswprint(ch)){
+ buf += u8_escape_wchar(buf, sz - (buf-start), ch);
+ }else{
+ i = i0;
+ do{
+ *buf++ = src[i++];
+ }while(!isutf(src[i]));
+ }
+ }
+ }
+ *buf++ = '\0';
+ *pi = i;
+ return (buf-start);
+}
+
+char *
+u8_strchr(const char *s, uint32_t ch, size_t *charn)
+{
+ size_t i = 0, lasti = 0;
+ uint32_t c;
+
+ *charn = 0;
+ while(s[i]){
+ c = u8_nextchar(s, &i);
+ if(c == ch){
+ /* it's const for us, but not necessarily the caller */
+ return (char*)&s[lasti];
+ }
+ lasti = i;
+ (*charn)++;
+ }
+ return nil;
+}
+
+char *
+u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn)
+{
+ size_t i = 0, lasti = 0;
+ uint32_t c;
+ int csz;
+
+ *charn = 0;
+ while(i < sz){
+ c = csz = 0;
+ do{
+ c <<= 6;
+ c += (uint8_t)s[i++];
+ csz++;
+ }while(i < sz && !isutf(s[i]));
+ c -= offsetsFromUTF8[csz-1];
+
+ if(c == ch)
+ return (char*)&s[lasti];
+ lasti = i;
+ (*charn)++;
+ }
+ return nil;
+}
+
+char *
+u8_memrchr(const char *s, uint32_t ch, size_t sz)
+{
+ size_t i = sz-1, tempi = 0;
+ uint32_t c;
+
+ if(sz == 0)
+ return nil;
+
+ while(i && !isutf(s[i]))
+ i--;
+
+ while(1){
+ tempi = i;
+ c = u8_nextmemchar(s, &tempi);
+ if(c == ch)
+ return (char*)&s[i];
+ if(i == 0)
+ break;
+ tempi = i;
+ u8_dec(s, &i);
+ if(i > tempi)
+ break;
+ }
+ return nil;
+}
+
+size_t
+u8_vprintf(const char *fmt, va_list ap)
+{
+ size_t cnt, sz, nc, needfree = 0;
+ char *buf, tmp[512];
+ uint32_t *wcs;
+
+ sz = 512;
+ buf = tmp;
+ cnt = vsnprintf(buf, sz, fmt, ap);
+ if((ssize_t)cnt < 0)
+ return 0;
+ if(cnt >= sz){
+ buf = (char*)malloc(cnt + 1);
+ needfree = 1;
+ vsnprintf(buf, cnt+1, fmt, ap);
+ }
+ wcs = (uint32_t*)malloc((cnt+1) * sizeof(uint32_t));
+ nc = u8_toucs(wcs, cnt+1, buf, cnt);
+ wcs[nc] = 0;
+#if defined(__plan9__)
+ print("%S", (Rune*)wcs);
+#else
+ printf("%ls", (wchar_t*)wcs);
+#endif
+ free(wcs);
+ if(needfree)
+ free(buf);
+ return nc;
+}
+
+size_t
+u8_printf(const char *fmt, ...)
+{
+ size_t cnt;
+ va_list args;
+
+ va_start(args, fmt);
+ cnt = u8_vprintf(fmt, args);
+
+ va_end(args);
+ return cnt;
+}
+
+/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
+
+ length is in bytes, since without knowing whether the string is valid
+ it's hard to know how many characters there are! */
+int
+u8_isvalid(const char *str, int length)
+{
+ const uint8_t *p, *pend = (uint8_t*)str + length;
+ uint8_t c;
+ int ab;
+
+ for(p = (uint8_t*)str; p < pend; p++){
+ c = *p;
+ if(c < 128)
+ continue;
+ if((c & 0xc0) != 0xc0)
+ return 0;
+ ab = trailingBytesForUTF8[c];
+ if(length < ab)
+ return 0;
+ length -= ab;
+
+ p++;
+ /* Check top bits in the second byte */
+ if((*p & 0xc0) != 0x80)
+ return 0;
+
+ /* Check for overlong sequences for each different length */
+ switch(ab){
+ /* Check for xx00 000x */
+ case 1:
+ if((c & 0x3e) == 0)
+ return 0;
+ continue; /* We know there aren't any more bytes to check */
+
+ /* Check for 1110 0000, xx0x xxxx */
+ case 2:
+ if(c == 0xe0 && (*p & 0x20) == 0)
+ return 0;
+ break;
+
+ /* Check for 1111 0000, xx00 xxxx */
+ case 3:
+ if(c == 0xf0 && (*p & 0x30) == 0)
+ return 0;
+ break;
+
+ /* Check for 1111 1000, xx00 0xxx */
+ case 4:
+ if(c == 0xf8 && (*p & 0x38) == 0)
+ return 0;
+ break;
+
+ /* Check for leading 0xfe or 0xff and then for 1111 1100, xx00 00xx */
+ case 5:
+ if(c == 0xfe || c == 0xff || (c == 0xfc && (*p & 0x3c) == 0))
+ return 0;
+ break;
+ }
+
+ /* Check for valid bytes after the 2nd, if any; all must start 10 */
+ while(--ab > 0)
+ if((*(++p) & 0xc0) != 0x80)
+ return 0;
+ }
+
+ return 1;
+}
+
+int
+u8_reverse(char *dest, char * src, size_t len)
+{
+ size_t si = 0, di = len;
+ uint8_t c;
+
+ dest[di] = '\0';
+ while(si < len){
+ c = (uint8_t)src[si];
+ if((~c) & 0x80){
+ di--;
+ dest[di] = c;
+ si++;
+ }else{
+ switch(c>>4){
+ case 0xc:
+ case 0xd:
+ di -= 2;
+ *((int16_t*)&dest[di]) = *((int16_t*)&src[si]);
+ si += 2;
+ break;
+ case 0xe:
+ di -= 3;
+ dest[di] = src[si];
+ *((int16_t*)&dest[di+1]) = *((int16_t*)&src[si+1]);
+ si += 3;
+ break;
+ case 0xf:
+ di -= 4;
+ *((int32_t*)&dest[di]) = *((int32_t*)&src[si]);
+ si += 4;
+ break;
+ default:
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
--- /dev/null
+++ b/utf8.h
@@ -1,0 +1,111 @@
+#ifndef __UTF8_H_
+#define __UTF8_H_
+
+/* is c the start of a utf8 sequence? */
+#define isutf(c) (((c)&0xC0) != 0x80)
+
+int u8_iswprint(uint32_t c);
+
+/* convert UTF-8 data to wide character */
+size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
+
+/* the opposite conversion */
+size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
+
+/* single character to UTF-8, returns # bytes written */
+size_t u8_wc_toutf8(char *dest, uint32_t ch);
+
+/* character number to byte offset */
+size_t u8_offset(const char *str, size_t charnum);
+
+/* byte offset to character number */
+size_t u8_charnum(const char *s, size_t offset);
+
+/* return next character, updating an index variable */
+uint32_t u8_nextchar(const char *s, size_t *i);
+
+/* next character without NUL character terminator */
+uint32_t u8_nextmemchar(const char *s, size_t *i);
+
+/* move to next character */
+void u8_inc(const char *s, size_t *i);
+
+/* move to previous character */
+void u8_dec(const char *s, size_t *i);
+
+/* returns length of next utf-8 sequence */
+size_t u8_seqlen(const char *s);
+
+/* returns the # of bytes needed to encode a certain character */
+size_t u8_charlen(uint32_t ch);
+
+/* computes the # of bytes needed to encode a WC string as UTF-8 */
+size_t u8_codingsize(uint32_t *wcstr, size_t n);
+
+char read_escape_control_char(char c);
+
+/* assuming src points to the character after a backslash, read an
+ escape sequence, storing the result in dest and returning the number of
+ input characters processed */
+size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
+
+/* given a wide character, convert it to an ASCII escape sequence stored in
+ buf, where buf is "sz" bytes. returns the number of characters output.
+ sz must be at least 3. */
+int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
+
+/* convert a string "src" containing escape sequences to UTF-8 */
+size_t u8_unescape(char *buf, size_t sz, const char *src);
+
+/* convert UTF-8 "src" to escape sequences.
+
+ sz is buf size in bytes. must be at least 12.
+
+ if escape_quotes is nonzero, quote characters will be escaped.
+
+ if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
+
+ starts at src[*pi], updates *pi to point to the first unprocessed
+ byte of the input.
+
+ end is one more than the last allowable value of *pi.
+
+ returns number of bytes placed in buf, including a NUL terminator.
+*/
+size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
+ int escape_quotes, int ascii);
+
+/* utility predicates used by the above */
+int octal_digit(char c);
+int hex_digit(char c);
+
+/* return a pointer to the first occurrence of ch in s, or nil if not
+ found. character index of found character returned in *charn. */
+char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
+
+/* same as the above, but searches a buffer of a given size instead of
+ a NUL-terminated string. */
+char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
+
+char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
+
+/* count the number of characters in a UTF-8 string */
+size_t u8_strlen(const char *s);
+
+/* number of columns occupied by a string */
+size_t u8_strwidth(const char *s);
+
+/* printf where the format string and arguments may be in UTF-8.
+ you can avoid this function and just use ordinary printf() if the current
+ locale is UTF-8. */
+size_t u8_vprintf(const char *fmt, va_list ap);
+size_t u8_printf(const char *fmt, ...);
+
+/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
+int u8_isvalid(const char *str, int length);
+
+/* reverse a UTF-8 string. len is length in bytes. dest and src must both
+ be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
+int u8_reverse(char *dest, char *src, size_t len);
+
+#endif