ref: 8432c18eaebfda38826a6e648e7380d803c67b8e
parent: 4b98b0f5f4b4ee84286350c1b69c0a6883662e93
author: Lennart Augustsson <lennart@augustsson.net>
date: Fri Aug 30 15:12:35 EDT 2024
Use foreign pointers for utf8/bytestring nodes. This reuses all the GC machinery already in place.
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -59,6 +59,10 @@
#define MALLOC malloc
#endif
+#if !defined(REALLOC)
+#define REALLOC realloc
+#endif
+
#if !defined(FREE)
#define FREE free
#endif
@@ -188,7 +192,7 @@
T_IO_CCALL, T_IO_GC, T_DYNSYM,
T_NEWCASTRINGLEN, T_PEEKCASTRING, T_PEEKCASTRINGLEN,
T_FROMUTF8,
- T_STR,
+ T_BSTR,
T_LAST_TAG,
};
#if 0
@@ -225,7 +229,7 @@
#endif
struct ioarray;
-struct ustring;
+struct bytestring;
struct forptr;
typedef struct node {
@@ -237,7 +241,6 @@
struct node *uuarg;
value_t uuvalue;
flt_t uufloatvalue;
- struct ustring *uustring;
const char *uucstring;
void *uuptr;
HsFunPtr uufunptr;
@@ -256,11 +259,11 @@
#define SETDBLVALUE(p,v) (p)->uarg.uufloatvalue = v
#define FUN(p) (p)->ufun.uufun
#define ARG(p) (p)->uarg.uuarg
-#define STR(p) (p)->uarg.uustring
#define CSTR(p) (p)->uarg.uucstring
#define PTR(p) (p)->uarg.uuptr
#define FUNPTR(p) (p)->uarg.uufunptr
#define FORPTR(p) (p)->uarg.uuforptr
+#define BSTR(p) (p)->uarg.uuforptr->payload
#define ARR(p) (p)->uarg.uuarray
#define INDIR(p) ARG(p)
#define NODE_SIZE sizeof(node)
@@ -269,11 +272,11 @@
node *cells; /* All cells */
/*
- * UTF-8 encoded strings
+ * byte arrays
*/
-struct ustring {
+struct bytestring {
size_t size;
- unsigned char string[1];
+ void *string;
};
/*
@@ -313,10 +316,16 @@
int marked; /* mark bit for GC */
};
+/*
+ * Foreign pointers are also used to represent bytestrings.
+ * The difference between a foreign pointer and a bytestring
+ * is that we can serialize the latter.
+ * The size field is non-zero only for bytestrings.
+ */
struct forptr {
struct forptr *next; /* the next ForeignPtr that shares the same finilizer */
- void *payload; /* the actual pointer to allocated data */
struct final *finalizer; /* the finalizer for this ForeignPtr */
+ struct bytestring payload; /* the actual pointer to allocated data, and maybe a size */
};
struct final *final_root = 0; /* root of all allocated foreign pointers, linked by next */
@@ -360,6 +369,7 @@
counter_t num_free;
counter_t num_arr_alloc;
counter_t num_arr_free;
+counter_t num_fin_alloc;
counter_t num_fin_free;
#define BITS_PER_WORD (sizeof(bits_t) * 8)
@@ -425,7 +435,7 @@
#if WANT_TICK
struct tick_entry {
- struct ustring *tick_name;
+ struct bytestring tick_name;
counter_t tick_count;
} *tick_table = 0;
size_t tick_table_size;
@@ -433,7 +443,7 @@
/* Allocate a new tick table entry and return the index. */
size_t
-add_tick_table(struct ustring *name)
+add_tick_table(struct bytestring name)
{
if (!tick_table) {
tick_table_size = 100;
@@ -444,7 +454,7 @@
}
if (tick_index >= tick_table_size) {
tick_table_size *= 2;
- tick_table = realloc(tick_table, tick_table_size * sizeof(struct tick_entry));
+ tick_table = REALLOC(tick_table, tick_table_size * sizeof(struct tick_entry));
if (!tick_table)
memerr();
}
@@ -470,7 +480,7 @@
for (size_t i = 0; i < tick_index; i++) {
counter_t n = tick_table[i].tick_count;
if (n)
- fprintf(f, "%-60s %10"PRIcounter"\n", tick_table[i].tick_name->string, n);
+ fprintf(f, "%-60s %10"PRIcounter"\n", (char *)tick_table[i].tick_name.string, n);
}
}
#endif
@@ -987,6 +997,7 @@
break;
}
case T_FORPTR:
+ case T_BSTR:
FORPTR(n)->finalizer->marked = 1;
goto fin;
@@ -1483,11 +1494,16 @@
}
#endif
+struct forptr *mkForPtr(struct bytestring bs);
+
NODEPTR
-mkStrNode(struct ustring *str)
+mkStrNode(struct bytestring str)
{
- NODEPTR n = alloc_node(T_STR);
- STR(n) = str;
+ NODEPTR n = alloc_node(T_BSTR);
+ struct forptr *fp = mkForPtr(str); /* Create a foreign pointer */
+ fp->finalizer->final = (HsFunPtr)FREE; /* and set the finalizer to just free it */
+ FORPTR(n) = fp;
+ //printf("mkForPtr n=%p fp=%p %d %s payload.string=%p\n", n, fp, (int)FORPTR(n)->payload.size, (char*)FORPTR(n)->payload.string, FORPTR(n)->payload.string);
return n;
}
@@ -1530,11 +1546,12 @@
* finalizer for read UTF-8 strings.
* Fix this if there is a lot of deserialization.
*/
-struct ustring *
+struct bytestring
parse_string(BFILE *f)
{
+ struct bytestring bs;
size_t sz = 20;
- struct ustring *buffer = MALLOC(sizeof(struct ustring) + sz);
+ uint8_t *buffer = MALLOC(sz);
size_t i;
int c;
@@ -1546,21 +1563,25 @@
break;
if (i >= sz) {
sz *= 2;
- buffer = realloc(buffer, sizeof(struct ustring) + sz);
+ buffer = REALLOC(buffer, sz);
if (!buffer)
memerr();
}
if (c == '\\') {
- buffer->string[i++] = (char)parse_int(f);
+ buffer[i++] = (uint8_t)parse_int(f);
if (!gobble(f, '&'))
ERR("parse string");
} else {
- buffer->string[i++] = c;
+ buffer[i++] = c;
}
}
- buffer->size = i;
- buffer->string[i++] = 0;
- return realloc(buffer, sizeof(struct ustring) + i);
+ buffer[i] = 0; /* add a trailing 0 in case we need a C string */
+ buffer = REALLOC(buffer, i + 1);
+
+ bs.size = i;
+ bs.string = buffer;
+ //printf("parse_string %d %s\n", (int)bs.size, (char*)bs.string);
+ return bs;
}
NODEPTR
@@ -1863,11 +1884,12 @@
}
void
-print_string(BFILE *f, struct ustring *p)
+print_string(BFILE *f, struct bytestring bs)
{
+ uint8_t *str = bs.string;
putb('"', f);
- for (size_t i = 0; i < p->size; i++) {
- int c = p->string[i];
+ for (size_t i = 0; i < bs.size; i++) {
+ int c = str[i];
if (c == '"' || c == '\\' || c < ' ' || c > '~') {
putb('\\', f);
putdecb(c, f);
@@ -1974,8 +1996,8 @@
case T_FORPTR:
ERR("Cannot serialize foreign pointers");
break;
- case T_STR:
- print_string(f, STR(n));
+ case T_BSTR:
+ print_string(f, FORPTR(n)->payload);
break;
case T_IO_CCALL: putb('^', f); putsb(FFI_IX(GETVALUE(n)).ffi_name, f); break;
case T_BADDYN: putb('^', f); putsb(CSTR(n), f); break;
@@ -2200,26 +2222,34 @@
}
struct forptr*
-mkForPtr(void *p)
+mkForPtr(struct bytestring bs)
{
struct final *fin = malloc(sizeof(struct final));
struct forptr *fp = malloc(sizeof(struct forptr));
if (!fin || !fp)
memerr();
+ num_fin_alloc++;
//printf("mkForPtr p=%p fin=%p fp=%p\n", p, fin, fp);
fin->next = final_root;
final_root = fin;
fin->final = 0;
- fin->arg = p;
+ fin->arg = bs.string;
fin->back = fp;
fin->marked = 0;
fp->next = 0;
- fp->payload = p;
+ fp->payload = bs;
fp->finalizer = fin;
return fp;
}
struct forptr*
+mkForPtrP(void *p)
+{
+ struct bytestring bs = { 0, p };
+ return mkForPtr(bs);
+}
+
+struct forptr*
addForPtr(struct forptr *ofp, int s)
{
struct forptr *fp = malloc(sizeof(struct forptr));
@@ -2228,7 +2258,8 @@
memerr();
fp->next = ofp;
fin->back = fp;
- fp->payload = (char*)ofp->payload + s;
+ fp->payload.size = ofp->payload.size - s;
+ fp->payload.string = ofp->payload.string + s;
fp->finalizer = fin;
return fp;
}
@@ -2278,11 +2309,13 @@
}
NODEPTR
-mkStringU(struct ustring *str)
+mkStringU(struct bytestring bs)
{
- BFILE *ubuf = add_utf8(openb_buf(str->string, str->size));
+ BFILE *ubuf = add_utf8(openb_buf(bs.string, bs.size));
NODEPTR n, *np, nc;
+ //printf("mkStringU %d %s\n", (int)bs.size, (char*)bs.string);
+
n = mkNil();
np = &n;
for(;;) {
@@ -2392,7 +2425,7 @@
for (offs = 0;;) {
if (offs >= sz - 4) {
sz *= 2;
- name = realloc(name, sz);
+ name = REALLOC(name, sz);
if (!name)
memerr();
}
@@ -2653,7 +2686,7 @@
ap:
case T_AP: PUSH(n); n = FUN(n); goto top;
- case T_STR: RET;
+ case T_BSTR: RET;
case T_INT: RET;
case T_DBL: RET;
case T_PTR: RET;
@@ -2801,7 +2834,7 @@
//printf("T_FP2P\n");
xfp = evalforptr(ARG(TOP(0))); POP(1); n = TOP(-1);
//printf("T_FP2P xfp=%p, payload=%p\n", xfp, xfp->payload);
- SETPTR(n, xfp->payload); RET;
+ SETPTR(n, xfp->payload.string); RET;
case T_ARR_EQ:
{
@@ -2818,11 +2851,12 @@
if (doing_rnf) RET;
CHECK(1);
x = evali(ARG(TOP(0)));
- if (GETTAG(x) != T_STR) ERR("FROMUTF8");
+ if (GETTAG(x) != T_BSTR) ERR("FROMUTF8");
POP(1);
n = TOP(-1);
- GCCHECK(strNodes(STR(x)->size));
- GOIND(mkStringU(STR(x)));
+ GCCHECK(strNodes(BSTR(x).size));
+ //printf("T_FROMUTF8 x = %p fp=%p payload.string=%p\n", x, x->uarg.uuforptr, x->uarg.uuforptr->payload.string);
+ GOIND(mkStringU(BSTR(x)));
case T_RAISE:
if (doing_rnf) RET;
@@ -3401,7 +3435,7 @@
void *xp = evalptr(ARG(TOP(1)));
//printf("T_FPNEW xp=%p\n", xp);
n = alloc_node(T_FORPTR);
- SETFORPTR(n, mkForPtr(xp));
+ SETFORPTR(n, mkForPtrP(xp));
RETIO(n);
}
case T_FPFIN:
@@ -3596,6 +3630,7 @@
execio(&TOP(0));
prog = TOP(0);
POP(1);
+ gc(); /* Run finalizers */
#if SANITY
if (GETTAG(prog) != T_AP || GETTAG(FUN(prog)) != T_IO_RETURN)
ERR("main execio");
@@ -3623,7 +3658,8 @@
PRINT("%"PCOMMA"15"PRIcounter" reductions (%"PCOMMA".1f Mred/s)\n", num_reductions, num_reductions / ((double)run_time / 1000) / 1000000);
PRINT("%"PCOMMA"15"PRIcounter" array alloc\n", num_arr_alloc);
PRINT("%"PCOMMA"15"PRIcounter" array free\n", num_arr_free);
- PRINT("%"PCOMMA"15"PRIcounter" foreign free\n", num_fin_free);
+ PRINT("%"PCOMMA"15"PRIcounter" foreign/bytestring alloc\n", num_fin_alloc);
+ PRINT("%"PCOMMA"15"PRIcounter" foreign/bytestring free\n", num_fin_free);
#if MAXSTACKDEPTH
PRINT("%"PCOMMA"15d max stack depth\n", (int)max_stack_depth);
PRINT("%"PCOMMA"15d max C stack depth\n", (int)max_c_stack);
--
⑨