ref: 1f645d217c2faf9cd08b5f4910ca10479821b434
parent: f2d12752c483648d554700e6ac7dc100801dfa59
author: Mura Li <github@ctli.io>
date: Mon Sep 23 13:54:24 EDT 2019
Add a new match compiler implementation References: "When Do Match-Compilation Heuristics Matter?" by Kevin Scott and Norman Ramsey Stats: Sample count: 506 Dtree Size avg: 5.38 95th percentile: 3.00 maximum: 100 Dtree Height avg: 1.39 95th percentile: 1.00 maximum: 12 Sample generation: $ MATCH_STATS=1 make bootstrap && mbld -R support/matchstats.myr ./match.csv
--- /dev/null
+++ b/mi/Makefile.test
@@ -1,0 +1,7 @@
+BIN=match_test
+OBJ=match.o match_test.o
+
+
+DEPS=../parse/libparse.a ../util/libutil.a
+
+include ../mk/c.mk
--- a/mi/match.c
+++ b/mi/match.c
@@ -1,4 +1,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include <inttypes.h>
#include <stdarg.h>
#include <ctype.h>
@@ -13,39 +14,75 @@
#include "parse.h"
#include "mi.h"
-typedef struct Dtree Dtree;
-struct Dtree {
- int id;
- Srcloc loc;
+Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid);
+void dtreedump(FILE *fd, Dtree *dt);
- /* values for matching */
- Node *lbl;
- Node *load;
- size_t nconstructors;
- char accept;
- char emitted;
- char ptrwalk;
- /* the decision tree step */
- Node **pat;
- size_t npat;
- Dtree **next;
- size_t nnext;
- Dtree *any;
+static int ndtree;
- /* captured variables and action */
- Node **cap;
- size_t ncap;
-};
+/* Path is a integer sequence that labels a subtree of a subject tree.
+ * For example,
+ * 0 is the root
+ * 0,0 and 0,1 are two subtrees of the root.
+ *
+ * Note: the order of the sequence is reversed with regard to the reference paper
+ * "When Do Match-Compilation Heuristics Matter" by Kevin Scott and Norman Ramse.
+ *
+ * Each pattern of a match clause conrresponds to a unique Path of the subject tree.
+ * When we have m match clauses, for a given Path, there can be at most m patterns
+ * associated with the Path.
+ *
+ * Given
+ * match v
+ * | (11, 12, 13)
+ * | (21, 22, 23)
+ * | (31, 32, 33)
+ * | _
+ *
+ * the entries 11, 21, 31 and the wildcard pattern at the bottom have the same Path 0,1
+ * 12, 22, 32 have the same Path 0,2
+ * 13, 23, 33 have the same Path 0,3
+ */
+typedef struct Path {
+ size_t len;
+ int *p;
+} Path;
-Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl);
-static int addpat(Node *pat, Node *val,
- Dtree *start, Dtree *accept,
- Node ***cap, size_t *ncap,
- Dtree ***end, size_t *nend);
-void dtreedump(FILE *fd, Dtree *dt);
+/* Slot bundles a pattern with its corresponding Path and load value.
+ * The compiler will generate a comparison instruction for each (pat, load) pair.
+ * Each match clause corresponds to a list of slots which is populated by addrec.
+ */
+typedef struct Slot {
+ Path *path;
+ Node *pat;
+ Node *load;
+} Slot;
+static Slot *
+mkslot(Path *path, Node *pat, Node *val)
+{
+ Slot *s;
+ s = zalloc(sizeof(Slot));
+ s->path = path;
+ s->pat = pat;
+ s->load = val;
+ assert(path != (void *)1);
+ return s;
+}
+/* Frontier bundles a list of slots to be matched for a specific match clause.
+ * The instances of Frontier should be immutable after creation.
+ */
+typedef struct Frontier {
+ int i; /* index of the match clauses from top to bottom */
+ Node *lbl; /* branch target when the rule is matched */
+ Slot **slot; /* unmatched slots (Paths) for this match clause */
+ size_t nslot;
+ Node **cap; /* the captured variables of the pattern of this match clause */
+ size_t ncap;
+ Dtree *final; /* final state, shared by all Frontiers for a specific (indxed by i) match clause */
+} Frontier;
+
static Node *
utag(Node *n)
{
@@ -104,23 +141,6 @@
return NULL;
}
-static Dtree *
-dtbytag(Dtree *t, Ucon *uc)
-{
- uint32_t tagval;
- Node *taglit;
- size_t i;
-
- for (i = 0; i < t->npat; i++) {
- taglit = t->pat[i]->expr.args[0];
- tagval = taglit->lit.intval;
- if (tagval == uc->id) {
- return t->next[i];
- }
- }
- return NULL;
-}
-
static Node *
structmemb(Node *n, Node *name, Type *ty)
{
@@ -153,7 +173,6 @@
static Dtree *
mkdtree(Srcloc loc, Node *lbl)
{
- static int ndtree;
Dtree *t;
t = zalloc(sizeof(Dtree));
@@ -163,564 +182,627 @@
return t;
}
-static Dtree *
-nextnode(Srcloc loc, size_t idx, size_t count, Dtree *accept)
+void
+dtreedumplit(FILE *fd, Dtree *dt, Node *n, size_t depth)
{
- if (idx == count - 1)
- return accept;
- else
- return mkdtree(loc, genlbl(loc));
+ char *s;
+
+ s = lblstr(dt->lbl);
+ switch (n->lit.littype) {
+ case Lvoid: findentf(fd, depth, "%s: Lvoid\n"); break;
+ case Lchr: findentf(fd, depth, "%s: Lchr %c\n", s, n->lit.chrval); break;
+ case Lbool: findentf(fd, depth, "%s: Lbool %s\n", s, n->lit.boolval ? "true" : "false"); break;
+ case Lint: findentf(fd, depth, "%s: Lint %llu\n", s, n->lit.intval); break;
+ case Lflt: findentf(fd, depth, "%s: Lflt %lf\n", s, n->lit.fltval); break;
+ case Lstr: findentf(fd, depth, "%s: Lstr %.*s\n", s, (int)n->lit.strval.len, n->lit.strval.buf); break;
+ case Llbl: findentf(fd, depth, "%s: Llbl %s\n", s, n->lit.lblval); break;
+ case Lfunc: findentf(fd, depth, "%s: Lfunc\n"); break;
+ }
}
-static size_t
-nconstructors(Type *t)
+void
+dtreedumpnode(FILE *fd, Dtree *dt, size_t depth)
{
- if (!t)
- return 0;
+ size_t i;
- t = tybase(t);
- switch (t->type) {
- case Tyvoid: return 1; break;
- case Tybool: return 2; break;
- case Tychar: return 0x10ffff; break;
+ if (dt->accept)
+ findentf(fd, depth, "%s: accept\n", lblstr(dt->lbl));
- /* signed ints */
- case Tyint8: return 0x100; break;
- case Tyint16: return 0x10000; break;
- case Tyint32: return 0x100000000; break;
- case Tyint: return 0x100000000; break;
- case Tyint64: return ~0ull; break;
+ for (i = 0; i < dt->nnext; i++) {
+ dtreedumplit(fd, dt, dt->pat[i]->expr.args[0], depth);
+ dtreedumpnode(fd, dt->next[i], depth + 1);
+ }
+ if (dt->any) {
+ findentf(fd, depth, "%s: wildcard\n", lblstr(dt->lbl));
+ dtreedumpnode(fd, dt->any, depth + 1);
+ }
+}
- /* unsigned ints */
- case Tybyte: return 0x100; break;
- case Tyuint8: return 0x100; break;
- case Tyuint16: return 0x10000; break;
- case Tyuint32: return 0x100000000; break;
- case Tyuint: return 0x100000000; break;
- case Tyuint64: return ~0ull; break;
+void
+dtreedump(FILE *fd, Dtree *dt)
+{
+ dtreedumpnode(fd, dt, 0);
+}
- /* floats */
- case Tyflt32: return ~0ull; break;
- case Tyflt64: return ~0ull; break;
- /* complex types */
- case Typtr: return 1; break;
- case Tyarray: return 1; break;
- case Tytuple: return 1; break;
- case Tystruct: return 1;
- case Tyunion: return t->nmemb; break;
- case Tyslice: return ~0ULL; break;
+static Path*
+mkpath(Path *p, int i)
+{
+ Path *newp;
- case Tyvar: case Typaram: case Tyunres: case Tyname:
- case Tybad: case Tyvalist: case Tygeneric: case Ntypes:
- case Tyfunc: case Tycode:
- die("Invalid constructor type %s in match", tystr(t));
- break;
+ newp = zalloc(sizeof(Path));
+ if (p) {
+ newp->p = zalloc(sizeof(newp->p[0])*(p->len+1));
+ memcpy(newp->p, p->p, sizeof(newp->p[0])*p->len);
+ newp->p[p->len] = i;
+ newp->len = p->len+1;
+ } else {
+ newp->p = zalloc(sizeof(int)*4);
+ newp->p[0] = 0;
+ newp->len = 1;
}
- return 0;
+ assert(newp->p != 0);
+ return newp;
}
static int
-verifymatch(Dtree *t)
+patheq(Path *a, Path *b)
{
size_t i;
- int ret;
- if (t->accept)
- return 1;
+ if (a->len != b->len)
+ return 0;
- ret = 0;
- if (t->nnext == t->nconstructors || t->any)
- ret = 1;
- for (i = 0; i < t->nnext; i++)
- if (!verifymatch(t->next[i]))
- ret = 0;
- return ret;
+ for (i = 0; i < a->len; i++) {
+ if (a->p[i] != b->p[i])
+ return 0;
+ }
+ return 1;
}
static int
-acceptall(Dtree *t, Dtree *accept)
+pateq(Node *a, Node *b)
{
- size_t i;
- int ret;
-
- if (t->accept || t->any == accept)
+ if (exprop(a) != exprop(b))
return 0;
- ret = 1;
- if (t->any)
- ret = acceptall(t->any, accept);
- else
- t->any = accept;
+ switch (exprop(a)) {
+ case Olit:
+ return liteq(a->expr.args[0], b->expr.args[0]);
+ case Ogap:
+ case Ovar:
+ return 1;
+ default:
+ die("unreachable");
+ }
+}
- for (i = 0; i < t->nnext; i++)
- if (acceptall(t->next[i], accept))
- ret = 1;
- return ret;
+char *
+pathfmt(Path *p)
+{
+ size_t i, sz, n;
+ char *buf;
+
+ sz = p->len*3+1;
+ buf = zalloc(sz);
+ n = 0;
+ for (i = 0; i < p->len; i++) {
+ n += snprintf(&buf[n], sz-n, "%02x,", p->p[i]);
+ }
+ buf[n-1] = '\0';
+ return buf;
}
-static int
-isnonrecursive(Dtree *dt, Type *ty)
+void
+pathdump(Path *p, FILE *out)
{
- if (istyprimitive(ty) || ty->type == Tyvoid || ty->type == Tyfunc || ty->type == Typtr)
- return 1;
- if (ty->type == Tystruct)
- return ty->nmemb == 0;
- if (ty->type == Tyarray)
- return fold(ty->asize, 1)->expr.args[0]->lit.intval == 0;
+ size_t i;
+ for (i = 0; i < p->len; i++) {
+ fprintf(out, "%x,", p->p[i]);
+ }
+ fprintf(out, "\n");
+}
+
+static size_t
+nconstructors(Type *t)
+{
+ if (!t)
+ return 0;
+
+ t = tybase(t);
+ switch (t->type) {
+ case Tyvoid: return 1;
+ case Tybool: return 2;
+ case Tychar: return 0x10ffff;
+
+ /* signed ints */
+ case Tyint8: return 0x100;
+ case Tyint16: return 0x10000;
+ case Tyint32: return 0x100000000;
+ case Tyint: return 0x100000000;
+ case Tyint64: return ~0ull;
+
+ /* unsigned ints */
+ case Tybyte: return 0x100;
+ case Tyuint8: return 0x100;
+ case Tyuint16: return 0x10000;
+ case Tyuint32: return 0x100000000;
+ case Tyuint: return 0x100000000;
+ case Tyuint64: return ~0ull;
+
+ /* floats */
+ case Tyflt32: return ~0ull;
+ case Tyflt64: return ~0ull;
+
+ /* complex types */
+ case Typtr: return 1;
+ case Tyarray: return 1;
+ case Tytuple: return 1;
+ case Tystruct: return 1;
+ case Tyunion: return t->nmemb;
+ case Tyslice: return ~0ULL;
+
+ case Tyvar: case Typaram: case Tyunres: case Tyname:
+ case Tybad: case Tyvalist: case Tygeneric: case Ntypes:
+ case Tyfunc: case Tycode:
+ die("Invalid constructor type %s in match", tystr(t));
+ break;
+
+ }
return 0;
}
-static int
-addwildrec(Srcloc loc, Type *ty, Dtree *start, Dtree *accept, Dtree ***end, size_t *nend)
+/* addrec generates a list of slots for a Frontier by walking a pattern tree.
+ * It collects only the terminal patterns like union tags and literals.
+ * Non-terminal patterns like tuple/struct/array help encode the path only.
+ */
+static void
+addrec(Frontier *fs, Node *pat, Node *val, Path *path)
{
- Dtree *next, **last, **tail;
- size_t i, j, nelt, nlast, ntail;
- Node *asize;
+ size_t i, n;
+ Type *ty, *mty;
+ Node *memb, *name, *tagid, *p, *v, *lit, *dcl, *asn, *deref;
Ucon *uc;
- int ret;
+ char *s;
- tail = NULL;
- ntail = 0;
- ty = tybase(ty);
- if (ty->type == Typtr && start->any && start->any->ptrwalk) {
- return addwildrec(loc, ty->sub[0], start->any, accept, end, nend);
- } else if (isnonrecursive(start, ty)) {
- if (start->accept || start == accept)
- return 0;
- for (i = 0; i < start->nnext; i++)
- lappend(end, nend, start->next[i]);
- if (start->any) {
- lappend(end, nend, start->any);
- return 0;
+ pat = fold(pat, 1);
+ switch (exprop(pat)) {
+ case Ogap:
+ lappend(&fs->slot, &fs->nslot, mkslot(path, pat, val));
+ break;
+ case Ovar:
+ dcl = decls[pat->expr.did];
+ if (dcl->decl.isconst) {
+ ty = decltype(dcl);
+ if (ty->type == Tyfunc || ty->type == Tycode || ty->type == Tyvalist)
+ fatal(dcl, "bad pattern %s:%s: unmatchable type", declname(dcl), tystr(ty));
+ if (!dcl->decl.init)
+ fatal(dcl, "bad pattern %s:%s: missing initializer", declname(dcl), tystr(ty));
+ addrec(fs, dcl->decl.init, val, mkpath(path, 0));
} else {
- start->any = accept;
- lappend(end, nend, accept);
- return 1;
+ asn = mkexpr(pat->loc, Oasn, pat, val, NULL);
+ asn->expr.type = exprtype(pat);
+ lappend(&fs->cap, &fs->ncap, asn);
+
+ lappend(&fs->slot, &fs->nslot, mkslot(path, pat, val));
}
- }
+ break;
+ case Olit:
+ if (pat->expr.args[0]->lit.littype == Lstr) {
+ lit = pat->expr.args[0];
+ n = lit->lit.strval.len;
+ s = lit->lit.strval.buf;
- ret = 0;
- last = NULL;
- nlast = 0;
- switch (ty->type) {
- case Tytuple:
- lappend(&last, &nlast, start);
- for (i = 0; i < ty->nsub; i++) {
- next = nextnode(loc, i, ty->nsub, accept);
- tail = NULL;
- ntail = 0;
- for (j = 0; j < nlast; j++)
- if (addwildrec(loc, ty->sub[i], last[j], next, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ ty = mktype(pat->loc, Tyuint64);
+ p = mkintlit(lit->loc, n);
+ p ->expr.type = ty;
+ v = structmemb(val, mkname(pat->loc, "len"), ty);
+
+ addrec(fs, p, v, mkpath(path, 0));
+
+ ty = mktype(pat->loc, Tybyte);
+ for (i = 0; i < n; i++) {
+ p = mkintlit(lit->loc, s[i]);
+ p->expr.type = ty;
+ v = arrayelt(val, i);
+ addrec(fs, p, v, mkpath(path, 1+i));
+ }
+ } else {
+ lappend(&fs->slot, &fs->nslot, mkslot(path, pat, val));
}
break;
- case Tyarray:
- lappend(&last, &nlast, start);
- asize = fold(ty->asize, 1);
- nelt = asize->expr.args[0]->lit.intval;
- for (i = 0; i < nelt; i++) {
- next = nextnode(loc, i, nelt, accept);
- tail = NULL;
- ntail = 0;
- for (j = 0; j < nlast; j++)
- if (addwildrec(loc, ty->sub[0], last[j], next, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ case Oaddr:
+ deref = mkexpr(val->loc, Oderef, val, NULL);
+ deref->expr.type = exprtype(pat->expr.args[0]);
+ addrec(fs, pat->expr.args[0], deref, mkpath(path, 0));
+ break;
+ case Oucon:
+ uc = finducon(tybase(exprtype(pat)), pat->expr.args[0]);
+ tagid = mkintlit(pat->loc, uc->id);
+ tagid->expr.type = mktype(pat->loc, Tyint32);
+ addrec(fs, tagid, utag(val), mkpath(path, 0));
+ if (uc->etype)
+ addrec(fs, pat->expr.args[1], uvalue(val, uc->etype), mkpath(path, 1));
+ break;
+ case Otup:
+ for (i = 0; i < pat->expr.nargs; i++) {
+ addrec(fs, pat->expr.args[i], tupelt(val, i), mkpath(path, i));
}
break;
- case Tystruct:
- lappend(&last, &nlast, start);
- for (i = 0; i < ty->nmemb; i++) {
- next = nextnode(loc, i, ty->nmemb, accept);
- tail = NULL;
- ntail = 0;
- for (j = 0; j < nlast; j++)
- if (addwildrec(loc, decltype(ty->sdecls[i]), last[j], next, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ case Oarr:
+ for (i = 0; i < pat->expr.nargs; i++) {
+ addrec(fs, pat->expr.args[i], arrayelt(val, i), mkpath(path, i));
}
break;
- case Tyunion:
+ case Ostruct:
+ ty = tybase(exprtype(pat));
for (i = 0; i < ty->nmemb; i++) {
- uc = ty->udecls[i];
- next = dtbytag(start, uc);
- if (next) {
- if (uc->etype) {
- if (addwildrec(loc, uc->etype, next, accept, end, nend))
- ret = 1;
- } else {
- lappend(end, nend, next);
- }
+ mty = decltype(ty->sdecls[i]);
+ name = ty->sdecls[i]->decl.name;
+ memb = findmemb(pat, name);
+ if (!memb) {
+ memb = mkexpr(ty->sdecls[i]->loc, Ogap, NULL);
+ memb->expr.type = mty;
}
+ addrec(fs, memb, structmemb(val, name, mty), mkpath(path, i));
}
- if (!start->any) {
- start->any = accept;
- ret = 1;
- }
- lappend(&last, &nlast, accept);
break;
- case Tyslice:
- ret = acceptall(start, accept);
- lappend(&last, &nlast, accept);
- break;
default:
- die("unreachable");
+ fatal(pat, "unsupported pattern %s of type %s", opstr[exprop(pat)], tystr(exprtype(pat)));
+ break;
}
- lcat(end, nend, last, nlast);
- lfree(&last, &nlast);
- return ret;
}
-static int
-addwild(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
+static void
+genfrontier(int i, Node *val, Node *pat, Node *lbl, Frontier ***frontier, size_t *nfrontier)
{
- Node *asn;
+ Frontier *fs;
- if (cap && ncap) {
- asn = mkexpr(pat->loc, Oasn, pat, val, NULL);
- asn->expr.type = exprtype(pat);
- lappend(cap, ncap, asn);
- }
- return addwildrec(pat->loc, exprtype(pat), start, accept, end, nend);
+ fs = zalloc(sizeof(Frontier));
+ fs->i = i;
+ fs->lbl = lbl;
+ fs->final = mkdtree(lbl->loc, lbl);
+ fs->final->accept = 1;
+ addrec(fs, pat, val, mkpath(NULL, 0));
+ lappend(frontier, nfrontier, fs);
}
-static int
-addunion(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
+/*
+ * project generates a new frontier with a new the set of slots by reducing the input slots.
+ * literally, it deletes the slot at the path pi.
+ */
+static Frontier *
+project(Node *pat, Path *pi, Node *val, Frontier *fs)
{
- Node *tagid;
- Dtree *next;
- Ucon *uc;
+ Frontier *out;
+ Slot *c, **slot;
+ size_t i, nslot;
- if (start->any) {
- lappend(end, nend, start->any);
- return 0;
- }
+ assert (fs->nslot > 0);
- uc = finducon(tybase(exprtype(pat)), pat->expr.args[0]);
- next = dtbytag(start, uc);
- if (next) {
- if (!uc->etype) {
- lappend(end, nend, next);
- return 0;
- } else {
- return addpat(pat->expr.args[1], uvalue(val, uc->etype), next, accept, cap, ncap, end, nend);
+ /*
+ * copy a new set of slots from fs without the slot at the path pi.
+ * c points to the slot at the path pi.
+ */
+ c = NULL;
+ slot = NULL;
+ nslot = 0;
+ for (i = 0; i < fs->nslot; i++) {
+ if (patheq(pi, fs->slot[i]->path)) {
+ c = fs->slot[i];
+ continue;
}
+ lappend(&slot, &nslot, fs->slot[i]);
}
- if (!start->load) {
- start->load = utag(val);
- start->nconstructors = nconstructors(tybase(exprtype(pat)));
- }
+ out = zalloc(sizeof(Frontier));
+ out->i = fs->i;
+ out->lbl = fs->lbl;
+ out->slot = slot;
+ out->nslot = nslot;
+ out->cap = fs->cap;
+ out->ncap = fs->ncap;
+ out->final = fs->final;
- tagid = mkintlit(pat->loc, uc->id);
- tagid->expr.type = mktype(pat->loc, Tyint32);
- lappend(&start->pat, &start->npat, tagid);
- if (uc->etype) {
- next = mkdtree(pat->loc, genlbl(pat->loc));
- lappend(&start->next, &start->nnext, next);
- addpat(pat->expr.args[1], uvalue(val, uc->etype), next, accept, cap, ncap, end, nend);
- } else {
- lappend(&start->next, &start->nnext, accept);
- lappend(end, nend, accept);
+ /*
+ * if the sub-term at pi is not in the frontier,
+ * then we do not reduce the frontier.
+ */
+ if (c == NULL)
+ return out;
+
+ switch (exprop(c->pat)) {
+ case Ovar:
+ case Ogap:
+ /*
+ * if the pattern at the sub-term pi of this frontier is not a constructor,
+ * then we do not reduce the frontier.
+ */
+ return out;
+ default:
+ break;
}
- return 1;
+
+ /*
+ * if constructor at the path pi is not the constructor we want to project,
+ * then return null.
+ */
+ if (!pateq(pat, c->pat))
+ return NULL;
+
+ return out;
}
-static int
-addstr(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
+/* compile implements the algorithm outlined in the paper
+ * "When Do Match-Compilation Heuristics Matter?" by Kevin Scott and Norman Ramsey
+ * It generates either a TEST or MATCH (accept=1) dtree, where MATCH is the base case.
+ *
+ * Summary:
+ * 1. if the first Frontier of the input list of Frontiers does not contain any
+ * non-wildcard pattern, return a MATCH dtree (accept=1)
+ * 2. for each call to the function, it will select a Path from
+ * the first match clause in the input Frontiers.
+ * 3. scan the input frontiers 'vertically' at the selected Path to form a set
+ * of unique constructors. (the list 'cs')
+ * 4. for each constructor in 'cs', recursively compile the 'projected' frontiers,
+ * which is roughly the frontiers minus the slot at the Path.
+ * 5. recursively compile the remaining Frontiers (if any) corresponding to the matches
+ * rules with a wildcard at the selected Path.
+ * 6. return a new dtree, where the compiled outputs at step 4 form the outgoing edges
+ * (dt->next), and the one produced at step 5 serves as the default edage (dt->any)
+ *
+ * NOTE:
+ * a. how we select the Path at the step 2 is determined by heuristics.
+ * b. we don't expand the frontiers at the 'project' step as the reference paper does.
+ * rather, we separate the whole compile algorithm into two phases:
+ * 1) construction of the initial frontiers by 'addrec'.
+ * 2) construction of the decision tree (with the generated frontiers) by 'compile'
+ */
+static Dtree *
+compile(Frontier **frontier, size_t nfrontier)
{
- Dtree **tail, **last, *next;
- size_t i, j, n, ntail, nlast;
- Node *p, *v, *lit;
- Type *ty;
- char *s;
- int ret;
+ size_t i, j, k;
+ Dtree *dt, *out, **edge, *any;
+ Frontier *fs, **row, **defaults ;
+ Node **cs, **pat;
+ Slot *slot, *s;
+ size_t ncs, ncons, nrow, nedge, ndefaults, npat;
- lit = pat->expr.args[0];
- n = lit->lit.strval.len;
- s = lit->lit.strval.buf;
+ assert(nfrontier > 0);
- ty = mktype(pat->loc, Tyuint64);
- p = mkintlit(lit->loc, n);
- v = structmemb(val, mkname(pat->loc, "len"), ty);
- p->expr.type = ty;
+ fs = frontier[0];
- if (n == 0)
- next = accept;
- else
- next = mkdtree(pat->loc, genlbl(pat->loc));
+ /* scan constructors horizontally */
+ ncons = 0;
+ for (i = 0; i < fs->nslot; i++) {
+ switch (exprop(fs->slot[i]->pat)) {
+ case Ovar:
+ case Ogap:
+ break;
+ default:
+ ncons++;
+ }
+ }
+ if (ncons == 0) {
+ fs->final->refcnt++;
+ return fs->final;
+ }
- last = NULL;
- nlast = 0;
- ret = 0;
- if (addpat(p, v, start, next, cap, ncap, &last, &nlast))
- ret = 1;
+ assert(fs->nslot > 0);
- ty = mktype(pat->loc, Tybyte);
- for (i = 0; i < n; i++) {
- p = mkintlit(lit->loc, s[i]);
- p->expr.type = ty;
- v = arrayelt(val, i);
+ /* NOTE:
+ * at the moment we have not implemented any smarter heuristics described in the papers.
+ * we always select the first found constructor, i.e. the top-left one.
+ */
- tail = NULL;
- ntail = 0;
- next = nextnode(pat->loc, i, n, accept);
- for (j = 0; j < nlast; j++)
- if (addpat(p, v, last[j], next, NULL, NULL, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ slot = NULL;
+ for (i = 0; i < fs->nslot; i++) {
+ switch (exprop(fs->slot[i]->pat)) {
+ case Ovar:
+ case Ogap:
+ continue;
+ default:
+ slot = fs->slot[i];
+ goto pi_found;
+ }
}
- lcat(end, nend, last, nlast);
- lfree(&last, &nlast);
- return ret;
-}
-static int
-addlit(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
-{
- size_t i;
-
- if (pat->expr.args[0]->lit.littype == Lstr) {
- return addstr(pat, val, start, accept, cap, ncap, end, nend);
- } else {
- /* if we already have a match, we're not adding a new node */
- if (start->any) {
- lappend(end, nend, start->any);
- return 0;
- }
-
- for (i = 0; i < start->npat; i++) {
- if (liteq(start->pat[i]->expr.args[0], pat->expr.args[0])) {
- lappend(end, nend, start->next[i]);
- return 0;
+pi_found:
+ /* scan constructors vertically at pi to create the set 'CS' */
+ cs = NULL;
+ ncs = 0;
+ for (i = 0; i < nfrontier; i++) {
+ fs = frontier[i];
+ for (j = 0; j < fs->nslot; j++) {
+ s = fs->slot[j];
+ switch (exprop(s->pat)) {
+ case Ovar:
+ case Ogap:
+ break;
+ case Olit:
+ if (patheq(slot->path, s->path)) {
+ /* NOTE:
+ * we could use a hash table, but given that the n is usually small,
+ * an exhaustive search would suffice.
+ */
+ /* look for a duplicate entry to skip it; we want a unique set of constructors. */
+ for (k = 0; k < ncs; k++) {
+ if (pateq(cs[k], s->pat))
+ break;
+ }
+ if (ncs == 0 || k == ncs)
+ lappend(&cs, &ncs, s->pat);
+ }
+ break;
+ default:
+ assert(0);
}
}
+ }
+ /* project a new frontier for each selected constructor */
+ edge = NULL;
+ nedge = 0;
+ pat = NULL;
+ npat = 0;
+ for (i = 0; i < ncs; i++) {
+ row = NULL;
+ nrow = 0;
+ for (j = 0; j < nfrontier; j++) {
+ fs = frontier[j];
+ fs = project(cs[i], slot->path, slot->load, frontier[j]);
+ if (fs != NULL)
+ lappend(&row, &nrow, fs);
+ }
- /* wire up an edge from start to 'accept' */
- if (!start->load) {
- start->load = val;
- start->nconstructors = nconstructors(exprtype(pat));
+ if (nrow > 0) {
+ dt = compile(row, nrow);
+ lappend(&edge, &nedge, dt);
+ lappend(&pat, &npat, cs[i]);
}
- lappend(&start->pat, &start->npat, pat);
- lappend(&start->next, &start->nnext, accept);
- lappend(end, nend, accept);
- return 1;
}
-}
-static int
-addtup(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
-{
- size_t nargs, nlast, ntail, i, j;
- Dtree *next, **last, **tail;
- Node **args;
- int ret;
+ /* compile the defaults */
+ defaults = NULL;
+ ndefaults = 0;
+ for (i = 0; i < nfrontier; i++) {
+ fs = frontier[i];
+ k = -1;
+ for (j = 0; j < fs->nslot; j++) {
+ /* locate the occurrence of pi in fs */
+ if (patheq(slot->path, fs->slot[j]->path))
+ k = j;
+ }
- args = pat->expr.args;
- nargs = pat->expr.nargs;
- last = NULL;
- nlast = 0;
- lappend(&last, &nlast, start);
- ret = 0;
-
- for (i = 0; i < nargs; i++) {
- next = nextnode(args[i]->loc, i, nargs, accept);
- tail = NULL;
- ntail = 0;
- for (j = 0; j < nlast; j++)
- if (addpat(pat->expr.args[i], tupelt(val, i), last[j], next, cap, ncap, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ if (k == -1 || exprop(fs->slot[k]->pat) == Ovar || exprop(fs->slot[k]->pat) == Ogap)
+ lappend(&defaults, &ndefaults, fs);
}
- lcat(end, nend, last, nlast);
- lfree(&last, &nlast);
- return ret;
-}
+ if (ndefaults)
+ any = compile(defaults, ndefaults);
+ else
+ any = NULL;
-static int
-addarr(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
-{
- size_t nargs, nlast, ntail, i, j;
- Dtree *next, **last, **tail;
- Node **args;
- int ret;
+ /* construct the result dtree */
+ out = mkdtree(slot->pat->loc, genlbl(slot->pat->loc));
+ out->load = slot->load;
+ out->npat = npat,
+ out->pat = pat,
+ out->nnext = nedge;
+ out->next = edge;
+ out->any = any;
- args = pat->expr.args;
- nargs = pat->expr.nargs;
- last = NULL;
- nlast = 0;
- lappend(&last, &nlast, start);
- ret = 0;
-
- for (i = 0; i < nargs; i++) {
- next = nextnode(args[i]->loc, i, nargs, accept);
- tail = NULL;
- ntail = 0;
- for (j = 0; j < nlast; j++)
- if (addpat(pat->expr.args[i], arrayelt(val, i), last[j], next, cap, ncap, &tail, &ntail))
- ret = 1;
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
+ switch (exprop(slot->load)) {
+ case Outag:
+ out->nconstructors = nconstructors(tybase(exprtype(slot->load->expr.args[0])));
+ break;
+ case Oudata:
+ case Omemb:
+ case Otupget:
+ case Oidx:
+ case Oderef:
+ case Ovar:
+ out->nconstructors = nconstructors(tybase(exprtype(slot->load)));
+ break;
+ default:
+ fatal(slot->pat, "unsupported pattern %s of type %s", opstr[exprop(slot->pat)], tystr(exprtype(slot->pat)));
}
- lcat(end, nend, last, nlast);
- lfree(&last, &nlast);
- return ret;
+ return out;
}
static int
-addstruct(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
+verifymatch(Dtree *t)
{
- Dtree *next, **last, **tail;
- Node *memb, *name;
- Type *ty, *mty;
- size_t i, j, ntail, nlast;
+ size_t i;
int ret;
- ret = 0;
- last = NULL;
- nlast = 0;
- lappend(&last, &nlast, start);
- ty = tybase(exprtype(pat));
+ if (t->accept)
+ return 1;
- for (i = 0; i < ty->nmemb; i++) {
- mty = decltype(ty->sdecls[i]);
- name = ty->sdecls[i]->decl.name;
- memb = findmemb(pat, name);
- next = nextnode(pat->loc, i, ty->nmemb, accept);
- tail = NULL;
- ntail = 0;
-
- /* add a _ capture if we don't specify the value */
- if (!memb) {
- memb = mkexpr(ty->sdecls[i]->loc, Ogap, NULL);
- memb->expr.type = mty;
- }
- for (j = 0; j < nlast; j++) {
- if (addpat(memb, structmemb(val, name, mty), last[j], next, cap, ncap, &tail, &ntail))
- ret = 1;
- }
- lfree(&last, &nlast);
- last = tail;
- nlast = ntail;
- }
- lcat(end, nend, last, nlast);
- lfree(&last, &nlast);
+ ret = 0;
+ if (t->nnext == t->nconstructors || t->any)
+ ret = 1;
+ for (i = 0; i < t->nnext; i++)
+ if (!verifymatch(t->next[i]))
+ ret = 0;
return ret;
}
-static int
-addderefpat(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
+static size_t
+dtheight(Dtree *dt)
{
- Node *deref;
- Dtree *walk;
+ size_t i, h, m;
- deref = mkexpr(val->loc, Oderef, val, NULL);
- deref->expr.type = exprtype(pat->expr.args[0]);
- start->nconstructors = nconstructors(exprtype(deref));
- if (start->any && !start->any->ptrwalk)
+ if (dt == NULL)
return 0;
- else if (!start->any)
- start->any = mkdtree(pat->loc, genlbl(pat->loc));
- walk = start->any;
- walk->ptrwalk = 1;
- return addpat(pat->expr.args[0], deref, walk, accept, cap, ncap, end, nend);
-}
+ if (dt->accept)
+ return 0;
-static int
-addpat(Node *pat, Node *val, Dtree *start, Dtree *accept, Node ***cap, size_t *ncap, Dtree ***end, size_t *nend)
-{
- int ret;
- Node *dcl;
- Type *ty;
-
- pat = fold(pat, 1);
- ret = 0;
- switch (exprop(pat)) {
- case Ovar:
- dcl = decls[pat->expr.did];
- if (dcl->decl.isconst) {
- ty = decltype(dcl);
- if (ty->type == Tyfunc || ty->type == Tycode || ty->type == Tyvalist)
- fatal(dcl, "bad pattern %s:%s: unmatchable type", declname(dcl), tystr(ty));
- if (!dcl->decl.init)
- fatal(dcl, "bad pattern %s:%s: missing initializer", declname(dcl), tystr(ty));
- ret = addpat(dcl->decl.init, val, start, accept, cap, ncap, end, nend);
- } else {
- ret = addwild(pat, val, start, accept, cap, ncap, end, nend);
- }
- break;
- case Oucon:
- ret = addunion(pat, val, start, accept, cap, ncap, end, nend);
- break;
- case Olit:
- ret = addlit(pat, val, start, accept, cap, ncap, end, nend);
- break;
- case Otup:
- ret = addtup(pat, val, start, accept, cap, ncap, end, nend);
- break;
- case Oarr:
- ret = addarr(pat, val, start, accept, cap, ncap, end, nend);
- break;
- case Ostruct:
- ret = addstruct(pat, val, start, accept, cap, ncap, end, nend);
- break;
- case Ogap:
- ret = addwild(pat, NULL, start, accept, NULL, NULL, end, nend);
- break;
- case Oaddr:
- ret = addderefpat(pat, val, start, accept, cap, ncap, end, nend);
- break;
- default:
- fatal(pat, "unsupported pattern %s of type %s", opstr[exprop(pat)], tystr(exprtype(pat)));
- break;
+ m = 0;
+ for (i = 0; i < dt->nnext; i++) {
+ h = dtheight(dt->next[i]);
+ if (h > m)
+ m = h;
}
- return ret;
+ h = dtheight(dt->any);
+ if (h > m)
+ m = h;
+ return m+1;
}
-
-/* val must be a pure, fully evaluated value */
Dtree *
-gendtree(Node *m, Node *val, Node **lbl, size_t nlbl)
+gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid)
{
- Dtree *start, *accept, **end;
- Node **pat, **cap;
- size_t npat, ncap, nend;
+ Dtree *root;
+ Node **pat;
+ size_t npat;
size_t i;
+ Frontier **frontier;
+ size_t nfrontier;
+ FILE *csv;
+ char *dbgloc, *dbgfn, *dbgln;
+
+ ndtree = startid;
+
pat = m->matchstmt.matches;
npat = m->matchstmt.nmatches;
- end = NULL;
- nend = 0;
- start = mkdtree(m->loc, genlbl(m->loc));
+ frontier = NULL;
+ nfrontier = 0;
for (i = 0; i < npat; i++) {
- cap = NULL;
- ncap = 0;
- accept = mkdtree(lbl[i]->loc, lbl[i]);
- accept->accept = 1;
+ genfrontier(i, val, pat[i]->match.pat, lbl[i], &frontier, &nfrontier);
+ }
+ for (i = 0; i < nfrontier; i++) {
+ addcapture(pat[i]->match.block, frontier[i]->cap, frontier[i]->ncap);
+ }
+ root = compile(frontier, nfrontier);
- if (!addpat(pat[i]->match.pat, val, start, accept, &cap, &ncap, &end, &nend))
+ for (i = 0; i < nfrontier; i++)
+ if (frontier[i]->final->refcnt == 0)
fatal(pat[i], "pattern matched by earlier case");
- addcapture(pat[i]->match.block, cap, ncap);
+
+ if (debugopt['M'] || getenv("M")) {
+ dbgloc = strdup(getenv("M"));
+ if (strchr(dbgloc, '@')) {
+ dbgfn = strtok(dbgloc, "@");
+ dbgln = strtok(NULL, "@")+1; /* offset by 1 to skip the charactor 'L' */
+ if (!strcmp(fname(m->loc), dbgfn) && lnum(m->loc) == strtol(dbgln, 0, 0))
+ dtreedump(stdout, root);
+ }
+ else
+ dtreedump(stdout, root);
+
}
- if (debugopt['M'])
- dtreedump(stdout, start);
- if (!verifymatch(start))
+ if (!verifymatch(root)) {
fatal(m, "nonexhaustive pattern set in match statement");
- return start;
+ }
+ if (getenv("MATCH_STATS")) {
+ csv = fopen("match.csv", "a");
+ assert(csv != NULL);
+ fprintf(csv, "%s@L%d, %d, %ld\n", fname(m->loc), lnum(m->loc), ndtree, dtheight(root));
+ fclose(csv);
+ }
+
+ return root;
}
void
@@ -773,21 +855,18 @@
void
genonematch(Node *pat, Node *val, Node *iftrue, Node *iffalse, Node ***out, size_t *nout, Node ***cap, size_t *ncap)
{
- Dtree *start, *accept, *reject, **end;
- size_t nend;
+ Frontier **frontier;
+ size_t nfrontier;
+ Dtree *root;
- end = NULL;
- nend = 0;
-
- start = mkdtree(pat->loc, genlbl(pat->loc));
- accept = mkdtree(iftrue->loc, iftrue);
- accept->accept = 1;
- reject = mkdtree(iffalse->loc, iffalse);
- reject->accept = 1;
-
- assert(addpat(pat, val, start, accept, cap, ncap, &end, &nend));
- acceptall(start, reject);
- genmatchcode(start, out, nout);
+ ndtree = 0;
+ frontier = NULL;
+ nfrontier = 0;
+ genfrontier(0, val, pat, iftrue, &frontier, &nfrontier);
+ lcat(cap, ncap, frontier[0]->cap, frontier[0]->ncap);
+ genfrontier(1, val, mkexpr(iffalse->loc, Ogap, NULL), iffalse, &frontier, &nfrontier);
+ root = compile(frontier, nfrontier);
+ genmatchcode(root, out, nout);
}
void
@@ -807,7 +886,7 @@
endlbl = genlbl(m->loc);
- dt = gendtree(m, val, lbl, nlbl);
+ dt = gendtree(m, val, lbl, nlbl, 0);
genmatchcode(dt, out, nout);
for (i = 0; i < npat; i++) {
@@ -825,47 +904,3 @@
dumpn((*out)[i], stdout);
}
}
-
-
-void
-dtreedumplit(FILE *fd, Dtree *dt, Node *n, size_t depth)
-{
- char *s;
-
- s = lblstr(dt->lbl);
- switch (n->lit.littype) {
- case Lvoid: findentf(fd, depth, "%s: Lvoid\n"); break;
- case Lchr: findentf(fd, depth, "%s: Lchr %c\n", s, n->lit.chrval); break;
- case Lbool: findentf(fd, depth, "%s: Lbool %s\n", s, n->lit.boolval ? "true" : "false"); break;
- case Lint: findentf(fd, depth, "%s: Lint %llu\n", s, n->lit.intval); break;
- case Lflt: findentf(fd, depth, "%s: Lflt %lf\n", s, n->lit.fltval); break;
- case Lstr: findentf(fd, depth, "%s: Lstr %.*s\n", s, (int)n->lit.strval.len, n->lit.strval.buf); break;
- case Llbl: findentf(fd, depth, "%s: Llbl %s\n", s, n->lit.lblval); break;
- case Lfunc: findentf(fd, depth, "%s: Lfunc\n"); break;
- }
-}
-
-void
-dtreedumpnode(FILE *fd, Dtree *dt, size_t depth)
-{
- size_t i;
-
- if (dt->accept)
- findentf(fd, depth, "%s: accept\n", lblstr(dt->lbl));
-
- for (i = 0; i < dt->nnext; i++) {
- dtreedumplit(fd, dt, dt->pat[i]->expr.args[0], depth);
- dtreedumpnode(fd, dt->next[i], depth + 1);
- }
- if (dt->any) {
- findentf(fd, depth, "%s: wildcard\n", lblstr(dt->lbl));
- dtreedumpnode(fd, dt->any, depth + 1);
- }
-}
-
-void
-dtreedump(FILE *fd, Dtree *dt)
-{
- dtreedumpnode(fd, dt, 0);
-}
-
--- /dev/null
+++ b/mi/match_test.c
@@ -1,0 +1,1113 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "util.h"
+#include "parse.h"
+#include "mi.h"
+
+#define V1
+/**
+ * TODO
+ * Recursively check the types of the pattern and the subject tree.
+ *
+ */
+
+File file;
+char debugopt[128];
+
+typedef struct Dtree Dtree;
+extern Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid);
+extern void dtreedump(FILE *fd, Dtree *dt);
+
+
+static char *
+errorf(char *fmt, ...)
+{
+ va_list ap;
+ char *p;
+ size_t n;
+
+ p = malloc(2048);
+ if (p == NULL)
+ return "(NULL)";
+
+ va_start(ap, fmt);
+ n = vsnprintf(p, 2048, fmt, ap);
+ va_end(ap);
+ if (n > 0) {
+ p = realloc(p, n+1);
+ } else {
+ free(p);
+ p = "(NULL)";
+ }
+ return p;
+}
+
+static Type *
+installucons(Type *t)
+{
+ Type *b;
+ size_t i;
+
+ if (!t)
+ return NULL;
+ b = tybase(t);
+ switch (b->type) {
+ case Tystruct:
+ for (i = 0; i < b->nmemb; i++)
+ installucons(b->sdecls[i]->decl.type);
+ break;
+ case Tyunion:
+ for (i = 0; i < b->nmemb; i++) {
+ if (!b->udecls[i]->utype)
+ b->udecls[i]->utype = b;
+ b->udecls[i]->id = i;
+ }
+ break;
+ default:
+ break;
+ }
+ return t;
+}
+
+static Node *
+mkdummymatch(Node *p)
+{
+ Node *b;
+
+ b = mkblock(Zloc, mkstab(0));
+ b->block.stmts = NULL;
+ b->block.nstmts = 0;
+
+ return mkmatch(Zloc, p, b);
+}
+
+static Node *
+ty(Node *n, Type *t)
+{
+ switch (n->type) {
+ case Nlit:
+ n->lit.type = t;
+ break;
+ case Nexpr:
+ n->expr.type = t;
+ default:
+ ;
+ }
+ return n;
+}
+
+static char *
+exprequal(Node *a, Node *b, int depth)
+{
+ size_t i;
+ char *err;
+
+ if (a == NULL && b == NULL)
+ return NULL;
+
+ if (a == NULL || b == NULL)
+ return "either one is null";
+
+ if (false && a->nid != b->nid)
+ return errorf("nid is not matched. (%d) want %d, got %d", depth, a->nid, b->nid);
+
+ if (a->type != b->type)
+ return errorf("ntype is not matched. (%d) want %s, got %s", depth, nodestr[a->type], nodestr[b->type]);
+
+ switch (a->type) {
+ case Nexpr:
+ if (exprop(a) != exprop(b))
+ return errorf("op is not matched. (%d) want %s, got %s", depth, opstr[exprop(a)], opstr[exprop(b)]);
+
+ if (a->expr.nargs != b->expr.nargs) {
+ fprintf(stderr, "op:%s\n", opstr[exprop(a)]);
+ return errorf("nargs is not matched. (%d) want %d, got %d", depth, a->expr.nargs, b->expr.nargs);
+ }
+
+ for (i = 0; i < a->expr.nargs; i++) {
+ err = exprequal(a->expr.args[i], b->expr.args[i], depth+1);
+ if (err != NULL)
+ return err;
+ }
+ break;
+ case Nlit:
+ switch (a->lit.littype) {
+ case Lint:
+ if (a->lit.intval != b->lit.intval)
+ return errorf("lit.intval is not matched. (%d) want %d, got %d", depth, a->lit.intval, b->lit.intval);
+ break;
+ default:
+ return errorf("unexpected littype: %s", nodestr[a->type]);
+ }
+ break;
+ case Nname:
+ break;
+ default:
+ return errorf("unexpected ntype: %s", nodestr[a->type]);
+ }
+
+ return NULL;
+}
+
+static char *
+dtequal(Dtree *a, Dtree *b, int depth)
+{
+ size_t i;
+ char *err;
+
+ if (a == NULL && b == NULL)
+ return NULL;
+ if (a == NULL || b == NULL)
+ return "either one is null";
+
+ if (a->id != b->id)
+ return errorf("id is not matched. (depth:%d) want %d, got %d", depth, a->id, b->id);
+
+ if (a->nconstructors != b->nconstructors)
+ return errorf("nconstructors is not matched. (depth:%d id:%d) want %ld, got %ld", depth, a->id, a->nconstructors, b->nconstructors);
+
+ if (a->accept != b->accept)
+ return errorf("accept is not matched. (depth:%d id:%d) want %ld, got %ld", depth, a->id, a->accept, b->accept);
+
+ if (a->emitted != b->emitted)
+ return errorf("emitted is not matched. (depth:%d id:%d) want %ld, got %ld", depth, a->id, a->emitted, b->emitted);
+
+ err = exprequal(a->load, b->load, 0);
+ if (err != NULL) {
+ fprintf(stderr, "WANT:\n");
+ dumpn(a->load, stderr);
+ fprintf(stderr, "GOT:\n");
+ dumpn(b->load, stderr);
+ return errorf("load is not matched. (depth:%d id:%d) want %p, got %p, %s", depth, a->id, a->load, b->load, err);
+ }
+
+ if (a->nnext != b->nnext)
+ return errorf("nnext is not matched. (depth:%d id:%d) want %d, got %d", depth, a->id, a->nnext, b->nnext);
+
+ for (i = 0; i < a->npat; i++) {
+ err = exprequal(a->pat[i], b->pat[i], 0);
+ if (err != NULL) {
+ fprintf(stderr, "WANT:\n");
+ dumpn(a->pat[i], stderr);
+ fprintf(stderr, "GOT:\n");
+ dumpn(b->pat[i], stderr);
+ return errorf("pat is not matched. (depth:%d id:%d) want %p, got %p, %s", depth, a->id, a->pat[i], b->pat[i], err);
+ }
+ }
+ for (i = 0; i < a->nnext; i++) {
+ err = dtequal(a->next[i], b->next[i], depth+1);
+ if (err != NULL)
+ return errorf("next[%d] is not matched. (depth:%d id:%d) want %p, got %p, %s", i, depth, a->id, a->next[i], b->next[i], err);
+ }
+ err = dtequal(a->any, b->any, depth+1);
+ if (err != NULL)
+ return errorf("any is not matched. (depth:%d id:%d) want %p, got %p, %s", depth, a->id, a->any, b->any, err);
+
+ return NULL;
+}
+
+
+
+static char *
+test_match(int idx, Node *val, Node **pat, Dtree *want)
+{
+ Dtree *dt;
+ Node *m, *v, *r, **lbl, **matches;
+ size_t i, nlbl, npat, nmatches;
+ char *err;
+
+ matches = NULL;
+ nmatches = 0;
+ for (npat = 0; pat[npat] != NULL; npat++) {
+ lappend(&matches, &nmatches, mkdummymatch(pat[npat]));
+ }
+
+ m = mkmatchstmt(Zloc, val, matches, nmatches);
+ r = val;
+ v = gentemp(r->loc, r->expr.type, NULL);
+
+
+ if (getenv("D")) {
+ fprintf(stderr, "[%.3d]>\n", idx);
+ dumpn(m, stdout);
+ }
+
+ if (1) {
+ lbl = NULL;
+ nlbl = 0;
+ for (i = 0; i < nmatches; i++) {
+ lappend(&lbl, &nlbl, genlbl(pat[i]->match.block->loc));
+ }
+
+ dt = gendtree(m, v, lbl, nlbl, 0);
+ if (getenv("d")) {
+ fprintf(stderr, "dtree >>\n");
+ dtreedump(stderr, dt);
+ }
+
+ err = dtequal(want, dt, 0);
+ if (err)
+ return err;
+ }
+
+ if (getenv("G")) {
+ Node **matches = NULL;
+ size_t nmatches = 0;
+ genmatch(m, v, &matches, &nmatches);
+
+ fprintf(stdout, "===========\n");
+ for (i = 0; i < nmatches; i++) {
+ dumpn(matches[i], stdout);
+ }
+ }
+
+ return NULL;
+}
+
+typedef struct Test {
+ char *name;
+ char *desc;
+ Node *val;
+ Node **pats;
+ Dtree *dt;
+} Test;
+
+inline static Node *
+setnode(Node **dst, Node *src)
+{
+ *dst = src;
+ return *dst;
+}
+
+inline static Node *
+getnode(Node *n)
+{
+ return n;
+}
+
+int
+main(int argc, char **argv)
+{
+ size_t i;
+ char *err;
+ Node *t, *p_, *p0, *p1, *p2;
+
+#define P(x) getnode(p##x)
+
+#define InitP0(p) setnode(&p0, p)
+#define InitP1(p) setnode(&p1, p)
+#define InitP2(p) setnode(&p2, p)
+#define InitP_(p) setnode(&p_, p)
+
+
+#define InitT(v) setnode(&t, v)
+#define T getnode(t)
+
+
+ Type *_int32 = mktype(Zloc, Tyint32);
+ Type *_int64 = mktype(Zloc, Tyint64);
+
+ Type *_int32t1 = mktytuple(Zloc, (Type*[]){_int32}, 1);
+ Type *_int32t2 = mktytuple(Zloc, (Type*[]){_int32, _int32}, 2);
+ Type *_int32t3 = mktytuple(Zloc, (Type*[]){_int32, _int32, _int32}, 3);
+
+ Type *_int32s1 = mktystruct(Zloc, (Node*[]){mkdecl(Zloc, mkname(Zloc, "foo"), _int32)}, 1);
+ Type *_enum1 = installucons(mktyunion(Zloc, (Ucon*[]){mkucon(Zloc, mkname(Zloc, "Foo"), NULL, NULL)}, 1));
+ Type *_enum2 = installucons(mktyunion(Zloc,
+ (Ucon*[]){
+ mkucon(Zloc, mkname(Zloc, "Foo"), NULL, NULL),
+ mkucon(Zloc, mkname(Zloc, "Bar"), NULL, NULL)},
+ 2));
+ Type *_enum3 = installucons(mktyunion(Zloc,
+ (Ucon*[]){
+ mkucon(Zloc, mkname(Zloc, "Foo"), NULL, NULL),
+ mkucon(Zloc, mkname(Zloc, "Bar"), NULL, NULL),
+ mkucon(Zloc, mkname(Zloc, "Baz"), NULL, NULL)},
+ 3));
+
+ Type *_int32u1 = mktyunion(Zloc, (Ucon*[]){
+ mkucon(Zloc, mkname(Zloc, "Foo"), NULL, _int32t1),
+ }, 1);
+
+ Type *_int32a1 = mktyslice(Zloc, _int32);
+
+
+ Type *_bug001u = installucons(mktyunion(Zloc, (Ucon*[]){
+ mkucon(Zloc, mkname(Zloc, "Foo"), NULL, _int32t1),
+ mkucon(Zloc, mkname(Zloc, "Bar"), NULL, NULL)
+ }, 2));
+ Type *_bug001s = mktystruct(Zloc, (Node*[]){
+ mkdecl(Zloc, mkname(Zloc, "s1_slice"), _int32a1),
+ mkdecl(Zloc, mkname(Zloc, "s1_int32"), _int32),
+ }, 2);
+
+ Type *_bug002s = mktystruct(Zloc, (Node*[]){
+ mkdecl(Zloc, mkname(Zloc, "s2_ufoo"), _bug001u),
+ mkdecl(Zloc, mkname(Zloc, "s2_sbar"), _bug001s),
+ mkdecl(Zloc, mkname(Zloc, "s2_int32"), _int32),
+ }, 3);
+
+
+ Dtree *dt11 = &(Dtree){
+ .id = 11,
+ .any = NULL,
+ };
+ Dtree *dt8 = &(Dtree){
+ .id = 8,
+ .any =dt11,
+ };
+ dt11->any = dt8;
+
+ Test tests[] = {
+ {
+ .name = "int32 matched by 1 wildcard",
+ .val = InitT(gentemp(Zloc, _int32, NULL)),
+ .pats = (Node*[]){
+ ty(mkexpr(Zloc, Ogap, NULL), _int32),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ {
+ .name = "int32 matched by 1 capture variable",
+ .val = InitT(gentemp(Zloc, _int32, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexpr(Zloc, Ovar, mkname(Zloc, "foovar"), NULL), _int32)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ {
+ .name = "int32 matched by 1 literals",
+ .val = InitT(gentemp(Zloc, _int32, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .load = gentemp(Zloc, _int32, NULL),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ [0] = p0,
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ {
+ .name = "int32 matched by 2 literals",
+ .val = InitT(gentemp(Zloc, _int32, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32)),
+ InitP1(ty(mkexpr(Zloc, Olit, mkint(Zloc, 456), NULL), _int32)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 3,
+ .load = gentemp(Zloc, _int32, NULL),
+ .nconstructors = 4294967296,
+ .npat = 2,
+ .nnext = 2,
+ .pat = (Node*[]){
+ [0] = P(0),
+ [1] = P(1),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ [1] = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 2,
+ .accept = 1,
+ },
+ },
+ },
+ {
+ .name = "1-tuple, matched by wildcard only",
+ .val = InitT(gentemp(Zloc, _int32t1, NULL)),
+ .pats = (Node*[]){
+ ty(mkexpr(Zloc, Otup, ty(mkexpr(Zloc, Ogap, NULL), _int32), NULL), _int32t1),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ .nnext = 0,
+ .npat = 0,
+ .any = NULL,
+ },
+ },
+ {
+ .name = "1-tuple",
+ .val = InitT(gentemp(Zloc, _int32t1, NULL)),
+ .pats = (Node *[]){
+ InitP0(ty(mkexpr(Zloc, Otup, ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32), NULL), _int32t1)),
+ InitP1(ty(mkexpr(Zloc, Ogap, NULL), _int32t1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 0), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1
+ },
+ },
+ },
+ {
+ .name = "2-tuple",
+ .val = InitT(gentemp(Zloc, _int32t2, NULL)),
+ .pats = (Node *[]){
+ /**
+ * | (123, 456):
+ */
+ InitP0(ty(mkexpr(Zloc, Otup,
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 456), NULL), _int32),
+ NULL), _int32t2)),
+ /**
+ * | (_, _):
+ */
+ InitP1(ty(mkexpr(Zloc, Ogap, NULL), _int32t2)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 3,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 0), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 2,
+ .load =ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 1), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ }
+ },
+ },
+ {
+ .name = "3-tuple",
+ .val = InitT(gentemp(Zloc, _int32t3, NULL)),
+ .pats = (Node *[]){
+ /**
+ * | (123, 456):
+ */
+ InitP0(ty(mkexpr(Zloc, Otup,
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 456), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 789), NULL), _int32),
+ NULL), _int32t3)),
+ /**
+ * | (_, _):
+ */
+ InitP1(ty(mkexpr(Zloc, Ogap, NULL), _int32t3)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 4,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 0), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 3,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 1), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 2), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[2],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ }
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept =1,
+ },
+ },
+ },
+ {
+ .name = "3-tuple-3-pat",
+ .val = InitT(gentemp(Zloc, _int32t3, NULL)),
+ .pats = (Node *[]){
+ /**
+ * | (123, 456):
+ */
+ InitP0(ty(mkexpr(Zloc, Otup,
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 101), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 102), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 103), NULL), _int32),
+ NULL), _int32t3)),
+ InitP1(ty(mkexpr(Zloc, Otup,
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 201), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 202), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 203), NULL), _int32),
+ NULL), _int32t3)),
+ InitP2(ty(mkexpr(Zloc, Otup,
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 301), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 302), NULL), _int32),
+ ty(mkexpr(Zloc, Olit, mkint(Zloc, 303), NULL), _int32),
+ NULL), _int32t3)),
+
+ /**
+ * | (_, _):
+ */
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32t3)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 10,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 0), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 3,
+ .npat = 3,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ P(1)->expr.args[0],
+ P(2)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 5,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 1), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 4,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 2), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[2],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ [1] = &(Dtree){
+ .id = 7,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 1), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(1)->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 6,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 2), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(1)->expr.args[2],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ [2] = &(Dtree){
+ .id = 9,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 1), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(2)->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 8,
+ .load = ty(mkexpr(Zloc, Otupget, T, mkintlit(Zloc, 2), NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(2)->expr.args[2],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 2,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 3,
+ .accept = 1,
+ },
+ },
+ },
+
+ {
+ .name = "1-int32-struct",
+ .val = InitT(gentemp(Zloc, _int32s1, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexprl(Zloc, Ostruct, (Node*[]){mkidxinit(Zloc, mkname(Zloc, "foo"), mkintlit(Zloc, 123))}, 1), _int32s1)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32s1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Omemb, T, tybase(exprtype(P(0)))->sdecls[0]->decl.name, NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ {
+ .name = "1-enum, matched by wildcard only",
+ .val = InitT(gentemp(Zloc, _enum1, NULL)),
+ .pats = (Node*[]){
+ InitP_( ty(mkexpr(Zloc, Ogap, NULL), _enum1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ {
+ .name = "1-enum, matched by a valid constructor",
+ .val = InitT(gentemp(Zloc, _enum1, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Foo"), NULL), _enum1)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _enum1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .nconstructors = 1,
+ .load = ty(mkexpr(Zloc, Outag, T, NULL), _int32),
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ /*
+ * the matcher will convert the Oucon expr to an Nlit for the Dtree
+ */
+ ty(mkintlit(Zloc, finducon(_enum1, P(0)->expr.args[0])->id), _int32),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ /**
+ * match v : _enum2
+ * | `Foo
+ * | `Bar
+ * ;;
+ *
+ */
+ {
+ .name = "2-enum, matched by 2 valid constructors",
+ .val = InitT(gentemp(Zloc, _enum2, NULL)),
+ .pats = (Node*[]) {
+ InitP0(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Foo"), NULL), _enum2)),
+ InitP1(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Bar"), NULL), _enum2)),
+
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Outag, T, NULL), _int32),
+ .nconstructors = 2,
+ .nnext = 2,
+ .npat = 2,
+ .pat = (Node*[]){
+ ty(mkintlit(Zloc, finducon(_enum2, P(0)->expr.args[0])->id), _int32),
+ ty(mkintlit(Zloc, finducon(_enum2, P(1)->expr.args[0])->id), _int32),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ [1] = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ .any = NULL,
+ },
+ },
+ {
+ .name = "3-enum, matched by 3 valid constructors",
+ .val = InitT(gentemp(Zloc, _enum3, NULL)),
+ .pats = (Node*[]) {
+ InitP0(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Foo"), NULL), _enum3)),
+ InitP1(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Bar"), NULL), _enum3)),
+ InitP2(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Baz"), NULL), _enum3)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 3,
+ .load = ty(mkexpr(Zloc, Outag, T, NULL), _int32),
+ .nconstructors = 3,
+ .nnext = 3,
+ .npat = 3,
+ .pat = (Node*[]){
+ ty(mkintlit(Zloc, finducon(_enum3, P(0)->expr.args[0])->id), _int32),
+ ty(mkintlit(Zloc, finducon(_enum3, P(1)->expr.args[0])->id), _int32),
+ ty(mkintlit(Zloc, finducon(_enum3, P(2)->expr.args[0])->id), _int32),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ [1] = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ [2] = &(Dtree){
+ .id = 2,
+ .accept = 1,
+ },
+ },
+ .any = NULL,
+ },
+ },
+ {
+ .name = "1-int32-array, matched by an element",
+ .val = InitT(gentemp(Zloc, _int32a1, NULL)),
+ .pats = (Node*[]) {
+ InitP0(ty(mkexpr(Zloc, Oarr,
+ /**
+ * {.[0] = 123}
+ */
+ mkidxinit(Zloc, mkintlit(Zloc, 0), mkintlit(Zloc, 123)),
+ NULL),
+ _int32s1)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32a1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 2,
+ .nconstructors = 4294967296,
+ .load = ty(mkexpr(Zloc, Oidx, T, ty(mkintlit(Zloc, 0), _int64), NULL), _int32),
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ /**
+ * | `Foo (int32)
+ */
+ {
+ .name = "1-union of 1-tuple, matched by wildcard only",
+ .val = InitT(gentemp(Zloc, _int32u1, NULL)),
+ .pats = (Node*[]){
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32u1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ {
+ .name = "1-union of 1-tuple",
+ .val = InitT(gentemp(Zloc, _int32u1, NULL)),
+ .pats = (Node*[]){
+ /**
+ * `Foo (123)
+ */
+ InitP0(ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Foo"),
+ ty(mkexpr(Zloc, Otup, ty(mkexpr(Zloc, Olit, mkint(Zloc, 123), NULL), _int32), NULL), _int32t1), NULL), _int32u1)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _int32u1)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 3,
+ .load = ty(mkexpr(Zloc, Outag, T, NULL), _int32),
+ .nconstructors = 1,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ /*
+ * the matcher will convert the Oucon expr to an Nlit for the Dtree
+ */
+ ty(mkintlit(Zloc, finducon(_enum1, P(0)->expr.args[0])->id), _int32),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Otupget,
+ ty(mkexpr(Zloc, Oudata, T, NULL), _int32t1),
+ mkintlit(Zloc, 0),
+ NULL), _int32),
+ .nconstructors = 4294967296,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[1]->expr.args[0],
+ },
+ .next = (Dtree*[]){
+ &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+ {
+ .name = "bug1",
+ .val = InitT(gentemp(Zloc, _bug002s, NULL)),
+ .pats = (Node*[]){
+ InitP0(ty(mkexprl(Zloc, Ostruct, (Node*[]){
+ mkidxinit(Zloc, mkname(Zloc, "s2_ufoo"),
+ ty(mkexpr(Zloc, Oucon, mkname(Zloc, "Foo"),
+ mkintlit(Zloc, 123), NULL), _int32u1)
+ )}, 1), _bug002s)),
+ InitP_(ty(mkexpr(Zloc, Ogap, NULL), _bug002s)),
+ NULL,
+ },
+ .dt = &(Dtree){
+ .id = 3,
+ .load = ty(mkexpr(Zloc, Outag,
+ ty(mkexpr(Zloc, Omemb, T, tybase(exprtype(P(0)))->sdecls[0]->decl.name, NULL), _bug001u),
+ NULL), _int32),
+ .nconstructors = 2,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ ty(mkintlit(Zloc, finducon(_bug001u, P(0)->expr.args[0]->expr.args[0])->id), _int32),
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 2,
+ .load = ty(mkexpr(Zloc, Oudata, ty(mkexpr(Zloc, Omemb, T, tybase(exprtype(P(0)))->sdecls[0]->decl.name, NULL), _int32), NULL), _int32t1),
+ .nconstructors = 1,
+ .nnext = 1,
+ .npat = 1,
+ .pat = (Node*[]){
+ P(0)->expr.args[0]->expr.args[1],
+ },
+ .next = (Dtree*[]){
+ [0] = &(Dtree){
+ .id = 0,
+ .accept = 1,
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ }
+ },
+ },
+ .any = &(Dtree){
+ .id = 1,
+ .accept = 1,
+ },
+ },
+ },
+
+ };
+
+ for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
+ Test *t = &tests[i];
+ char *name = t->name;
+ char *filter = getenv("N");
+ if (filter && !strstr(name, filter))
+ continue;
+
+ initfile(&file, errorf("(test-%d-%s)", i, name));
+ fprintf(stderr, "[%03lu]------ %s ##\n", i, name);
+ err = test_match(i, t->val, t->pats, t->dt);
+ if (err) {
+ fprintf(stderr, "FAILED id: %ld name: %s\n", i, name);
+ fprintf(stderr, "%s\r\n", err);
+ break;
+ }
+ }
+ return 0;
+}
+
--- a/mi/mi.h
+++ b/mi/mi.h
@@ -1,6 +1,7 @@
typedef struct Cfg Cfg;
typedef struct Bb Bb;
typedef struct Reaching Reaching;
+typedef struct Dtree Dtree;
struct Cfg {
Node *fn;
@@ -34,6 +35,27 @@
size_t **defs;
size_t *ndefs;
size_t nbb;
+};
+
+struct Dtree {
+ int id;
+ Srcloc loc;
+
+ /* values for matching */
+ Node *lbl;
+ Node *load;
+ size_t nconstructors;
+ char accept;
+ char emitted;
+
+ /* the decision tree step */
+ Node **pat;
+ size_t npat;
+ Dtree **next;
+ size_t nnext;
+ Dtree *any;
+
+ size_t refcnt;
};
/* dataflow analysis */
--- /dev/null
+++ b/support/matchstats.myr
@@ -1,0 +1,105 @@
+use std
+use bio
+use math
+
+const atoi = {s
+ match std.intparse(s)
+ | `std.Some v: -> v
+ | `std.None: std.fatal("error")
+ ;;
+}
+
+const avg = {xs
+ var sum
+
+ sum = 0
+ for x : xs
+ sum += x
+ ;;
+ -> (sum : flt64) / (xs.len : flt64)
+}
+
+const intcmp = {a, b
+ if a < b
+ -> `std.After
+ elif a > b
+ -> `std.Before
+ else
+ -> `std.Equal
+ ;;
+}
+
+const percentile = {percent, xs
+ var sorted
+ var idx, i
+ var ret
+
+ sorted = std.sort(xs, intcmp)
+ idx = ((percent : flt64) / 100.0) * (sorted.len : flt64)
+ i = (math.floor(idx) : int)
+ if idx == math.floor(idx)
+ ret = (xs[i-1] : flt64)
+ elif idx > 1.0
+ ret = ((xs[i-1] + xs[i]) / 2 : flt64)
+ else
+ std.fatal("percentile out-of-bunds\n")
+ ;;
+ -> ret
+}
+
+const maximum = {xs
+ var m
+
+ m = xs[0]
+ for v : xs
+ if v > m
+ m = v
+ ;;
+ ;;
+ -> m
+}
+
+const main = {args : byte[:][:]
+ var f, locs, sizes, heights, count
+
+ if args.len < 2
+ std.put("need input file\n")
+ std.exit(1)
+ ;;
+
+ match bio.open(args[1], bio.Rd)
+ | `std.Ok fd: f = fd
+ | `std.Err e: std.fatal("error opening {}: {}\n", args[0], e)
+ ;;
+
+ locs = [][:]
+ sizes = [][:]
+ heights = [][:]
+ count = 0
+
+ while true
+ match bio.readto(f, ",")
+ | `std.Ok loc: std.slpush(&locs, std.strstrip(loc))
+ | `std.Err `bio.Eof: break
+ | `std.Err e: std.fatal("error read loc: {}\n", e)
+ ;;
+
+ match bio.readto(f, ",")
+ | `std.Ok size: std.slpush(&sizes, atoi(std.strstrip(size)))
+ | `std.Err e: std.fatal("error read size: {}\n", e)
+ ;;
+
+ match bio.readto(f, "\n")
+ | `std.Ok height: std.slpush(&heights, atoi(std.strstrip(height)))
+ | `std.Err e: std.fatal("error read height: {}\n", e)
+ ;;
+ count ++
+ ;;
+
+ std.put("Sample count: {}\n", count)
+ std.put("Dtree Size\tavg: {s=3}\t95th percentile: {s=3}\t maximum: {}\n", avg(sizes), percentile(95, sizes), maximum(sizes))
+ std.put("Dtree Height\tavg: {s=3}\t95th percentile: {s=3}\t maximum: {}\n", avg(heights), percentile(95, heights), maximum(heights))
+
+ bio.close(f)
+}
+