ref: dcb761aebd2815fdf2d04c0b05724292de9dc98b
parent: ad872eeb19b4fcc41a5d34750ca6cdcf88a39795
author: Peter Mikkelsen <peter@pmikkelsen.com>
date: Thu Jul 18 16:57:49 EDT 2024
Start working on a parser
--- a/aplan.c
+++ /dev/null
@@ -1,32 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <thread.h>
-
-#include "dat.h"
-#include "fns.h"
-
-Array *
-parseaplan(TokenList *tokens, char **errp)
-{
- /* TODO: write a recursive descent parser for APLAN here. */
- Array *val;
-
- int ok = 1;
- for(uvlong i = 0; i < tokens->count; i++)
- ok &= tokens->tokens[i].tag == TokNumber;
- if(!ok){
- *errp = "can only parse simple constants";
- return nil;
- }
-
- if(tokens->count == 1){
- val = allocarray(TypeNumber, 0, 1);
- setint(val, 0, tokens->tokens[0].num);
- }else{
- val = allocarray(TypeNumber, 1, tokens->count);
- setshape(val, 0, tokens->count);
- for(uvlong i = 0; i < tokens->count; i++)
- setint(val, i, tokens->tokens[i].num);
- }
- return val;
-}
\ No newline at end of file
--- a/array.c
+++ b/array.c
@@ -12,6 +12,7 @@
struct Array
{
int rank;
+ usize size;
usize *shape;
union {
void *data;
@@ -31,6 +32,7 @@
{
Array *a = alloc(DataArray);
a->rank = rank;
+ a->size = size;
switch(type){
case TypeNumber:
@@ -57,4 +59,19 @@
setshape(Array *a, int dim, usize size)
{
a->shape[dim] = size;
+}
+
+char *
+printarray(Array *a)
+{
+ /* TODO: this is just for debugging */
+ char buf[2048];
+ char *p = buf;
+ p += sprint(p, "type: %s shape: ", "numeric");
+ for(int i = 0; i < a->rank; i++)
+ p += sprint(p, "%lld ", a->shape[i]);
+ p += sprint(p, "data: ");
+ for(uvlong i = 0; i < a->size; i++)
+ p += sprint(p, "%lld ", a->intdata[i]);
+ return buf;
}
\ No newline at end of file
--- a/dat.h
+++ b/dat.h
@@ -96,6 +96,12 @@
void **items;
};
+enum Primitive
+{
+ PrimPlus,
+ PrimMinus,
+};
+
enum TokenTag
{
TokNumber,
@@ -104,8 +110,16 @@
TokRparen,
TokLbrack,
TokRbrack,
+ TokLbrace,
+ TokRbrace,
TokNewline,
TokDiamond,
+ TokPrimitive,
+ TokDel,
+ TokLarrow,
+ TokSemi,
+
+ TokEnd,
};
typedef struct Token Token;
@@ -112,9 +126,11 @@
struct Token
{
int tag;
+ int nameclass;
union {
vlong num; /* TokNumber */
char *name; /* TokName: UTF-8 encoded name */
+ int prim; /* TokPrimitive */
};
};
@@ -123,6 +139,10 @@
{
uvlong count;
Token *tokens;
+
+ uvlong offset;
+ jmp_buf errbuf;
+ char *err;
};
enum ArrayType
@@ -134,8 +154,47 @@
typedef struct Array Array;
#pragma incomplete Array
+enum AstTag
+{
+ AstProg,
+ AstFunc,
+ AstName,
+ AstLocals,
+ AstAssign,
+ AstMonadic,
+ AstDyadic,
+ AstConst,
+ AstPrim,
+};
+
typedef struct Ast Ast;
struct Ast
{
- int lol;
+ int tag;
+ char *name;
+ int nameclass;
+ int prim;
+
+ Ast *funcname;
+ Ast *funcresult;
+ Ast *funcleftarg;
+ Ast *funcrightarg;
+ Ast *funclocals;
+
+ Ast *func;
+ Ast *left;
+ Ast *right;
+
+ Array *val;
+
+ uvlong childcount;
+ Ast **children;
+};
+
+enum Nameclass
+{
+ NameclassUndef, /* Unknown name */
+ NameclassLocal, /* Local variable, but no value yet */
+ NameclassArray, /* Array value */
+ NameclassFunc, /* Function value */
};
\ No newline at end of file
--- a/fns.h
+++ b/fns.h
@@ -1,6 +1,3 @@
-/* aplan.c */
-Array *parseaplan(TokenList *, char **);
-
/* array.c */
void initarrays(void);
Array *allocarray(int, int, usize);
@@ -7,6 +4,8 @@
void setint(Array *, usize, vlong);
void setshape(Array *, int, usize);
+char *printarray(Array *);
+
/* fs.c */
Qid freshobjqid(void);
void startfs(char *, char *);
@@ -33,12 +32,13 @@
void appendlog(Session *s, char *data);
/* symtab.c */
-Symtab *allocsymtab(int);
+Symtab *allocsymtab(void);
uvlong sym(Symtab *, char *);
char *symname(Symtab *, uvlong);
void *symval(Symtab *, uvlong);
-Qid symqid(Symtab *, uvlong);
void symset(Symtab *, uvlong, void *);
+Symbol *symptr(Symtab *, uvlong);
+
Enumeration *enumsymbols(Symtab *);
/* systemcmd.c */
@@ -47,9 +47,9 @@
/* util.c */
Enumeration *allocenum(uvlong);
void trim(char *);
+void debugast(Ast *, int);
/* value.c */
char *printval(void *);
void *parseval(char *, char **);
-void *init_quadio(void);
--- a/fs.c
+++ b/fs.c
@@ -556,14 +556,17 @@
char *err = nil;
Aux *aux = r->fid->aux;
Module *m = aux->module;
- uvlong symb;
+ uvlong symid;
+ Symbol *symb;
switch(QID_TYPE(r->fid->qid)){
case Qmodule: /* create a new symbol */
- symb = sym(m->symtab, r->ifcall.name);
- if(symb == -1)
+ symid = sym(m->symtab, r->ifcall.name);
+ if(symid == -1)
err = Einvalidname;
- r->fid->qid = r->ofcall.qid = symqid(m->symtab, symb);
+ symb = symptr(m->symtab, symid);
+ aux->symbol = symb;
+ r->fid->qid = r->ofcall.qid = symb->qsymbol;
break;
default:
err = Ecreate;
--- a/mkfile
+++ b/mkfile
@@ -3,7 +3,6 @@
TARG=lpafs
SCRIPTS=lpa
OFILES=\
- aplan.$O\
array.$O\
fs.$O\
main.$O\
--- a/module.c
+++ b/module.c
@@ -12,7 +12,7 @@
Module *m = alloc(DataModule);
m->name = strdup(name);
- m->symtab = allocsymtab(1);
+ m->symtab = allocsymtab();
m->id = id++;
wlock(&s->modules->lock);
--- a/parse.c
+++ b/parse.c
@@ -5,11 +5,332 @@
#include "dat.h"
#include "fns.h"
+static void _Noreturn error(TokenList *, char *);
+static int peek(TokenList *);
+static int peekclass(TokenList *);
+static void match(TokenList *, int);
+static void addchild(Ast *, Ast *);
+static int issep(TokenList *t);
+static int nameclass(char *, Ast *);
+
+static void parsesep(TokenList *);
+static void parseseps(TokenList *, int);
+static Ast *parseprog(TokenList *);
+static Ast *parsefuncdef(TokenList *);
+static Ast *parsefuncheader(TokenList *);
+static Ast *parselocals(TokenList *);
+static Ast *parseexpr(TokenList *, Ast *);
+static Ast *parseexprsub(TokenList *);
+static Ast *parseline(TokenList *);
+static Ast *parsename(TokenList *);
+static Ast *parsefunc(TokenList *);
+static Ast *parseconst(TokenList *);
+
Ast *
parse(TokenList *tokens, char **errp)
{
- /* Ast *ast = alloc(DataAst); */
- USED(tokens);
- *errp = "parsing not implemented yet";
- return nil;
-}
\ No newline at end of file
+ Ast *ast;
+
+ tokens->offset = 0;
+ tokens->err = nil;
+ if(setjmp(tokens->errbuf)){
+ *errp = tokens->err;
+ return nil;
+ }else{
+ ast = parseprog(tokens);
+ match(tokens, TokEnd);
+ }
+ return ast;
+}
+
+static void _Noreturn
+error(TokenList *tokens, char *msg)
+{
+ tokens->err = msg;
+ longjmp(tokens->errbuf, 1);
+}
+
+static int
+peek(TokenList *tokens)
+{
+ if(tokens->offset >= tokens->count)
+ error(tokens, "unexpected end of token stream");
+
+ return tokens->tokens[tokens->offset].tag;
+}
+
+static int
+peekclass(TokenList *tokens)
+{
+ peek(tokens); /* triggers an error if we are at the end */
+ return tokens->tokens[tokens->offset].nameclass;
+}
+
+static void
+match(TokenList *tokens, int tag)
+{
+ if(peek(tokens) != tag)
+ error(tokens, "Unexpected token (match failed)");
+ tokens->offset++;
+}
+
+static void
+addchild(Ast *ast, Ast *child)
+{
+ ast->childcount++;
+ ast->children = allocextra(ast, sizeof(Ast *) * ast->childcount);
+ ast->children[ast->childcount-1] = child;
+}
+
+static int
+issep(TokenList *t)
+{
+ switch(peek(t)){
+ case TokNewline:
+ case TokDiamond:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int
+nameclass(char *name, Ast *func)
+{
+ int class;
+
+ if(func == nil)
+ class = NameclassUndef;
+ else if(strcmp(name, func->funcname->name) == 0)
+ class = NameclassFunc;
+ else if(func->funcresult && strcmp(name, func->funcresult->name) == 0)
+ class = NameclassLocal;
+ else if(func->funcleftarg && strcmp(name, func->funcleftarg->name) == 0)
+ class = NameclassLocal;
+ else if(func->funcrightarg && strcmp(name, func->funcrightarg->name) == 0)
+ class = NameclassLocal;
+ else{
+ /* Check if the name exist in the locallist */
+ class = NameclassUndef;
+ }
+ return class;
+}
+
+static void
+parsesep(TokenList *t)
+{
+ if(issep(t))
+ match(t, peek(t));
+}
+
+static void
+parseseps(TokenList *t, int required)
+{
+ while(issep(t)){
+ match(t, peek(t));
+ required = 0;
+ }
+ if(required)
+ match(t, TokNewline);
+}
+
+static Ast *
+parseprog(TokenList *t)
+{
+ Ast *prog = alloc(DataAst);
+ prog->tag = AstProg;
+
+ while(peek(t) != TokEnd){
+ Ast *child;
+ if(peek(t) == TokDel)
+ child = parsefuncdef(t);
+ else
+ child = parseexpr(t, nil);
+ print("After expr: %d\n", peek(t));
+ if(peek(t) != TokEnd)
+ parseseps(t, 1);
+ addchild(prog, child);
+ }
+ print("got prog, peek is: %d\n", peek(t));
+ return prog;
+}
+
+static Ast *
+parsefuncdef(TokenList *t)
+{
+ Ast *func = parsefuncheader(t);
+ while(peek(t) != TokDel){
+ Ast *expr = parseexpr(t, func);
+ addchild(func, expr);
+ if(peek(t) != TokDel)
+ parseseps(t, 1);
+ }
+ match(t, TokDel);
+ return func;
+}
+
+static Ast *
+parsefuncheader(TokenList *t)
+{
+ Ast *func = alloc(DataAst);
+ func->tag = AstFunc;
+
+ match(t, TokDel);
+
+ func->funcname = parsename(t);
+ if(peek(t) == TokLarrow){
+ match(t, TokLarrow);
+ func->funcresult = func->funcname;
+ func->funcname = parsename(t);
+ }
+ if(peek(t) == TokName)
+ func->funcrightarg = parsename(t);
+ if(peek(t) == TokName){
+ func->funcleftarg = func->funcname;
+ func->funcname = func->funcrightarg;
+ func->funcrightarg = parsename(t);
+ }
+ func->funclocals = parselocals(t);
+
+ return func;
+}
+
+static Ast *
+parselocals(TokenList *t)
+{
+ Ast *locals = alloc(DataAst);
+ locals->tag = AstLocals;
+ while(peek(t) == TokSemi){
+ match(t, TokSemi);
+ Ast *name = parsename(t);
+ name->nameclass = NameclassLocal;
+ addchild(locals, name);
+ }
+ parseseps(t, 1);
+ return locals;
+}
+
+static Ast *
+parseexpr(TokenList *t, Ast *func)
+{
+ uvlong start, end;
+ vlong depth;
+
+ depth = 0;
+ start = t->offset;
+ while(!(issep(t) || (peek(t) == TokDel) || (peek(t) == TokEnd)) || depth != 0){
+ switch(peek(t)){
+ case TokLparen: depth++; break;
+ case TokRparen: depth--; break;
+ }
+ match(t, peek(t));
+ }
+ end = t->offset;
+ t->offset = start;
+
+ for(uvlong i = start; i < end; i++){
+ char *name;
+ int class;
+
+ if(t->tokens[i].tag != TokName)
+ continue;
+ name = t->tokens[i].name;
+ class = nameclass(name, func);
+ t->tokens[i].nameclass = class;
+ if(class == 0)
+ error(t, "parseexpr() can't deal with free variables yet");
+ }
+
+ /* We know the nameclass of each name, and assume that the nameclasses do not change.
+ * Now create the AST.
+ */
+ return parseexprsub(t);
+}
+
+static Ast *
+parseexprsub(TokenList *t)
+{
+ Ast *expr, *val;
+
+ if(peek(t) == TokLparen){
+ match(t, TokLparen);
+ val = parseexprsub(t);
+ match(t, TokRparen);
+ }else
+ val = nil;
+
+ if(peekclass(t) == NameclassFunc){
+func:
+ expr = alloc(DataAst);
+ if(val){
+ expr->tag = AstDyadic;
+ expr->left = val;
+ }else
+ expr->tag = AstMonadic;
+ expr->func = parsefunc(t);
+ expr->right = parseexprsub(t);
+ return expr;
+ }
+
+ if(peek(t) == TokName){
+ val = parsename(t);
+ if(peek(t) == TokLarrow){
+ match(t, TokLarrow);
+ expr = alloc(DataAst);
+ expr->tag = AstAssign;
+ expr->left = val;
+ expr->right = parseexprsub(t);
+ return expr;
+ }
+ }
+
+ /* We need a value now. Stranding is not implemented */
+ if(val == nil)
+ val = parseconst(t);
+
+ if(peekclass(t) == NameclassFunc)
+ goto func;
+
+ return val;
+}
+
+static Ast *
+parsename(TokenList *t)
+{
+ Ast *name = alloc(DataAst);
+ name->tag = AstName;
+ name->name = t->tokens[t->offset].name;
+ match(t, TokName);
+ return name;
+}
+
+static Ast *
+parsefunc(TokenList *t)
+{
+ /* TODO: parse primitives as well */
+ Ast *func;
+ if(peek(t) == TokName && peekclass(t) == NameclassFunc)
+ func = parsename(t);
+ else{
+ func = alloc(DataAst);
+ func->tag = AstPrim;
+ func->prim = t->tokens[t->offset].prim;
+ match(t, TokPrimitive);
+ }
+
+ return func;
+}
+
+static Ast *
+parseconst(TokenList *t)
+{
+ Ast *val = alloc(DataAst);
+ val->tag = AstConst;
+
+ vlong num = t->tokens[t->offset].num;
+ match(t, TokNumber);
+ val->val = allocarray(TypeNumber, 0, 1);
+ setint(val->val, 0, num);
+
+ return val;
+}
--- a/scan.c
+++ b/scan.c
@@ -5,6 +5,14 @@
#include "dat.h"
#include "fns.h"
+struct {
+ Rune spelling;
+ int id;
+} primspecs[] = {
+ L'+', PrimPlus,
+ L'-', PrimMinus,
+};
+
Token *
newtok(TokenList *tokens, int tag)
{
@@ -29,26 +37,32 @@
while(*cp){
n = chartorune(&r, cp);
+ int new = -1;
switch(r){
- case '(':
- newtok(tokens, TokLparen);
+ case L'(': new = TokLparen; break;
+ case L')': new = TokRparen; break;
+ case L'[': new = TokLbrack; break;
+ case L']': new = TokRbrack; break;
+ case L'{': new = TokLbrace; break;
+ case L'}': new = TokRbrace; break;
+ case L'\n': new = TokNewline; break;
+ case L'⋄': new = TokDiamond; break;
+ case L'∇': new = TokDel; break;
+ case L'←': new = TokLarrow; break;
+ case L';': new = TokSemi; break;
+ }
+ if(new != -1){
+ newtok(tokens, new);
goto next;
- case ')':
- newtok(tokens, TokRparen);
- goto next;
- case '[':
- newtok(tokens, TokLbrack);
- goto next;
- case ']':
- newtok(tokens, TokRbrack);
- goto next;
- case '\n':
- newtok(tokens, TokNewline);
- goto next;
- case L'⋄':
- newtok(tokens, TokDiamond);
- goto next;
}
+ for(int i = 0; i < nelem(primspecs); i++){
+ if(r == primspecs[i].spelling){
+ tok = newtok(tokens, TokPrimitive);
+ tok->prim = primspecs[i].id;
+ tok->nameclass = NameclassFunc;
+ goto next;
+ }
+ }
if(isspacerune(r))
goto next;
if(isdigitrune(r)){
@@ -59,10 +73,30 @@
tok->num = num;
goto next;
}
+ if(isalpharune(r)){
+ char *start = cp;
+ do{
+ cp += n;
+ n = chartorune(&r, cp);
+ }while(isalpharune(r) || isdigitrune(r));
+ tok = newtok(tokens, TokName);
+ usize size = cp - start;
+ tok->name = malloc(size + 1);
+ memcpy(tok->name, start, size);
+ tok->name[size] = 0;
+ continue;
+ }
*errp = "scan error";
return nil;
next:
cp += n;
+ }
+ newtok(tokens, TokEnd);
+ if(tokens){
+ print("token tags: ");
+ for(uvlong i = 0; i < tokens->count; i++)
+ print("%d ", tokens->tokens[i].tag);
+ print("\n");
}
return tokens;
}
\ No newline at end of file
--- a/session.c
+++ b/session.c
@@ -56,7 +56,7 @@
if(err)
goto error;
- USED(ast);
+ debugast(ast, 0);
appendlog(s, "got an AST but can't evaluate it yet\n");
}
}
--- a/symtab.c
+++ b/symtab.c
@@ -5,24 +5,10 @@
#include "dat.h"
#include "fns.h"
-struct {
- char *name;
- void *(*init)(void);
-} defaultsyms[] = {
- { "⎕IO", init_quadio },
-};
-
Symtab *
-allocsymtab(int defaults)
+allocsymtab(void)
{
Symtab *s = alloc(DataSymtab);
- if(defaults){
- for(uvlong i = 0; i < nelem(defaultsyms); i++){
- uvlong symb = sym(s, defaultsyms[i].name);
- symset(s, symb, defaultsyms[i].init());
- }
- }
-
return s;
}
@@ -77,14 +63,14 @@
return value;
}
-Qid
-symqid(Symtab *s, uvlong id)
+Symbol *
+symptr(Symtab *s, uvlong id)
{
- Qid qid;
+ Symbol *symb;
rlock(&s->lock);
- qid = s->symbols[id]->qsymbol;
+ symb = s->symbols[id];
runlock(&s->lock);
- return qid;
+ return symb;
}
void
--- a/util.c
+++ b/util.c
@@ -24,4 +24,90 @@
else
str[i] = 0;
}
+}
+
+static void
+indent(int depth)
+{
+ for(int i = 0; i < depth; i++)
+ print(" ");
+}
+
+static void
+printchild(char *desc, Ast *ast, int depth)
+{
+ indent(depth);
+ print("%s:\n", desc);
+ indent(depth+1);
+ debugast(ast, depth+1);
+}
+
+void
+debugast(Ast *ast, int depth)
+{
+ if(ast == nil){
+ print("<nil>\n");
+ return;
+ }
+
+ int printchildren = 0;
+
+ switch(ast->tag){
+ case AstProg:
+ print("prog\n");
+ printchildren = 1;
+ break;
+ case AstFunc:
+ print("func\n");
+ depth++;
+ printchild("name", ast->funcname, depth);
+ printchild("result", ast->funcresult, depth);
+ printchild("left arg", ast->funcleftarg, depth);
+ printchild("right arg", ast->funcrightarg, depth);
+ printchild("local vars", ast->funclocals, depth);
+ indent(depth); print("body\n");
+ printchildren = 1;
+ break;
+ case AstName:
+ print("\"%s\"\n", ast->name);
+ break;
+ case AstLocals:
+ print("locals\n");
+ printchildren = 1;
+ break;
+ case AstAssign:
+ print("assign\n");
+ printchild("name", ast->left, depth+1);
+ printchild("value", ast->right, depth+1);
+ break;
+ case AstMonadic:
+ print("monadic call\n");
+ printchild("func", ast->func, depth+1);
+ printchild("right", ast->right, depth+1);
+ break;
+ case AstDyadic:
+ print("dyadic call\n");
+ printchild("func", ast->func, depth+1);
+ printchild("left", ast->left, depth+1);
+ printchild("right", ast->right, depth+1);
+ break;
+ case AstConst:
+ print("const:\n");
+ indent(depth+1);
+ print("%s\n", printarray(ast->val));
+ break;
+ case AstPrim:
+ print("prim: %d\n", ast->prim);
+ break;
+ default:
+ print("<ast node %d>\n", ast->tag);
+ break;
+ }
+
+ if(printchildren){
+ for(uvlong i = 0; i < ast->childcount; i++){
+ indent(depth+1);
+ debugast(ast->children[i], depth+1);
+ }
+ }
}
\ No newline at end of file
--- a/value.c
+++ b/value.c
@@ -18,17 +18,36 @@
void *
parseval(char *buf, char **errp)
{
+ Ast *ast;
void *val = nil;
+
TokenList *tokens = scan(buf, errp);
- if(tokens != nil){
- /* Parse the tokens as a constant. TODO: Support function definitions as well... */
- val = parseaplan(tokens, errp);
+ if(tokens == nil)
+ goto end;
+ ast = parse(tokens, errp);
+ if(ast == nil)
+ goto end;
+
+ /* Check that the ast is either a single function definition,
+ * or a single constant.
+ */
+ if(!(ast->tag == AstProg && ast->childcount == 1)){
+ *errp = "Expected single value or function definition";
+ goto end;
}
+
+ ast = ast->children[0];
+ switch(ast->tag){
+ case AstConst:
+ val = ast->val;
+ break;
+ case AstFunc:
+ *errp = "Functions not supported yet";
+ break;
+ default:
+ *errp = "Expected constant or function definition";
+ goto end;
+ }
+end:
return val;
}
-
-void *
-init_quadio(void)
-{
- return nil;
-}
\ No newline at end of file