ref: a40a18fc0181b2b0c59a20dcd89bee7b38d63e40
dir: /parser.c/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dat.h"
#include "fns.h"
#define PrecedenceLevels 1200
typedef struct Token Token;
typedef struct Operator Operator;
typedef struct OpInfo OpInfo;
struct Token
{
int tag;
Rune *text;
double dval;
vlong ival;
};
struct Operator
{
int type;
int level;
Rune *spelling;
Operator *next;
};
struct OpInfo
{
int level;
int type;
};
enum {
Xf = 1<<0, /* 1 */
Yf = 1<<1, /* 2 */
Xfx = 1<<2, /* 4 */
Xfy = 1<<3, /* 8 */
Yfx = 1<<4, /* 16 */
Fy = 1<<5, /* 32 */
Fx = 1<<6, /* 64 */
};
enum {
AtomTok = 1<<0, /* 1 */
FunctorTok = 1<<1, /* 2 */
VarTok = 1<<2, /* 4 */
StringTok = 1<<3, /* 8 */
IntTok = 1<<4, /* 16 */
FloatTok = 1<<5, /* 32 */
ParenLeftTok = 1<<6, /* 64 */
ParenRightTok = 1<<7, /* 128 */
SquareBracketLeftTok = 1<<8, /* 256 */
SquareBracketRightTok = 1<<9, /* 512 */
CurlyBracketLeftTok = 1<<10, /* 1024 */
CurlyBracketRightTok = 1<<11, /* 2048 */
CommaTok = 1<<12, /* 4096 */
PipeTok = 1<<13, /* 8192 */
EofTok = 1<<14, /* 16384 */
};
static Biobuf *parsein;
static Token lookahead;
static Operator *operators[PrecedenceLevels];
void initoperators(void);
void addoperator(int, int, Rune *);
Operator *getoperator(Rune *);
void nexttoken(void);
Term *fullterm(int, Rune *, Term *);
Term *term(void);
Term *listterm(int);
Term *curlybracketterm(void);
Term *compound(void);
Term *parseoperators(Term *);
void match(int);
void syntaxerror_parser(char *);
Term *prologtext(int);
Term *
parse(int fd, Biobuf *bio, int querymode)
{
if(bio == nil){
fd = dup(fd, -1);
parsein = Bfdopen(fd, OREAD);
if(parsein == nil){
print("Could not open file\n");
return nil;
}
}else
parsein = bio;
initoperators();
nexttoken();
Term *result = prologtext(querymode);
if(querymode){
uvlong id = 1;
result = copyterm(result, &id);
}
if(!bio)
Bterm(parsein);
return result;
}
Term *
prologtext(int querymode)
{
if(lookahead.tag == EofTok)
return nil;
Term *t = fullterm(AtomTok, L".", nil);
if(lookahead.tag == AtomTok && runestrcmp(lookahead.text, L".") == 0){
if(!querymode)
match(AtomTok);
}else
syntaxerror_parser("prologtext");
if(querymode)
return t;
if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 1){
Term *body = t->children;
print("Got directive: %S\n", prettyprint(body, 0, 0, 0));
if(runestrcmp(body->text, L"module") == 0 && body->arity == 2)
t->next = prologtext(querymode);
else
t = prologtext(querymode);
}else if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 2){
t->next = prologtext(querymode);
}else if(t->tag == AtomTerm || t->tag == CompoundTerm){
t->next = prologtext(querymode);
}else{
print("Expected directive or clause as toplevel\n");
syntaxerror_parser("prologtext");
}
return t;
}
Term *
fullterm(int stoptag, Rune *stoptext, Term *list)
{
if((lookahead.tag & stoptag) && (stoptext == nil || runestrcmp(stoptext, lookahead.text) == 0))
return parseoperators(list);
Term *t = term();
list = appendterm(list, t);
return fullterm(stoptag, stoptext, list);
}
Term *
term(void)
{
Term *result;
switch(lookahead.tag){
case AtomTok:
result = mkatom(lookahead.text);
match(AtomTok);
break;
case FunctorTok:
result = compound();
break;
case VarTok:
result = mkvariable(lookahead.text);
match(VarTok);
break;
case IntTok:
result = mkinteger(lookahead.ival);
match(IntTok);
break;
case FloatTok:
result = mkfloat(lookahead.dval);
match(FloatTok);
break;
case CommaTok:
result = mkatom(L",");
match(CommaTok);
break;
case SquareBracketLeftTok:
result = listterm(1);
break;
case CurlyBracketLeftTok:
result = curlybracketterm();
break;
case ParenLeftTok:
match(ParenLeftTok);
result = fullterm(ParenRightTok, nil, nil);
match(ParenRightTok);
break;
case StringTok:
result = mkstring(lookahead.text);
match(StringTok);
break;
default:
print("Can't parse term of token type %d\n", lookahead.tag);
syntaxerror_parser("term");
result = nil;
}
return result;
}
Term *
listterm(int start)
{
if(start)
match(SquareBracketLeftTok);
if(lookahead.tag == SquareBracketRightTok){
match(SquareBracketRightTok);
return mkatom(L"[]");
}else if(lookahead.tag == PipeTok){
match(PipeTok);
Term *t = fullterm(SquareBracketRightTok, nil, nil);
match(SquareBracketRightTok);
return t;
}else{
Term *t = fullterm(CommaTok|PipeTok|SquareBracketRightTok, nil, nil);
if(lookahead.tag == CommaTok)
match(CommaTok);
t->next = listterm(0);
return mkcompound(L".", 2, t);
}
}
Term *
curlybracketterm(void)
{
match(CurlyBracketLeftTok);
Term *result = fullterm(CurlyBracketRightTok, nil, nil);
match(CurlyBracketRightTok);
return mkcompound(L"{}", 1, result);
}
Term *
compound(void)
{
Rune *name = lookahead.text;
Term *args = nil;
int arity = 0;
match(FunctorTok);
match(ParenLeftTok);
while(lookahead.tag != ParenRightTok){
Term *t = fullterm(CommaTok|ParenRightTok, nil, nil);
if(lookahead.tag == CommaTok)
match(CommaTok);
args = appendterm(args, t);
arity++;
}
match(ParenRightTok);
return mkcompound(name, arity, args);
}
Term *
parseoperators(Term *list)
{
int i;
int length = termslength(list);
Term *t;
Term **terms = gmalloc(sizeof(Term *) * length);
OpInfo *infos = gmalloc(sizeof(OpInfo) * length);
for(i = 0, t = list; i < length; i++){
Operator *op = getoperator(t->text);
if(op && t->tag == AtomTerm){
infos[i].type = op->type;
infos[i].level = op->level;
}else{
infos[i].type = 0;
infos[i].level = 0;
}
terms[i] = t;
Term *tmp = t;
t = t->next;
tmp->next = nil;
}
while(length > 1){
int index = -1;
int lowest = PrecedenceLevels + 1;
for(i = 0; i < length; i++){
if(infos[i].type == 0)
continue;
if(infos[i].level == lowest && infos[i].type == Xfy)
index = i;
else if(infos[i].level < lowest){
index = i;
lowest = infos[i].level;
}
}
if(index == -1){
print("Can't parse, list of length %d contains no operators: ", length);
for(i = 0; i < length; i++)
print("%S(%d) ", prettyprint(terms[i], 0, 0, 0), infos[i].level);
print("\n");
syntaxerror_parser("parseoperators");
}
int infixlevel = infos[index].type & (Xfx|Xfy|Yfx);
int prefixlevel = infos[index].type & (Fx|Fy);
int postfixlevel = infos[index].type & (Xf|Yf);
if(infixlevel && index != 0 && index != length-1 && infos[index-1].type == 0 && infos[index-1].type == 0){
infos[index-1].type = 0;
infos[index-1].level = infos[index].level;
Term *args = terms[index-1];
args->next = terms[index+1];
terms[index-1] = mkcompound(terms[index]->text, 2, args);
length -= 2;
for(i = index; i < length; i++){
infos[i].level = infos[i+2].level;
infos[i].type = infos[i+2].type;
terms[i] = terms[i+2];
}
}else if(prefixlevel && index != length-1 && infos[index+1].type == 0){
infos[index].type = 0;
terms[index] = mkcompound(terms[index]->text, 1, terms[index+1]);
length--;
for(i = index+1; i < length; i++){
infos[i].level = infos[i+1].level;
infos[i].type = infos[i+1].type;
terms[i] = terms[i+1];
}
}else if(postfixlevel && index != 0 && infos[index-1].type == 0){
infos[index-1].type = 0;
infos[index-1].level = infos[index].level;
terms[index-1] = mkcompound(terms[index]->text, 1, terms[index-1]);
length--;
for(i = index; i < length; i++){
infos[i].level = infos[i+1].level;
infos[i].type = infos[i+1].type;
terms[i] = terms[i+1];
}
}else{
print("Parse error when parsing operator %S (prefix=%d, postfix=%d, infix=%d level=%d)\n", prettyprint(terms[index], 0, 0, 0), prefixlevel, postfixlevel, infixlevel, infos[index].level);
syntaxerror_parser("parseoperators");
}
}
Term *result = terms[0];
return result;
}
void
initoperators(void)
{
Operator *op;
int i;
for(i = 0; i < PrecedenceLevels; i++){
while(operators[i]){
op = operators[i];
operators[i] = op->next;
free(op);
}
}
addoperator(1200, Xfx, L":-");
addoperator(1200, Xfx, L"-->");
addoperator(1200, Fx, L":-");
addoperator(1200, Fx, L"?-");
addoperator(1100, Xfy, L";");
addoperator(1050, Xfy, L"->");
addoperator(1000, Xfy, L",");
addoperator(900, Fy, L"\\+");
addoperator(700, Xfx, L"=");
addoperator(700, Xfx, L"\\=");
addoperator(700, Xfx, L"==");
addoperator(700, Xfx, L"\\==");
addoperator(700, Xfx, L"@<");
addoperator(700, Xfx, L"@=<");
addoperator(700, Xfx, L"@>");
addoperator(700, Xfx, L"@>=");
addoperator(700, Xfx, L"is");
addoperator(700, Xfx, L"=:=");
addoperator(700, Xfx, L"=\\=");
addoperator(700, Xfx, L"<");
addoperator(700, Xfx, L"=<");
addoperator(700, Xfx, L">");
addoperator(700, Xfx, L">=");
addoperator(700, Xfx, L"=..");
addoperator(600, Xfy, L":");
addoperator(500, Yfx, L"+");
addoperator(500, Yfx, L"-");
addoperator(500, Yfx, L"/\\");
addoperator(500, Yfx, L"\\/");
addoperator(400, Yfx, L"*");
addoperator(400, Yfx, L"/");
addoperator(400, Yfx, L"//");
addoperator(400, Yfx, L"rem");
addoperator(400, Yfx, L"mod");
addoperator(400, Yfx, L"<<");
addoperator(400, Yfx, L">>");
addoperator(200, Xfx, L"**");
addoperator(200, Xfy, L"^");
addoperator(200, Fy, L"-");
addoperator(200, Fy, L"\\");
}
void
addoperator(int level, int type, Rune *spelling)
{
/* the operator table is never garbage collected, so just use normal malloc */
Operator *op = malloc(sizeof(Operator));
op->type = type;
op->level = level;
op->spelling = spelling;
op->next = operators[level-1];
operators[level-1] = op;
}
Operator *
getoperator(Rune *spelling)
{
Operator *op = nil;
int level;
if(spelling == nil)
return nil;
for(level = 0; level < PrecedenceLevels && op == nil; level++){
Operator *tmp;
for(tmp = operators[level]; tmp != nil; tmp = tmp->next){
if(runestrcmp(tmp->spelling, spelling) == 0){
if(op == nil){
op = gmalloc(sizeof(Operator));
memcpy(op, tmp, sizeof(Operator));
}else
op->type |= tmp->type;
}
}
}
return op;
}
void
nexttoken(void)
{
Rune buf[1024];
Rune peek;
int i = 0;
double numD;
vlong numI = 0;
int negative = 0;
static long replaypeek = -1;
if(replaypeek == -1)
peek = Bgetrune(parsein);
else{
peek = replaypeek;
replaypeek = -1;
}
SkipWhite:
/* Skip whitespace */
while(isspacerune(peek))
peek = Bgetrune(parsein);
/* Skip single line comments */
if(peek == L'%'){
while(peek != L'\n')
peek = Bgetrune(parsein);
goto SkipWhite;
}
/* Variables */
if(peek == L'_' || isupperrune(peek)){
while(peek == L'_' || isalpharune(peek) || isdigitrune(peek)){
/* FIXME Should handle input length, also elsewhere in this function */
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = VarTok;
lookahead.text = runestrdup(buf);
return;
}
/* Strings */
if(peek == L'"'){
peek = Bgetrune(parsein);
while(peek != L'"'){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
lookahead.tag = StringTok;
lookahead.text = runestrdup(buf);
return;
}
/* Numbers */
if(peek == L'-'){
peek = Bgetrune(parsein);
if(isdigitrune(peek))
negative = 1;
else{
Bungetrune(parsein);
peek = L'-';
}
}
if(isdigitrune(peek)){
if(peek == L'0'){
peek = Bgetrune(parsein);
switch(peek){
case L'o': /* Octal */
case L'x': /* Hexadecimal */
case L'b': /* Binary */
print("Cant lex numbers in other bases than base 10 right now\n");
exits("lexer");
}
}
while(isdigitrune(peek)){
numI *= 10;
numI += peek - L'0';
peek = Bgetrune(parsein);
}
if(peek == L'.'){
int place = 1;
numD = numI;
peek = Bgetrune(parsein);
if(!isdigitrune(peek)){
Bungetrune(parsein);
replaypeek = L'.';
goto Integer;
}
while(isdigitrune(peek)){
double addition = (peek - L'0') / (double)(10 * place);
numD += addition;
peek = Bgetrune(parsein);
place *= 10;
}
Bungetrune(parsein);
/* Should also lex 123.45E10 */
lookahead.tag = FloatTok;
lookahead.dval = negative ? -numD : numD;
}else{
Bungetrune(parsein);
Integer:
lookahead.tag = IntTok;
lookahead.ival = negative ? -numI : numI;
}
return;
}
/* Atoms */
/* Atoms in single quotes */
if(peek == L'\''){
peek = Bgetrune(parsein);
while(peek != L'\''){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* Special atom [] */
if(peek == L'['){
peek = Bgetrune(parsein);
if(peek == L']'){
lookahead.tag = AtomTok;
lookahead.text = runestrdup(L"[]");
goto Functor;
}else{
Bungetrune(parsein);
lookahead.tag = SquareBracketLeftTok;
return;
}
}
/* Special atom {} */
if(peek == L'{'){
peek = Bgetrune(parsein);
if(peek == L'}'){
lookahead.tag = AtomTok;
lookahead.text = runestrdup(L"{}");
goto Functor;
}else{
Bungetrune(parsein);
lookahead.tag = CurlyBracketLeftTok;
return;
}
}
/* Graphic atom */
Rune *graphics = L"#$&*+-./:<=>?@^~\\";
if(runestrchr(graphics, peek)){
while(runestrchr(graphics, peek)){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* simple atom */
if(islowerrune(peek)){
while(isalpharune(peek) || isdigitrune(peek) || peek == L'_'){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* Other */
if(runestrchr(L",.()]}|!;", peek)){
switch(peek){
case L',': lookahead.tag = CommaTok; break;
case L'(': lookahead.tag = ParenLeftTok; break;
case L')': lookahead.tag = ParenRightTok; break;
case L']': lookahead.tag = SquareBracketRightTok; break;
case L'}': lookahead.tag = CurlyBracketRightTok; break;
case L'|': lookahead.tag = PipeTok; break;
case L'!': lookahead.tag = AtomTok; lookahead.text = runestrdup(L"!"); break;
case L';': lookahead.tag = AtomTok; lookahead.text = runestrdup(L";"); break;
}
return;
}
/* EOF */
if(peek == Beof){
lookahead.tag = EofTok;
return;
}
print("Could not lex rune %C (%ud)\n", peek, peek);
exits("lexer");
Functor:
if(peek == Beof){
Bgetrune(parsein);
return;
}
peek = Bgetrune(parsein);
if(peek == L'(')
lookahead.tag = FunctorTok;
Bungetrune(parsein);
return;
}
void
match(int tag)
{
if(lookahead.tag == tag)
nexttoken();
else
syntaxerror_parser("match");
}
void
syntaxerror_parser(char *where)
{
print("Syntax error: Unexpected %d (%S) token in %s\n", lookahead.tag, lookahead.text, where);
exits("syntax error");
}