ref: 77c7fea4cee74562ad60c7ce96ca830a7ebda8b7
dir: /parser.c/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dat.h"
#include "fns.h"
typedef struct Token Token;
typedef struct OpInfo OpInfo;
struct Token
{
int tag;
Rune *text;
double dval;
vlong ival;
};
struct OpInfo
{
int level;
int type;
};
enum {
AtomTok = 1<<0, /* 1 */
FunctorTok = 1<<1, /* 2 */
VarTok = 1<<2, /* 4 */
StringTok = 1<<3, /* 8 */
IntTok = 1<<4, /* 16 */
FloatTok = 1<<5, /* 32 */
ParenLeftTok = 1<<6, /* 64 */
ParenRightTok = 1<<7, /* 128 */
SquareBracketLeftTok = 1<<8, /* 256 */
SquareBracketRightTok = 1<<9, /* 512 */
CurlyBracketLeftTok = 1<<10, /* 1024 */
CurlyBracketRightTok = 1<<11, /* 2048 */
CommaTok = 1<<12, /* 4096 */
PipeTok = 1<<13, /* 8192 */
EofTok = 1<<14, /* 16384 */
};
static Biobuf *parsein;
static Token lookahead;
static Module *currentmod;
void nexttoken(void);
Term *fullterm(int, Rune *, Term *);
Term *term(void);
Term *listterm(int);
Term *curlybracketterm(void);
Term *compound(void);
Term *parseoperators(Term *);
void match(int);
void syntaxerror_parser(char *);
Term *prologtext(int);
void handlemoduledirective(Term *);
void handleopdirective(Term *);
Term *
parse(int fd, Biobuf *bio, int querymode)
{
if(bio == nil){
fd = dup(fd, -1);
parsein = Bfdopen(fd, OREAD);
if(parsein == nil){
print("Could not open file\n");
return nil;
}
}else
parsein = bio;
nexttoken();
currentmod = usermodule;
Term *result = prologtext(querymode);
if(querymode && result){
result = copyterm(result, &clausenr);
clausenr++;
}
if(!bio)
Bterm(parsein);
return result;
}
Term *
prologtext(int querymode)
{
if(lookahead.tag == EofTok)
return nil;
Term *t = fullterm(AtomTok, L".", nil);
if(lookahead.tag == AtomTok && runestrcmp(lookahead.text, L".") == 0){
if(!querymode)
match(AtomTok);
}else
syntaxerror_parser("prologtext");
if(querymode)
return t;
if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 1){
Term *body = t->children;
if(runestrcmp(body->text, L"module") == 0 && body->arity == 2){
handlemoduledirective(body->children);
t->next = prologtext(querymode);
return t;
}
else if(runestrcmp(body->text, L"op") == 0 && body->arity == 3)
handleopdirective(body->children);
t = prologtext(querymode);
}else if(t->tag == CompoundTerm && runestrcmp(t->text, L":-") == 0 && t->arity == 2){
t->next = prologtext(querymode);
}else if(t->tag == AtomTerm || t->tag == CompoundTerm){
t->next = prologtext(querymode);
}else{
print("Expected directive or clause as toplevel\n");
syntaxerror_parser("prologtext");
}
return t;
}
Term *
fullterm(int stoptag, Rune *stoptext, Term *list)
{
if((lookahead.tag & stoptag) && (stoptext == nil || runestrcmp(stoptext, lookahead.text) == 0))
return parseoperators(list);
Term *t = term();
list = appendterm(list, t);
return fullterm(stoptag, stoptext, list);
}
Term *
term(void)
{
Term *result;
switch(lookahead.tag){
case AtomTok:
result = mkatom(lookahead.text);
match(AtomTok);
break;
case FunctorTok:
result = compound();
break;
case VarTok:
result = mkvariable(lookahead.text);
match(VarTok);
break;
case IntTok:
result = mkinteger(lookahead.ival);
match(IntTok);
break;
case FloatTok:
result = mkfloat(lookahead.dval);
match(FloatTok);
break;
case CommaTok:
result = mkatom(L",");
match(CommaTok);
break;
case SquareBracketLeftTok:
result = listterm(1);
break;
case CurlyBracketLeftTok:
result = curlybracketterm();
break;
case ParenLeftTok:
match(ParenLeftTok);
result = fullterm(ParenRightTok, nil, nil);
result->inparens = 1;
match(ParenRightTok);
break;
case StringTok:
result = mkstring(lookahead.text);
match(StringTok);
break;
default:
print("Can't parse term of token type %d\n", lookahead.tag);
syntaxerror_parser("term");
result = nil;
}
return result;
}
Term *
listterm(int start)
{
if(start)
match(SquareBracketLeftTok);
if(lookahead.tag == SquareBracketRightTok){
match(SquareBracketRightTok);
return mkatom(L"[]");
}else if(lookahead.tag == PipeTok){
match(PipeTok);
Term *t = fullterm(SquareBracketRightTok, nil, nil);
match(SquareBracketRightTok);
return t;
}else{
Term *t = fullterm(CommaTok|PipeTok|SquareBracketRightTok, nil, nil);
if(lookahead.tag == CommaTok)
match(CommaTok);
t->next = listterm(0);
return mkcompound(L".", 2, t);
}
}
Term *
curlybracketterm(void)
{
match(CurlyBracketLeftTok);
Term *result = fullterm(CurlyBracketRightTok, nil, nil);
match(CurlyBracketRightTok);
return mkcompound(L"{}", 1, result);
}
Term *
compound(void)
{
Rune *name = lookahead.text;
Term *args = nil;
int arity = 0;
match(FunctorTok);
match(ParenLeftTok);
while(lookahead.tag != ParenRightTok){
Term *t = fullterm(CommaTok|ParenRightTok, nil, nil);
if(lookahead.tag == CommaTok)
match(CommaTok);
args = appendterm(args, t);
arity++;
}
match(ParenRightTok);
return mkcompound(name, arity, args);
}
Term *
parseoperators(Term *list)
{
int i;
int length = termslength(list);
Term *t;
Term **terms = gmalloc(sizeof(Term *) * length);
OpInfo *infos = gmalloc(sizeof(OpInfo) * length);
for(i = 0, t = list; i < length; i++){
Operator *op = getoperator(t->text, currentmod);
if(op && t->tag == AtomTerm && !t->inparens){
infos[i].type = op->type;
infos[i].level = op->level;
}else{
infos[i].type = 0;
infos[i].level = 0;
}
terms[i] = t;
Term *tmp = t;
t = t->next;
tmp->next = nil;
}
while(length > 1){
int index = -1;
int lowest = PrecedenceLevels + 1;
for(i = 0; i < length; i++){
if(infos[i].type == 0)
continue;
if(infos[i].level == lowest && infos[i].type == Xfy)
index = i;
else if(infos[i].level < lowest){
index = i;
lowest = infos[i].level;
}
}
if(index == -1){
print("Can't parse, list of length %d contains no operators: ", length);
for(i = 0; i < length; i++)
print("%S(%d) ", prettyprint(terms[i], 0, 1, 0, currentmod), infos[i].level);
print("\n");
syntaxerror_parser("parseoperators");
}
int infixlevel = infos[index].type & (Xfx|Xfy|Yfx);
int prefixlevel = infos[index].type & (Fx|Fy);
int postfixlevel = infos[index].type & (Xf|Yf);
if(infixlevel && index != 0 && index != length-1 && infos[index-1].type == 0 && infos[index-1].type == 0){
infos[index-1].type = 0;
infos[index-1].level = infos[index].level;
Term *args = terms[index-1];
args->next = terms[index+1];
terms[index-1] = mkcompound(terms[index]->text, 2, args);
length -= 2;
for(i = index; i < length; i++){
infos[i].level = infos[i+2].level;
infos[i].type = infos[i+2].type;
terms[i] = terms[i+2];
}
}else if(prefixlevel && index != length-1 && infos[index+1].type == 0){
infos[index].type = 0;
terms[index] = mkcompound(terms[index]->text, 1, terms[index+1]);
length--;
for(i = index+1; i < length; i++){
infos[i].level = infos[i+1].level;
infos[i].type = infos[i+1].type;
terms[i] = terms[i+1];
}
}else if(postfixlevel && index != 0 && infos[index-1].type == 0){
infos[index-1].type = 0;
infos[index-1].level = infos[index].level;
terms[index-1] = mkcompound(terms[index]->text, 1, terms[index-1]);
length--;
for(i = index; i < length; i++){
infos[i].level = infos[i+1].level;
infos[i].type = infos[i+1].type;
terms[i] = terms[i+1];
}
}else{
print("Parse error when parsing operator %S (prefix=%d, postfix=%d, infix=%d level=%d)\n", prettyprint(terms[index], 0, 0, 0, currentmod), prefixlevel, postfixlevel, infixlevel, infos[index].level);
syntaxerror_parser("parseoperators");
}
}
Term *result = terms[0];
return result;
}
void
nexttoken(void)
{
Rune buf[1024];
Rune peek;
int i = 0;
double numD;
vlong numI = 0;
int negative = 0;
static long replaypeek = -1;
if(replaypeek == -1)
peek = Bgetrune(parsein);
else{
peek = replaypeek;
replaypeek = -1;
}
SkipWhite:
/* Skip whitespace */
while(isspacerune(peek))
peek = Bgetrune(parsein);
/* Skip single line comments */
if(peek == L'%'){
while(peek != L'\n')
peek = Bgetrune(parsein);
goto SkipWhite;
}
/* Variables */
if(peek == L'_' || isupperrune(peek)){
while(peek == L'_' || isalpharune(peek) || isdigitrune(peek)){
/* FIXME Should handle input length, also elsewhere in this function */
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = VarTok;
lookahead.text = runestrdup(buf);
return;
}
/* Strings */
if(peek == L'"'){
peek = Bgetrune(parsein);
while(peek != L'"'){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
lookahead.tag = StringTok;
lookahead.text = runestrdup(buf);
return;
}
/* Numbers */
if(peek == L'-'){
peek = Bgetrune(parsein);
if(isdigitrune(peek))
negative = 1;
else{
Bungetrune(parsein);
peek = L'-';
}
}
if(isdigitrune(peek)){
if(peek == L'0'){
peek = Bgetrune(parsein);
switch(peek){
case L'\'': /* Character code */
peek = Bgetrune(parsein);
lookahead.tag = IntTok;
lookahead.ival = peek;
return;
case L'o': /* Octal */
case L'x': /* Hexadecimal */
case L'b': /* Binary */
print("Cant lex numbers in other bases than base 10 right now\n");
exits("lexer");
}
}
while(isdigitrune(peek)){
numI *= 10;
numI += peek - L'0';
peek = Bgetrune(parsein);
}
if(peek == L'.'){
int place = 1;
numD = numI;
peek = Bgetrune(parsein);
if(!isdigitrune(peek)){
Bungetrune(parsein);
replaypeek = L'.';
goto Integer;
}
while(isdigitrune(peek)){
double addition = (peek - L'0') / (double)(10 * place);
numD += addition;
peek = Bgetrune(parsein);
place *= 10;
}
Bungetrune(parsein);
/* Should also lex 123.45E10 */
lookahead.tag = FloatTok;
lookahead.dval = negative ? -numD : numD;
}else{
Bungetrune(parsein);
Integer:
lookahead.tag = IntTok;
lookahead.ival = negative ? -numI : numI;
}
return;
}
/* Atoms */
/* Atoms in single quotes */
if(peek == L'\''){
peek = Bgetrune(parsein);
while(peek != L'\''){
QuotedAtomLoop:
buf[i++] = peek;
peek = Bgetrune(parsein);
}
peek = Bgetrune(parsein);
if(peek == L'\'')
goto QuotedAtomLoop;
else
Bungetrune(parsein);
buf[i] = '\0';
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* Special atom [] */
if(peek == L'['){
peek = Bgetrune(parsein);
if(peek == L']'){
lookahead.tag = AtomTok;
lookahead.text = runestrdup(L"[]");
goto Functor;
}else{
Bungetrune(parsein);
lookahead.tag = SquareBracketLeftTok;
return;
}
}
/* Special atom {} */
if(peek == L'{'){
peek = Bgetrune(parsein);
if(peek == L'}'){
lookahead.tag = AtomTok;
lookahead.text = runestrdup(L"{}");
goto Functor;
}else{
Bungetrune(parsein);
lookahead.tag = CurlyBracketLeftTok;
return;
}
}
/* Graphic atom */
Rune *graphics = L"#$&*+-./:<=>?@^~\\"; /* keep in sync with prettyprint*/
if(runestrchr(graphics, peek)){
while(runestrchr(graphics, peek)){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* simple atom */
if(islowerrune(peek)){
while(isalpharune(peek) || isdigitrune(peek) || peek == L'_'){
buf[i++] = peek;
peek = Bgetrune(parsein);
}
buf[i] = '\0';
Bungetrune(parsein);
lookahead.tag = AtomTok;
lookahead.text = runestrdup(buf);
goto Functor;
}
/* Other */
if(runestrchr(L",.()]}|!;", peek)){
switch(peek){
case L',': lookahead.tag = CommaTok; break;
case L'(': lookahead.tag = ParenLeftTok; break;
case L')': lookahead.tag = ParenRightTok; break;
case L']': lookahead.tag = SquareBracketRightTok; break;
case L'}': lookahead.tag = CurlyBracketRightTok; break;
case L'|': lookahead.tag = PipeTok; break;
case L'!': lookahead.tag = AtomTok; lookahead.text = runestrdup(L"!"); break;
case L';': lookahead.tag = AtomTok; lookahead.text = runestrdup(L";"); break;
}
return;
}
/* EOF */
if(peek == Beof){
lookahead.tag = EofTok;
return;
}
print("Could not lex rune %C (%ud)\n", peek, peek);
exits("lexer");
Functor:
if(peek == Beof){
Bgetrune(parsein);
return;
}
peek = Bgetrune(parsein);
if(peek == L'(')
lookahead.tag = FunctorTok;
Bungetrune(parsein);
return;
}
void
match(int tag)
{
if(lookahead.tag == tag)
nexttoken();
else
syntaxerror_parser("match");
}
void
syntaxerror_parser(char *where)
{
print("Syntax error: Unexpected %d (%S) token in %s\n", lookahead.tag, lookahead.text, where);
exits("syntax error");
}
void
handlemoduledirective(Term *args)
{
Term *modulename = args;
Term *publiclist = modulename->next;
USED(publiclist);
if(modulename->tag != AtomTerm){
print("Module name should be an atom in: %S\n", prettyprint(modulename, 0, 0, 0, currentmod));
return;
}
currentmod = addemptymodule(modulename->text);
}
void
handleopdirective(Term *args)
{
Term *levelt = args;
Term *typet = levelt->next;
Term *opt = typet->next;
if(levelt->tag == IntegerTerm
&& levelt->ival >= 0
&& levelt->ival <= PrecedenceLevels
&& typet->tag == AtomTerm
&& opt->tag == AtomTerm){
int level = levelt->ival;
Rune *spelling = opt->text;
int type = 0;
if(runestrcmp(typet->text, L"xf") == 0)
type = Xf;
else if(runestrcmp(typet->text, L"yf") == 0)
type = Yf;
else if(runestrcmp(typet->text, L"xfx") == 0)
type = Xfx;
else if(runestrcmp(typet->text, L"xfy") == 0)
type = Xfy;
else if(runestrcmp(typet->text, L"yfx") == 0)
type = Yfx;
else if(runestrcmp(typet->text, L"fy") == 0)
type = Fy;
else if(runestrcmp(typet->text, L"fx") == 0)
type = Fx;
if(type != 0){
addoperator(level, type, spelling, currentmod);
return;
}
}
print("Malformed op directive with level=%S, type=%S, op=%S\n",
prettyprint(levelt, 0, 0, 0, currentmod),
prettyprint(typet, 0, 0, 0, currentmod),
prettyprint(opt, 0, 0, 0, currentmod));
}