ref: bd6c2aad586814b091ce5aca9d41cf2c51adb37b
dir: /utils/rcsh/lex.c/
#include "rc.h"
#include "y.tab.h"

#define	NTOK	8192

int getnext(void);

int future=EOF;		/* one-character lookahead; EOF means "nothing buffered" */
int doprompt=1;		/* nonzero: getnext calls pprompt before the next read */
int inquote;		/* nonzero while reading a '...' string; disables \ handling in getnext */
int nerror;		/* incremented by every call to yyerror */
char *promptstr;	/* string pprompt prints next */
char tok[NTOK];		/* text of the current token, NUL terminated; shown by yyerror */
int lastdol;	/* was the last token read '$' or '$#' or '"'? */
int lastword;	/* was the last token read a word or compound word terminator? */
int lastc;		/* last character consumed by advance */

/*
 * Enter the keywords and keyword-like operators with kenter
 * (defined elsewhere; presumably the table later searched by klook).
 */
void
kinit(void)
{
	kenter(FOR, "for");
	kenter(IN, "in");
	kenter(WHILE, "while");
	kenter(IF, "if");
	kenter(NOT, "not");
	kenter(TWIDDLE, "~");
	kenter(BANG, "!");
	kenter(SUBSHELL, "@");
	kenter(SWITCH, "switch");
	kenter(FN, "fn");
}

/*
 * Nonzero if c may appear in an unquoted word: it is not EOF and
 * not white space or one of the shell's special characters.
 */
int
wordchr(int c)
{
	return !strchr("\n \t\r#;&|^$=`'{}()<>", c) && c!=EOF;
}

/*
 * Nonzero if c may appear in the identifier that follows '$':
 * anything above space that is not listed punctuation.
 */
int
idchr(int c)
{
	/*
	 * Formerly:
	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
	 *	|| c=='_' || c=='*';
	 */
	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}

/*
 * Look ahead in the input stream
 */
int
nextc(void)
{
	if(future==EOF)
		future=getnext();
	return future;
}

/*
 * Consume the lookahead character.
 * Returns it and records it in lastc.
 */
int
advance(void)
{
	int c=nextc();

	lastc=future;
	future=EOF;
	return c;
}

/*
 * read a character from the input stream
 *
 * Outside quotes, backslash-newline is returned as a single space
 * (and a prompt is requested); a backslash followed by anything else
 * is returned as-is, with the following character held in peekc for
 * the next call.  Returns EOF once runq->eof is set.
 */
int
getnext(void)
{
	register int c;
	static int peekc=EOF;	/* was "static peekc": implicit int is not valid C99 */

	if(peekc!=EOF){
		c=peekc;
		peekc=EOF;
		return c;
	}
	if(runq->eof)
		return EOF;
	if(doprompt)
		pprompt();
	c=rchr(runq->cmdfd);
	if(!inquote && c=='\\'){
		c=rchr(runq->cmdfd);
		if(c=='\n'){
			doprompt=1;
			c=' ';
		}
		else{
			peekc=c;
			c='\\';
		}
	}
	doprompt=doprompt || c=='\n' || c==EOF;
	if(c==EOF)
		runq->eof++;
	else if(flag['V'] || (ndot>=2 && flag['v']))
		pchr(err, c);	/* echo the input character to err */
	return c;
}

/*
 * Called at the start of each input line: for interactive input,
 * print the pending prompt and pick the next one from the second
 * element of $prompt (default "\t").  Always counts the line and
 * clears doprompt.
 */
void
pprompt(void)
{
	Var *prompt;

	if(runq->iflag){
		pstr(err, promptstr);
		flush(err);
		prompt=vlook("prompt");
		if(prompt->val && prompt->val->next)
			promptstr=prompt->val->next->word;
		else
			promptstr="\t";
	}
	runq->lineno++;
	doprompt=0;
}

/*
 * Skip blanks, tabs, carriage returns and '#' comments.
 * The newline that ends a comment is left unread.
 */
void
skipwhite(void)
{
	int c;

	for(;;){
		c=nextc();
		if(c=='#'){	/* Why did this used to be  if(!inquote && c=='#') ?? */
			for(;;){
				c=nextc();
				if(c=='\n' || c==EOF)
					break;
				advance();
			}
		}
		if(c==' ' || c=='\t' || c=='\r')
			advance();
		else
			return;
	}
}

/*
 * Skip white space, comments and any number of newlines.
 */
void
skipnl(void)
{
	int c;

	for(;;){
		skipwhite();
		c=nextc();
		if(c!='\n')
			return;
		advance();
	}
}

/*
 * If the next input character is c, consume it and return 1;
 * otherwise leave the input alone and return 0.
 */
int
nextis(int c)
{
	if(nextc()==c){
		advance();
		return 1;
	}
	return 0;
}

/*
 * Append val to the token being built at p (a pointer into tok).
 * Returns the new end pointer, or 0 once the buffer is full; a 0
 * argument is passed straight through so callers can keep calling
 * after an overflow.  The last byte of tok is reserved for the
 * terminating NUL, so neither the write here nor the caller's final
 * "*w = '\0'" can land outside the array.
 */
char *
addtok(char *p, int val)
{
	if(p==0)
		return 0;
	if(p==&tok[NTOK-1]){
		*p=0;
		yyerror("token buffer too short");
		return 0;
	}
	*p++=val;
	return p;
}

/*
 * Append character c to the token and, when c begins a two- or
 * three-byte sequence (per the twobyte/threebyte tests), also append
 * the continuation bytes read from the input.
 */
char *
addutf(char *p, int c)
{
	p=addtok(p, c);
	if(twobyte(c))	 /* 2-byte escape */
		return addtok(p, advance());
	if(threebyte(c)){	/* 3-byte escape */
		p=addtok(p, advance());
		return addtok(p, advance());
	}
	return p;
}

/*
 * Lexical analyser called by the parser.  Returns the token type,
 * leaves the token text in tok, and for words and redirections
 * stores the corresponding Tree in yylval.tree.
 */
int
yylex(void)
{
	int c, d=nextc();
	char *w=tok;
	Tree *t;

	yylval.tree=0;
	/*
	 * Embarrassing sneakiness:  if the last token read was a quoted or unquoted
	 * WORD then we alter the meaning of what follows.  If the next character
	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
	 * if the next character is the first character of a simple or compound word,
	 * we insert a `^' before it.
	 */
	if(lastword){
		lastword=0;
		if(d=='('){
			advance();
			strcpy(tok, "( [SUB]");
			return SUB;
		}
		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
			strcpy(tok, "^");
			return '^';
		}
	}
	inquote=0;
	skipwhite();
	switch(c=advance()){
	case EOF:
		lastdol=0;
		strcpy(tok, "EOF");
		return EOF;
	case '$':
		lastdol=1;
		if(nextis('#')){
			strcpy(tok, "$#");
			return COUNT;
		}
		if(nextis('"')){
			strcpy(tok, "$\"");
			return '"';
		}
		strcpy(tok, "$");
		return '$';
	case '&':
		lastdol=0;
		if(nextis('&')){
			skipnl();
			strcpy(tok, "&&");
			return ANDAND;
		}
		strcpy(tok, "&");
		return '&';
	case '|':
		lastdol=0;
		if(nextis(c)){
			skipnl();
			strcpy(tok, "||");
			return OROR;
		}
		/* fall through: a single '|' is parsed with the redirections */
	case '<':
	case '>':
		lastdol=0;
		/*
		 * funny redirection tokens:
		 *	redir:	arrow | arrow '[' fd ']'
		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
		 *	fd:	digit | digit '=' | digit '=' digit
		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
		 * some possibilities are nonsensical and get a message.
		 */
		*w++=c;
		t=newtree();
		switch(c){
		case '|':
			t->type=PIPE;
			t->fd0=1;
			t->fd1=0;
			break;
		case '>':
			t->type=REDIR;
			if(nextis(c)){
				t->rtype=APPEND;
				*w++=c;
			}
			else
				t->rtype=WRITE;
			t->fd0=1;
			break;
		case '<':
			t->type=REDIR;
			if(nextis(c)){
				t->rtype=HERE;
				*w++=c;
			}
			else
				t->rtype=READ;
			t->fd0=0;
			break;
		}
		if(nextis('[')){
			*w++='[';
			c=advance();
			if(c<'0' || '9'<c){
				/*
				 * Only the offending character is recorded here; a
				 * valid first digit is stored by the loop below, so
				 * it no longer appears twice in the token text.
				 */
				*w++=c;
			RedirErr:
				*w=0;
				yyerror(t->type==PIPE?"pipe syntax"
						:"redirection syntax");
				return EOF;
			}
			t->fd0=0;
			do{
				t->fd0=t->fd0*10+c-'0';
				/* tok is only diagnostic text here: keep room for "=", "]" and NUL */
				if(w<&tok[NTOK-3])
					*w++=c;
				c=advance();
			}while('0'<=c && c<='9');
			if(c=='='){
				*w++='=';
				if(t->type==REDIR)
					t->type=DUP;
				c=advance();
				if('0'<=c && c<='9'){
					t->rtype=DUPFD;
					t->fd1=t->fd0;
					t->fd0=0;
					do{
						t->fd0=t->fd0*10+c-'0';
						/* keep room for "]" and NUL */
						if(w<&tok[NTOK-2])
							*w++=c;
						c=advance();
					}while('0'<=c && c<='9');
				}
				else{
					if(t->type==PIPE)
						goto RedirErr;
					t->rtype=CLOSE;
				}
			}
			/*
			 * NOTE(review): by this point a DUP always has rtype
			 * DUPFD or CLOSE, so the HERE/APPEND test looks
			 * unreachable -- confirm intent before changing.
			 */
			if(c!=']'
			|| (t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)))
				goto RedirErr;
			*w++=']';
		}
		*w='\0';
		yylval.tree=t;
		if(t->type==PIPE)
			skipnl();
		return t->type;
	case '\'':
		lastdol=0;
		lastword=1;
		inquote=1;
		for(;;){
			c=advance();
			if(c==EOF)
				break;
			if(c=='\''){
				/* '' inside a quoted string stands for one quote */
				if(nextc()!='\'')
					break;
				advance();
			}
			w=addutf(w, c);
		}
		if(w!=0)
			*w='\0';
		t=token(tok, WORD);
		t->quoted=1;
		yylval.tree=t;
		return t->type;
	}
	if(!wordchr(c)){
		/* any other special character is its own token */
		lastdol=0;
		tok[0]=c;
		tok[1]='\0';
		return c;
	}
	for(;;){
		/* next line should have (char)c==GLOB, but ken's compiler is broken */
		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
			w=addtok(w, GLOB);	/* prefix pattern characters with GLOB */
		w=addutf(w, c);
		c=nextc();
		/* after '$' the word ends at the first non-identifier character */
		if(lastdol?!idchr(c):!wordchr(c))
			break;
		advance();
	}
	lastword=1;
	lastdol=0;
	if(w!=0)
		*w='\0';
	t=klook(tok);
	if(t->type!=WORD)
		lastword=0;
	t->quoted=0;
	yylval.tree=t;
	return t->type;
}

/*
 * Report error message m on err, prefixed with the file name, the
 * line number (non-interactive input only) and the current token,
 * then discard the rest of the input line and count the error.
 */
void
yyerror(char *m)
{
	pfmt(err, "rc: ");
	if(runq->cmdfile)
		pfmt(err, "file %s: ", runq->cmdfile);
	if(!runq->iflag)
		pfmt(err, "line %d: ", runq->lineno);
	if(tok[0] && tok[0]!='\n')
		pfmt(err, "token %q: ", tok);
	pfmt(err, "%s\n", m);
	flush(err);
	lastword=0;
	lastdol=0;
	/* resynchronise: skip to the end of the offending line */
	while(lastc!='\n' && lastc!=EOF)
		advance();
	nerror++;
}