ref: ad5a80bfb081dc954be03836cc65090e0f6c7e4f
dir: /appl/svc/httpd/parser.b/
implement Parser;
include "sys.m";
sys: Sys;
include "draw.m";
draw: Draw;
include "bufio.m";
include "string.m";
str: String;
include "daytime.m";
daytime: Daytime;
include "contents.m";
contents : Contents;
Content: import contents;
include "cache.m";
include "httpd.m";
Private_info: import Httpd;
Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd;
include "parser.m";
include "date.m";
date : Date;
include "alarms.m";
alarms: Alarms;
Alarm: import alarms;
include "lock.m";
locks: Lock;
Semaphore: import locks;
Error: adt {
num : string;
concise: string;
verbose: string;
};
errormsg := array[] of {
Internal => Error("500 Internal Error", "Internal Error",
"This server could not process your request due to an interal error."),
TempFail => Error("500 Internal Error", "Temporary Failure",
"The object %s is currently inaccessible.<p>Please try again later."),
Unimp => Error("501 Not implemented", "Command not implemented",
"This server does not implement the %s command."),
UnkVers => Error("501 Not Implemented", "Unknown http version",
"This server does not know how to respond to http version %s."),
BadCont => Error("501 Not Implemented", "Impossible format",
"This server cannot produce %s in any of the formats your client accepts."),
BadReq => Error("400 Bad Request", "Strange Request",
"Your client sent a query that this server could not understand."),
Syntax => Error("400 Bad Request", "Garbled Syntax",
"Your client sent a query with incoherent syntax."),
BadSearch =>Error("400 Bad Request", "Inapplicable Search",
"Your client sent a search that cannot be applied to %s."),
NotFound =>Error("404 Not Found", "Object not found",
"The object %s does not exist on this server."),
NoSearch => Error("403 Forbidden", "Search not supported",
"The object %s does not support the search command."),
OnlySearch =>Error("403 Forbidden", "Searching Only",
"The object %s only supports the searching methods."),
Unauth => Error("401 Unauthorized", "Unauthorized",
"You are not authorized to see the object %s."),
OK => Error("200 OK", "everything is fine","Groovy man"),
};
badmodule(p: string)
{
sys->fprint(sys->fildes(2), "parse: cannot load %s: %r", p);
raise "fail:bad module";
}
lock: ref Semaphore;
init()
{
sys = load Sys Sys->PATH;
date = load Date Date->PATH;
if (date==nil) badmodule(Date->PATH);
daytime = load Daytime Daytime->PATH;
if(daytime == nil) badmodule(Daytime->PATH);
contents = load Contents Contents->PATH;
if(contents == nil) badmodule(Contents->PATH);
str = load String String->PATH;
if(str == nil) badmodule(String->PATH);
alarms = load Alarms Alarms->PATH;
if(alarms == nil) badmodule(Alarms->PATH);
locks = load Lock Lock->PATH;
if(locks == nil) badmodule(Lock->PATH);
locks->init();
lock = Semaphore.new();
date->init();
}
atexit(g: ref Private_info)
{
if (g.dbg_log!=nil){
sys->fprint(g.dbg_log,"At exit from parse, closing fds. \n");
}
if (g.bin!=nil)
g.bufio->g.bin.close();
if (g.bout!=nil)
g.bufio->g.bout.close();
g.bin=nil;
g.bout=nil;
exit;
}
httpheaders(g: ref Private_info,vers : string)
{
if(vers == "")
return;
g.tok = '\n';
# 15 minutes to get request line
a := Alarm.alarm(15*1000*60);
while(lex(g) != '\n'){
if(g.tok == Word && lex(g) == ':'){
if (g.dbg_log!=nil)
sys->fprint(g.dbg_log,"hitting parsejump. wordval is %s\n",
g.wordval);
parsejump(g,g.wordval);
}
while(g.tok != '\n')
lex(g);
}
a.stop();
}
mimeok(g: ref Private_info,name : string,multipart : int,head : list of ref Content): list of ref Content
{
generic, specific, s : string;
v : real;
while(lex(g) != Word)
if(g.tok != ',')
return head;
generic = g.wordval;
lex(g);
if(g.tok == '/' || multipart){
if(g.tok != '/')
return head;
if(lex(g) != Word)
return head;
specific = g.wordval;
lex(g);
}else
specific = "*";
tmp := contents->mkcontent(generic, specific);
head = tmp::head;
for(;;){
case g.tok {
';' =>
if(lex(g) == Word){
s = g.wordval;
if(lex(g) != '=' || lex(g) != Word)
return head;
v = 3.14; # should be strtof(g.wordval, nil);
if(s=="q")
tmp.q = v;
else
logit(g,sys->sprint(
"unknown %s param: %s %s",
name, s, g.wordval));
}
break;
',' =>
return mimeok(g,name, multipart,head);
* =>
return head;
}
lex(g);
}
return head;
}
mimeaccept(g: ref Private_info,name : string)
{
g.oktype = mimeok(g,name, 1, g.oktype);
}
mimeacceptenc(g: ref Private_info,name : string)
{
g.okencode = mimeok(g,name, 0, g.okencode);
}
mimeacceptlang(g: ref Private_info,name : string)
{
g.oklang = mimeok(g,name, 0, g.oklang);
}
mimemodified(g: ref Private_info,name : string)
{
lexhead(g);
g.modtime = date->date2sec(g.wordval);
if (g.dbg_log!=nil){
sys->fprint(g.dbg_log,"modtime %d\n",g.modtime);
}
if(g.modtime == 0)
logit(g,sys->sprint("%s: %s", name, g.wordval));
}
mimeagent(g: ref Private_info,nil : string)
{
lexhead(g);
g.client = g.wordval;
}
mimefrom(g: ref Private_info,nil : string)
{
lexhead(g);
}
mimehost(g: ref Private_info,nil : string)
{
h : string;
lexhead(g);
(nil,h)=str->splitr(g.wordval," \t");
g.host = h;
}
mimereferer(g: ref Private_info,nil : string)
{
h : string;
lexhead(g);
(nil,h)=str->splitr(g.wordval," \t");
g.referer = h;
}
mimeclength(g: ref Private_info,nil : string)
{
h : string;
lexhead(g);
(nil,h)=str->splitr(g.wordval," \t");
g.clength = int h;
}
mimectype(g: ref Private_info,nil : string)
{
h : string;
lexhead(g);
(nil,h)=str->splitr(g.wordval," \t");
g.ctype = h;
}
mimeignore(g: ref Private_info,nil : string)
{
lexhead(g);
}
mimeunknown(g: ref Private_info,name : string)
{
lexhead(g);
if(g.client!="")
logit(g,sys->sprint("agent %s: ignoring header %s: %s ",
g.client, name, g.wordval));
else
logit(g,sys->sprint("ignoring header %s: %s", name, g.wordval));
}
parsejump(g: ref Private_info,k : string)
{
case k {
"from" =>
mimefrom(g,k);
"if-modified-since" =>
mimemodified(g,k);
"accept" =>
mimeaccept(g,k);
"accept-encoding" =>
mimeacceptenc(g,k);
"accept-language" =>
mimeacceptlang(g,k);
"user-agent" =>
mimeagent(g,k);
"host" =>
mimehost(g,k);
"referer" =>
mimereferer(g,k);
"content-length" =>
mimeclength(g,k);
"content-type" =>
mimectype(g,k);
"authorization" or "chargeto" or "connection" or "forwarded" or
"pragma" or "proxy-agent" or "proxy-connection" or
"x-afs-tokens" or "x-serial-number" =>
mimeignore(g,k);
* =>
mimeunknown(g,k);
};
}
lex(g: ref Private_info): int
{
g.tok = lex1(g);
return g.tok;
}
# rfc 822/rfc 1521 lexical analyzer
lex1(g: ref Private_info): int
{
level, c : int;
if(g.parse_eof)
return '\n';
# top:
for(;;){
c = getc(g);
case c {
'(' =>
level = 1;
while((c = getc(g)) != Bufio->EOF){
if(c == '\\'){
c = getc(g);
if(c == Bufio->EOF)
return '\n';
continue;
}
if(c == '(')
level++;
else if(c == ')' && level == 1){
level--;
break;
}
else if(c == '\n'){
c = getc(g);
if(c == Bufio->EOF)
return '\n';
break;
if(c != ' ' && c != '\t'){
ungetc(g);
return '\n';
}
}
}
' ' or '\t' or '\r' =>
break;
'\n' =>
if(g.tok == '\n'){
g.parse_eof = 1;
return '\n';
}
c = getc(g);
if(c == Bufio->EOF)
return '\n';
if(c != ' ' && c != '\t'){
ungetc(g);
return '\n';
}
')' or '<' or '>' or '[' or ']' or '@' or '/' or ','
or ';' or ':' or '?' or '=' =>
return c;
'"' =>
word(g,"\"");
getc(g); # skip the closing quote
return Word;
* =>
ungetc(g);
word(g,"\"()<>@,;:/[]?=\r\n \t");
return Word;
}
}
return 0;
}
# return the rest of an rfc 822, not including \r or \n
# do not map to lower case
lexhead(g: ref Private_info)
{
c, n: int;
n = 0;
while((c = getc(g)) != Bufio->EOF){
if(c == '\r')
c = wordcr(g);
else if(c == '\n')
c = wordnl(g);
if(c == '\n')
break;
if(c == '\\'){
c = getc(g);
if(c == Bufio->EOF)
break;
}
g.wordval[n++] = c;
}
g.tok = '\n';
g.wordval= g.wordval[0:n];
}
word(g: ref Private_info,stop : string)
{
c : int;
n := 0;
while((c = getc(g)) != Bufio->EOF){
if(c == '\r')
c = wordcr(g);
else if(c == '\n')
c = wordnl(g);
if(c == '\\'){
c = getc(g);
if(c == Bufio->EOF)
break;
}else if(str->in(c,stop)){
ungetc(g);
g.wordval = g.wordval[0:n];
return;
}
if(c >= 'A' && c <= 'Z')
c += 'a' - 'A';
g.wordval[n++] = c;
}
g.wordval = g.wordval[0:n];
# sys->print("returning from word");
}
wordcr(g: ref Private_info): int
{
c := getc(g);
if(c == '\n')
return wordnl(g);
ungetc(g);
return ' ';
}
wordnl(g: ref Private_info): int
{
c := getc(g);
if(c == ' ' || c == '\t')
return c;
ungetc(g);
return '\n';
}
getc(g: ref Private_info): int
{
c := g.bufio->g.bin.getc();
if(c == Bufio->EOF){
g.parse_eof = 1;
return c;
}
return c & 16r7f;
}
ungetc(g: ref Private_info)
{
# this is a dirty hack, I am tacitly assuming that characters read
# from stdin will be ASCII.....
g.bufio->g.bin.ungetc();
}
# go from url with ascii and %xx escapes to unicode, allowing for existing unencoded utf-8
urlunesc(s : string): string
{
a := array[Sys->UTFmax*len s] of byte;
o := 0;
for(i := 0; i < len s; i++){
c := int s[i];
if(c < Runeself){
if(c == '%' && i+2 < len s){
d0 := hex(int s[i+1]);
if(d0 >= 0){
d1 := hex(int s[i+2]);
if(d1 >= 0){
i += 2;
c = d0*16 + d1;
}
}
} else if(c == '+' || c == 0)
c = ' ';
a[o++] = byte c;
}else
o += sys->char2byte(c, a, o);
}
return string a[0: o];
}
hex(c: int): int
{
if(c >= '0' && c <= '9')
return c-'0';
if(c >= 'a' && c <= 'f')
return c-'a' + 10;
if(c >= 'A' && c <= 'F')
return c-'A' + 10;
return -1;
}
# write a failure message to the net and exit
fail(g: ref Private_info,reason : int, message : string)
{
verb : string;
title:=sys->sprint("<head><title>%s</title></head>\n<body bgcolor=#ffffff>\n",
errormsg[reason].concise);
body1:= "<h1> Error </h1>\n<P>" +
"Sorry, Charon is unable to process your request. The webserver reports"+
" the following error <P><b>";
#concise error
body2:="</b><p>for the URL\n<P><b>";
#message
body3:="</b><P>with the following reason:\n<P><b>";
#reason
if (str->in('%',errormsg[reason].verbose)){
(v1,v2):=str->splitl(errormsg[reason].verbose,"%");
verb=v1+message+v2[2:];
}else
verb=errormsg[reason].verbose;
body4:="</b><hr> This Webserver powered by <img src=\"/inferno.gif\">. <P>"+
"For more information click <a href=\"http://inferno.lucent.com\"> here </a>\n"+
"<hr><address>\n";
dtime:=sys->sprint("This information processed at %s.\n",daytime->time());
body5:="</address>\n</body>\n";
strbuf:=title+body1+errormsg[reason].concise+body2+message+body3+
verb+body4+dtime+body5;
if (g.bout!=nil && reason!=2){
g.bufio->g.bout.puts(sys->sprint("%s %s\r\n", g.version, errormsg[reason].num));
g.bufio->g.bout.puts(sys->sprint("Date: %s\r\n", daytime->time()));
g.bufio->g.bout.puts(sys->sprint("Server: Charon\r\n"));
g.bufio->g.bout.puts(sys->sprint("MIME-version: 1.0\r\n"));
g.bufio->g.bout.puts(sys->sprint("Content-Type: text/html\r\n"));
g.bufio->g.bout.puts(sys->sprint("Content-Length: %d\r\n", len strbuf));
g.bufio->g.bout.puts(sys->sprint("\r\n"));
g.bufio->g.bout.puts(strbuf);
g.bufio->g.bout.flush();
}
logit(g,sys->sprint("failing: %s", errormsg[reason].num));
atexit(g);
}
# write successful header
okheaders(g: ref Private_info)
{
g.bufio->g.bout.puts(sys->sprint("%s 200 OK\r\n", g.version));
g.bufio->g.bout.puts("Server: Charon\r\n");
g.bufio->g.bout.puts("MIME-version: 1.0\r\n");
}
notmodified(g: ref Private_info)
{
g.bufio->g.bout.puts(sys->sprint("%s 304 Not Modified\r\n", g.version));
g.bufio->g.bout.puts("Server: Charon\r\n");
g.bufio->g.bout.puts("MIME-version: 1.0\r\n\r\n");
atexit(g);
}
logit(g: ref Private_info,message : string )
{
lock.obtain();
sys->fprint(g.logfile,"%s %s\n", g.remotesys, message);
lock.release();
}
urlconv(p : string): string
{
a := array[Sys->UTFmax] of byte;
t := "";
for(i := 0; i < len p; i++){
c := p[i];
if(c == 0)
continue; # ignore nul bytes
if(c >= Runeself){ # convert to UTF-8
n := sys->char2byte(c, a, 0);
for(j := 0; j < n; j++)
t += sys->sprint("%%%.2X", int a[j]);
}else if(c <= ' ' || c == '%'){
t += sys->sprint("%%%2.2X", c);
} else {
t[len t] = c;
}
}
return t;
}