ref: 61f4c085c402af665b436f6d010d61107551d8a3
dir: /sys/src/cmd/troff2html/troff2html.c/
/*
 * troff2html: read troff typesetter output (the one-letter command
 * stream handled by process() below) and write HTML on standard output.
 *
 * Input is accumulated as an array of Chars (rune plus attribute bits,
 * see below) by process(); flush() then walks the array and writes the
 * HTML, opening and closing tags as the attribute bits change.
 */
#include <u.h>
#include <libc.h>
#include <bio.h>

enum{
	Nfont = 11,
	Wid = 20,	/* tmac.anhtml sets page width to 20" so we can recognize .nf text */
};

typedef uintptr Char;
typedef struct Troffchar Troffchar;
typedef struct Htmlchar Htmlchar;
typedef struct Font Font;
typedef struct HTMLfont HTMLfont;

/*
 * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes.
 * must be able to hold a pointer.
 */
enum
{
	Italic = 16,
	Bold,
	CW,
	Indent1,
	Indent2,
	Indent3,
	Heading = 25,
	Anchor = 26,	/* must be last */
};

enum	/* magic emissions */
{
	Estring = 0,	/* the next Char in the array is really a char* to print */
	Epp = 1<<16,	/* paragraph break marker */
};

/* order in which newly set attributes are opened by setattr() */
int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };

/* stack of currently open attributes (bit numbers), innermost last */
int nest[10];
int nnest;

struct Troffchar
{
	char *name;
	char *value;
};

struct Htmlchar
{
	char *utf;
	char *name;
	int value;	/* rune decoded from utf; filled in by main() */
};

#include "chars.h"

struct Font{
	char *name;
	HTMLfont *htmlfont;
};

struct HTMLfont{
	char *name;
	char *htmlname;
	int bit;	/* attribute bit number, or 0 for none */
};

/* R must be first; it's the default representation for fonts we don't recognize */
HTMLfont htmlfonts[] =
{
	"R",		nil,	0,
	"LucidaSans",	nil,	0,
	"I",		"i",	Italic,
	"LucidaSansI",	"i",	Italic,
	"CW",		"tt",	CW,
	"LucidaCW",	"tt",	CW,
	nil,	nil,
};

#define TABLE "<table border=0 cellpadding=0 cellspacing=0>"

/*
 * HTML emitted when an attribute is turned on, indexed by bit number.
 * A leading "<+" asks iputs() to increase the output indent level.
 */
char*
onattr[8*sizeof(int)] =
{
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	"<i>",			/* italic */
	"<b>",			/* bold */
	"<tt><font size=+1>",	/* cw */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",	/* indent1 */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",	/* indent2 */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",	/* indent3 */
	0,
	0,
	0,
	"<p><font size=+1><b>",	/* heading 25 */
	"<unused>",		/* anchor 26; replaced by setattr() with the real <a href> */
};

/*
 * HTML emitted when an attribute is turned off, indexed by bit number.
 * A leading "<-" asks iputs() to decrease the output indent level.
 */
char*
offattr[8*sizeof(int)] =
{
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	"</i>",			/* italic */
	"</b>",			/* bold */
	"</font></tt>",		/* cw */
	"<-/table>",		/* indent1 */
	"<-/table>",		/* indent2 */
	"<-/table>",		/* indent3 */
	0,
	0,
	0,
	"</b></font>",		/* heading 25 */
	"</a>",			/* anchor 26 */
};

Font *font[Nfont];

Biobuf bout;
int debug = 0;

/* troff state */
int page = 1;
int ft = 1;
int vp = 0;
int hp = 0;
int ps = 1;
int res = 720;

int didP = 0;
int atnewline = 1;
int prevlineH = 0;
Char attr = 0;	/* or'ed into each Char */

Char *chars;
int nchars;
int nalloc;
char** anchors;	/* allocated in order */
int nanchors;

char *filename;
int cno;
char buf[8192];
char *title = "Plan 9 man page";

void process(Biobuf*, char*);
void mountfont(int, char*);
void switchfont(int);
void header(char*);
void flush(void);
void trailer(void);

/* malloc that exits via sysfatal instead of returning nil */
void*
emalloc(ulong n)
{
	void *p;

	p = malloc(n);
	if(p == nil)
		sysfatal("malloc failed: %r");
	return p;
}

/* realloc that exits via sysfatal instead of returning nil */
void*
erealloc(void *p, ulong n)
{
	p = realloc(p, n);
	if(p == nil)
		sysfatal("realloc failed: %r");
	return p;
}

/* strdup that exits via sysfatal instead of returning nil */
char*
estrdup(char *s)
{
	char *t;

	t = strdup(s);
	if(t == nil)
		sysfatal("strdup failed: %r");
	return t;
}

void
usage(void)
{
	fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n");
	exits("usage");
}

/* qsort comparison: order Htmlchars by rune value */
int
hccmp(const void *va, const void *vb)
{
	Htmlchar *a, *b;

	a = (Htmlchar*)va;
	b = (Htmlchar*)vb;
	return a->value - b->value;
}

/*
 * Sort the HTML character table for lookup(), parse options,
 * process each input file (or standard input if none is named),
 * then write everything that was collected.
 */
void
main(int argc, char *argv[])
{
	int i;
	Biobuf in, *inp;
	Rune r;

	/* decode each table entry's UTF string once so lookup() can binary search by rune */
	for(i=0; i<nelem(htmlchars); i++){
		chartorune(&r, htmlchars[i].utf);
		htmlchars[i].value = r;
	}
	qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);

	ARGBEGIN{
	case 't':
		title = ARGF();
		if(title == nil)
			usage();
		break;
	case 'd':
		debug++;
		break;
	default:
		usage();
	}ARGEND

	Binit(&bout, 1, OWRITE);
	header(title);
	if(argc == 0){
		Binit(&in, 0, OREAD);
		process(&in, "<stdin>");
	}else{
		for(i=0; i<argc; i++){
			inp = Bopen(argv[i], OREAD);
			if(inp == nil)
				sysfatal("can't open %s: %r", argv[i]);
			process(inp, argv[i]);
			Bterm(inp);
		}
	}
	flush();
	trailer();
	exits(nil);
}

/* append one Char to the chars array, growing it in steps of 10000 */
void
emitchar(Char c)
{
	if(nalloc == nchars){
		nalloc += 10000;
		chars = erealloc(chars, nalloc*sizeof(chars[0]));
	}
	chars[nchars++] = c;
}

/* append rune r tagged with the current attributes */
void
emit(Rune r)
{
	emitchar(r | attr);
	/*
	 * Close man page references early, so that
	 * .IR proof (1),
	 * doesn't make the comma part of the link.
	 */
	if(r == ')')
		attr &= ~(1<<Anchor);
}

/*
 * append a literal string: an Estring marker followed by the pointer.
 * the pointer is kept, not copied, so s must stay valid until flush().
 */
void
emitstr(char *s)
{
	emitchar(Estring);
	emitchar((Char)s);
}

int indentlevel;
int linelen;

/*
 * write one rune, turning a space into a newline once more than
 * 60 runes have been written on the current output line, and writing
 * indentlevel spaces after every newline.
 */
void
iputrune(Biobuf *b, Rune r)
{
	int i;

	if(linelen++ > 60 && r == ' ')
		r = '\n';
	Bputrune(b, r);
	if(r == '\n'){
		for(i=0; i<indentlevel; i++)
			Bprint(b, " ");
		linelen = 0;
	}
}

/*
 * write a string.  "<+..." is written as "<..." on its own line and
 * raises the indent level afterwards; "<-..." lowers the indent level
 * first and is written as "<..." on its own line.
 */
void
iputs(Biobuf *b, char *s)
{
	if(s[0]=='<' && s[1]=='+'){
		iputrune(b, '\n');
		Bprint(b, "<%s", s+2);
		indentlevel++;
		iputrune(b, '\n');
	}else if(s[0]=='<' && s[1]=='-'){
		indentlevel--;
		iputrune(b, '\n');
		Bprint(b, "<%s", s+2);
		iputrune(b, '\n');
	}else
		Bprint(b, "%s", s);
}

/*
 * make the set of open HTML attributes equal to a, writing the
 * closing and opening tags needed to get there from attr while
 * keeping the tags properly nested.
 */
void
setattr(Char a)
{
	Char on, off;
	int i, j;

	on = a & ~attr;
	off = attr & ~a;

	/* walk up the nest stack until we reach something we need to turn off. */
	for(i=0; i<nnest; i++)
		if(off&(1<<nest[i]))
			break;

	/* turn off everything above that */
	for(j=nnest-1; j>=i; j--)
		iputs(&bout, offattr[nest[j]]);

	/* turn on everything we just turned off but didn't want to */
	for(j=i; j<nnest; j++)
		if(a&(1<<nest[j]))
			iputs(&bout, onattr[nest[j]]);
		else
			nest[j] = 0;

	/* shift the zeros (turned off things) up */
	for(i=j=0; i<nnest; i++)
		if(nest[i] != 0)
			nest[j++] = nest[i];
	nnest = j;

	/* now turn on the new attributes */
	for(i=0; i<nelem(attrorder); i++){
		j = attrorder[i];
		if(on&(1<<j)){
			/* each anchor has its own opening tag, recorded by xcmd() in order */
			if(j == Anchor)
				onattr[j] = anchors[nanchors++];
			iputs(&bout, onattr[j]);
			if(nnest >= nelem(nest))
				sysfatal("nesting too deep");
			nest[nnest++] = j;
		}
	}
	attr = a;
}

/* write the HTML for every Char collected so far */
void
flush(void)
{
	int i;
	Char c, a;

	/* restart at the first anchor; setattr() consumes them in order */
	nanchors = 0;
	for(i=0; i<nchars; i++){
		c = chars[i];
		if(c == Estring){
			/* next word is string to print */
			iputs(&bout, (char*)chars[++i]);
			continue;
		}
		if(c == Epp){
			iputrune(&bout, '\n');
			iputs(&bout, TABLE "<tr height=5><td></table>");
			iputrune(&bout, '\n');
			continue;
		}
		a = c & ~0xFFFF;
		c &= 0xFFFF;
		/*
		 * If we're going to turn something off after a space,
		 * let's just turn it off before.
		 */
		if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
			a ^= a & ~chars[i+1];
		setattr(a);
		iputrune(&bout, c & 0xFFFF);
	}
}

/* write the HTML head (with title s) and open the body */
void
header(char *s)
{
	Bprint(&bout, "<head>\n");
	Bprint(&bout, "<title>%s</title>\n", s);
	Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
	Bprint(&bout, "</head>\n");
	Bprint(&bout, "<body bgcolor=#ffffff>\n");
}

/*
 * close the body and the document.
 * note that header() writes no matching <html> opening tag.
 */
void
trailer(void)
{
	Bprint(&bout, "</body></html>\n");
}

/* read one rune, counting it in cno for diagnostics */
int
getc(Biobuf *b)
{
	cno++;
	return Bgetrune(b);
}

/* push back the rune most recently read with getc() */
void
ungetc(Biobuf *b)
{
	cno--;
	Bungetrune(b);
}

/*
 * read the rest of the current line into buf, without the newline.
 * returns buf, or nil if end of input is reached first.
 * reads bytes, not runes, so UTF-8 text survives being stored in buf.
 */
char*
getline(Biobuf *b)
{
	int i, c;

	for(i=0; i<sizeof buf; i++){
		cno++;
		c = Bgetc(b);
		if(c == Beof)
			return nil;
		if(c == '\n'){
			buf[i] = '\0';
			return buf;
		}
		buf[i] = c;
	}
	/* no room left for the terminating NUL */
	sysfatal("line too long at %s:#%d", filename, cno);
	return nil;
}

/*
 * read an unsigned decimal number; returns 0 if no digits are present.
 * the character that ends the number is left unread.
 */
int
getnum(Biobuf *b)
{
	int i, c;

	i = 0;
	for(;;){
		c = getc(b);
		if(c<'0' || '9'<c){
			/* nothing to push back at end of input */
			if(c != Beof)
				ungetc(b);
			break;
		}
		i = i*10 + (c-'0');
	}
	return i;
}

/*
 * read bytes up to the next space, tab or newline into buf.
 * the delimiter is left unread.  returns buf, or nil if end of
 * input is reached first.
 */
char*
getstr(Biobuf *b)
{
	int i, c;

	for(i=0; i<sizeof buf; i++){
		/* must get bytes not runes */
		cno++;
		c = Bgetc(b);
		if(c == Beof)
			return nil;
		if(c == '\n' || c==' ' || c=='\t'){
			/* the byte came from Bgetc, so undo it with Bungetc */
			cno--;
			Bungetc(b);
			buf[i] = '\0';
			return buf;
		}
		buf[i] = c;
	}
	/* no room left for the terminating NUL */
	sysfatal("name too long at %s:#%d", filename, cno);
	return nil;
}

/*
 * read a number for the setting called name and check min <= n < max;
 * exits via sysfatal when it is out of range.
 */
int
setnum(Biobuf *b, char *name, int min, int max)
{
	int i;

	i = getnum(b);
	if(debug > 2)
		fprint(2, "set %s = %d\n", name, i);
	if(min<=i && i<max)
		return i;
	sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
	return i;
}

/*
 * handle an 'x' command; the rest of the input line is its argument.
 * "X ... html [text]" passes text through verbatim; the other forms
 * are dispatched on the first letter of the first field.
 */
void
xcmd(Biobuf *b)
{
	char *p, *q, *fld[16];
	int i, nfld;

	p = getline(b);
	if(p == nil)
		sysfatal("xcmd error: %r");
	if(debug)
		fprint(2, "x command '%s'\n", p);

	/* inline html? */
	if(*p == 'X' && (q = strstr(p+1, "html [")) != nil){
		p = q+6;
		if((q = strrchr(p, ']')) != nil)
			*q = '\0';
		/* buf is reused by the next read, so keep a copy */
		emitstr(estrdup(p));
		return;
	}

	nfld = tokenize(p, fld, nelem(fld));
	if(nfld == 0)
		return;

	switch(fld[0][0]){
	case 'f':
		/* mount font */
		if(nfld != 3)
			break;
		i = atoi(fld[1]);
		if(i<0 || Nfont<=i)
			sysfatal("font %d out of range at %s:#%d", i, filename, cno);
		mountfont(i, fld[2]);
		return;
	case 'i':
		/* init */
		return;
	case 'r':
		/* only fld[0..nfld-1] are set by tokenize; don't print fld[1] unless it exists */
		if(nfld<2 || atoi(fld[1])!=res)
			sysfatal("typesetter has unexpected resolution %s", nfld>1 ? fld[1] : "<unspecified>");
		return;
	case 's':
		/* stop */
		return;
	case 't':
		/* trailer */
		return;
	case 'T':
		if(nfld!=2 || strcmp(fld[1], "utf")!=0)
			sysfatal("output for unknown typesetter type %s", nfld>1 ? fld[1] : "<unspecified>");
		return;
	case 'X':
		if(nfld<3 || strcmp(fld[1], "html")!=0)
			break;
		/* is it a man reference of the form cp(1)? */
		/* X manref start/end cp (1) */
		if(nfld==6 && strcmp(fld[2], "manref")==0){
			/* was the right macro; is it the right form? */
			if(strlen(fld[5])>=3
			&& fld[5][0]=='(' && fld[5][2]==')'
			&& '0'<=fld[5][1] && fld[5][1]<='9'){
				if(strcmp(fld[3], "start") == 0){
					/* set anchor attribute and remember string */
					attr |= (1<<Anchor);
					nanchors++;
					anchors = erealloc(anchors, nanchors*sizeof(char*));
					anchors[nanchors-1] = smprint("<a href=\"/magic/man2html/%c/%s\">", fld[5][1], fld[4]);
					if(anchors[nanchors-1] == nil)
						sysfatal("smprint failed: %r");
				}else if(strcmp(fld[3], "end") == 0)
					attr &= ~(1<<Anchor);
			}
		}else if(strcmp(fld[2], "manPP") == 0){
			didP = 1;
			emitchar(Epp);
		}else if(nfld<4 || strcmp(fld[2], "manref")!=0){
			if(nfld>2 && cistrcmp(fld[2], "<P>")==0){	/* avoid triggering extra <br> */
				didP = 1;
				/* clear all font attributes before paragraph */
				emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
				emitstr("<P>");
				/* next emitted char will turn font attributes back on */
			}else if(nfld>2 && cistrcmp(fld[2], "<H4>")==0)
				attr |= (1<<Heading);
			else if(nfld>2 && cistrcmp(fld[2], "</H4>")==0)
				attr &= ~(1<<Heading);
			else if(nfld>2 && cistrcmp(fld[2], "<B>")==0)
				attr |= (1<<Bold);
			else if(nfld>2 && cistrcmp(fld[2], "</B>")==0)
				attr &= ~(1<<Bold);
			else if(nfld>2 && cistrcmp(fld[2], "<I>")==0)
				attr |= (1<<Italic);
			else if(nfld>2 && cistrcmp(fld[2], "</I>")==0)
				attr &= ~(1<<Italic);
			else if(debug)
				fprint(2, "unknown in-line html %s... at %s:#%d\n", fld[2], filename, cno);
		}
		return;
	}
	if(debug)
		fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
}

/*
 * binary search of tab[0..ntab-1], which must be sorted by value.
 * returns the index of the entry whose value is c, or -1 if none.
 */
int
lookup(int c, Htmlchar tab[], int ntab)
{
	int low, high, mid;

	low = 0;
	high = ntab - 1;
	while(low <= high){
		mid = (low+high)/2;
		if(c < tab[mid].value)
			high = mid - 1;
		else if(c > tab[mid].value)
			low = mid + 1;
		else
			return mid;
	}
	return -1;	/* no match */
}

/* emit rune r, replaced by its name from htmlchars if it has one */
void
emithtmlchar(int r)
{
	int i;

	i = lookup(r, htmlchars, nelem(htmlchars));
	if(i >= 0)
		emitstr(htmlchars[i].name);
	else
		emit(r);
}

/* return the value for troff special character name s, or "??" if unknown */
char*
troffchar(char *s)
{
	int i;

	for(i=0; troffchars[i].name!=nil; i++)
		if(strcmp(s, troffchars[i].name) == 0)
			return troffchars[i].value;
	return "??";
}

/*
 * called before each printed character.  on the first character of a
 * line whose horizontal position differs from the previous line's,
 * recompute the Indent1..Indent3 attributes from hp.
 */
void
indent(void)
{
	int nind;

	didP = 0;
	if(atnewline){
		if(hp != prevlineH){
			prevlineH = hp;
			/* these most peculiar numbers appear in the troff -man output */
			nind = ((prevlineH-1*res)+323)/324;
			attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
			if(nind >= 1)
				attr |= (1<<Indent1);
			if(nind >= 2)
				attr |= (1<<Indent2);
			if(nind >= 3)
				attr |= (1<<Indent3);
		}
		atnewline = 0;
	}
}

/*
 * read troff output commands from b until end of input, emitting
 * Chars as it goes.  name is used only in diagnostics.
 * an unknown command is reported and ends processing of this input.
 */
void
process(Biobuf *b, char *name)
{
	int c, r, v, i;
	char *p;

	cno = 0;
	prevlineH = res;
	filename = name;
	for(;;){
		c = getc(b);
		switch(c){
		case Beof:
			/* go to ground state */
			attr = 0;
			emit('\n');
			return;
		case '\n':
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* two digits of horizontal motion, then a character */
			v = c-'0';
			c = getc(b);
			if(c<'0' || '9'<c)
				sysfatal("illegal character motion at %s:#%d", filename, cno);
			v = v*10 + (c-'0');
			hp += v;
			/* fall through to character case */
		case 'c':
			indent();
			r = getc(b);
			emithtmlchar(r);
			break;
		case 'D':
			/* draw line; ignore */
			do
				c = getc(b);
			while(c!='\n' && c!= Beof);
			break;
		case 'f':
			v = setnum(b, "font", 0, Nfont);
			switchfont(v);
			break;
		case 'h':
			v = setnum(b, "hpos", -20000, 20000);
			/* generate spaces if motion is large and within a line */
			if(!atnewline && v>2*72)
				for(i=0; i<v; i+=72)
					emitstr(" ");
			hp += v;
			break;
		case 'n':
			setnum(b, "n1", -10000, 10000);
			getc(b);	/* space separates */
			setnum(b, "n2", -10000, 10000);
			atnewline = 1;
			if(!didP && hp < (Wid-1)*res)	/* if line is less than 19" long, probably need a line break */
				emitstr("<br>");
			emit('\n');
			break;
		case 'p':
			page = setnum(b, "page", -10000, 10000);
			break;
		case 's':
			ps = setnum(b, "ps", 1, 1000);
			break;
		case 'v':
			vp += setnum(b, "vpos", -10000, 10000);
			/* BUG: ignore motion */
			break;
		case 'x':
			xcmd(b);
			break;
		case 'w':
			emit(' ');
			break;
		case 'C':
			indent();
			p = getstr(b);
			/* getstr returns nil at end of input; troffchar would strcmp it */
			if(p == nil)
				sysfatal("unexpected eof in special character name at %s:#%d", filename, cno);
			emitstr(troffchar(p));
			break;
		case 'H':
			hp = setnum(b, "hpos", 0, 20000);
			break;
		case 'V':
			vp = setnum(b, "vpos", 0, 10000);
			break;
		default:
			fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
			return;
		}
	}
}

/* return the htmlfonts entry called name, or the first entry if there is none */
HTMLfont*
htmlfont(char *name)
{
	int i;

	for(i=0; htmlfonts[i].name!=nil; i++)
		if(strcmp(name, htmlfonts[i].name) == 0)
			return &htmlfonts[i];
	return &htmlfonts[0];
}

/* record that font name is mounted at position pos, replacing any earlier font there */
void
mountfont(int pos, char *name)
{
	if(debug)
		fprint(2, "mount font %s on %d\n", name, pos);
	if(font[pos] != nil){
		free(font[pos]->name);
		free(font[pos]);
	}
	font[pos] = emalloc(sizeof(Font));
	font[pos]->name = estrdup(name);
	font[pos]->htmlfont = htmlfont(name);
}

/* name of the font mounted at pos, for debugging output */
static char*
mountedname(int pos)
{
	if(font[pos] == nil)
		return "<unmounted>";
	return font[pos]->name;
}

/* HTML representation of the font at pos; the default one if nothing is mounted there */
static HTMLfont*
mountedhtml(int pos)
{
	if(font[pos] == nil)
		return &htmlfonts[0];
	return font[pos]->htmlfont;
}

/*
 * make pos the current font: clear the attribute bit of the old
 * font and set the bit of the new one.  pos must be in [0, Nfont);
 * a position with no font mounted is treated as the default font.
 */
void
switchfont(int pos)
{
	HTMLfont *hf;

	if(debug)
		fprint(2, "font change from %d (%s) to %d (%s)\n", ft, mountedname(ft), pos, mountedname(pos));
	if(pos == ft)
		return;
	hf = mountedhtml(ft);
	if(hf->bit != 0)
		attr &= ~(1<<hf->bit);
	ft = pos;
	hf = mountedhtml(ft);
	if(hf->bit != 0)
		attr |= (1<<hf->bit);
}