ref: d4124d48f9c7e85fdaa50640035080174c94196e
dir: /sys/src/cmd/deroff.c/
#include <u.h>
#include <libc.h>
#include <bio.h>

/*
 * Deroff command -- strip troff, eqn, and tbl sequences from a file.
 *
 * Flags:
 *	-w		output one word per line rather than the original format.
 *	-_		like -w, but underscores also count as letters of a word.
 *	-mm, -ms	interpret the corresponding macros so that just
 *			sentences are output.
 *	-ml		like -m, and also gets rid of lists.
 *	-i		ignore .so and .nx commands.
 *
 * Unless -i is given, deroff follows .so and .nx commands.  It removes
 * the contents of macro definitions, equations (both .EQ ... .EN and
 * $...$), tbl command sequences, and troff backslash constructions.
 *
 * All input is through the C macro (or C1, which does not look for
 * in-line equation delimiters); the most recently read character is in c.
 */

/* C and C1 used to be large conditional-expression macros; now they just call functions */
#define	C	fC()
#define	C1	fC1()

/* SKIP and SKIP1 are loop headers: the `;' written after them is the empty body */
#define	SKIP	while(C != '\n')
#define SKIP1	while(C1 != '\n')
#define SKIP_TO_COM		SKIP;\
				SKIP;\
				pc=c;\
				while(C != '.' || pc != '\n' || C > 'Z')\
						pc=c

#define YES		1
#define NO		0
#define MS		0
#define MM		1
#define ONE		1
#define TWO		2

#define NOCHAR		-2
#define	EXTENDED	-1		/* All runes above 0x7F */
#define SPECIAL		0
#define APOS		1
#define PUNCT		2
#define DIGIT		3
#define LETTER		4

enum
{
	LINESIZE	= 30000,	/* runes in the line buffer, terminator included */
	FNAMESIZE	= 50,		/* bytes in a .so/.nx file name, NUL included */
	MAXFILES	= 15		/* depth of the input file stack */
};

int	linect	= 0;
int	wordflag= NO;
int	underscoreflag = NO;
int	msflag	= NO;
int	iflag	= NO;
int	mac	= MM;
int	disp	= 0;
int	inmacro	= NO;
int	intable	= NO;
int	eqnflag	= 0;

#define	MAX_ASCII	0X80

char	chars[MAX_ASCII];	/* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */

Rune	line[LINESIZE];
Rune*	lp;			/* last rune stored in line; may rest at line-1 */

long	c;
long	pc;
int	ldelim	= NOCHAR;
int	rdelim	= NOCHAR;

char**	argv;

char	fname[FNAMESIZE];
Biobuf*	files[MAXFILES];
Biobuf**filesp;
Biobuf*	infile;
char*	devnull	= "/dev/null";
Biobuf	bout;

long	skeqn(void);
Biobuf*	opn(char *p);
int	eof(void);
int	charclass(int);
void	getfname(void);
void	fatal(char *s, char *p);
void	usage(void);
void	work(void);
void	putmac(Rune *rp, int vconst);
void	regline(int macline, int vconst);
void	putwords(void);
void	comline(void);
void	macro(void);
void	eqn(void);
void	tbl(void);
void	stbl(void);
void	sdis(char a1, char a2);
void	sce(void);
void	backsl(void);
char*	copys(char *s);
void	refer(int c1);
void	inpic(void);
static void	addrune(long r);

/*
 * Read the next rune into c.  At end of input switch files via eof();
 * in the top-level file an opening eqn delimiter skips the in-line
 * equation via skeqn().  Counts newlines in linect.
 */
int
fC(void)
{
	c = Bgetrune(infile);
	if(c < 0)
		return eof();
	if(c == ldelim && filesp == files)
		return skeqn();
	if(c == '\n')
		linect++;
	return c;
}

/*
 * Like fC, but never treats the rune as an equation delimiter.
 */
int
fC1(void)
{
	c = Bgetrune(infile);
	if(c < 0)		/* same end-of-input test as fC */
		return eof();
	if(c == '\n')
		linect++;
	return c;
}

/*
 * Parse the flags, open the first input (standard input when no file
 * is named), fill in the character class table and process the input.
 * work() does not return; the program exits from eof().
 */
void
main(int argc, char *av[])
{
	int i;
	char *f;

	argv = av;		/* ARGBEGIN then advances the global argv */
	Binit(&bout, 1, OWRITE);
	ARGBEGIN{
	case 'w':
		wordflag = YES;
		break;
	case '_':
		wordflag = YES;
		underscoreflag = YES;
		break;
	case 'm':
		msflag = YES;
		if(f = ARGF())
			switch(*f)
			{
			case 'm':	mac = MM; break;
			case 's':	mac = MS; break;
			case 'l':	disp = 1; break;
			default:	usage();
			}
		else
			usage();
		break;
	case 'i':
		iflag = YES;
		break;
	default:
		usage();
	}ARGEND
	if(*argv)
		infile = opn(*argv++);
	else{
		infile = malloc(sizeof(Biobuf));
		if(infile == 0)
			fatal("Cannot allocate memory", (char*)0);
		Binit(infile, 0, OREAD);
	}
	files[0] = infile;
	filesp = &files[0];

	for(i='a'; i<='z' ; ++i)
		chars[i] = LETTER;
	for(i='A'; i<='Z'; ++i)
		chars[i] = LETTER;
	for(i='0'; i<='9'; ++i)
		chars[i] = DIGIT;
	chars['\''] = APOS;
	chars['&'] = APOS;
	chars['\b'] = APOS;
	chars['.'] = PUNCT;
	chars[','] = PUNCT;
	chars[';'] = PUNCT;
	chars['?'] = PUNCT;
	chars[':'] = PUNCT;
	work();
}

/*
 * Skip an in-line equation up to the closing delimiter, honouring
 * backslash escapes and double-quoted strings.  The equation is
 * replaced by a blank; with -m, eqnflag asks regline to emit an `x'.
 */
long
skeqn(void)
{
	while(C1 != rdelim)
		if(c == '\\')
			c = C1;
		else if(c == '"')
			while(C1 != '"')
				if(c == '\\')
					C1;
	if (msflag)
		eqnflag = 1;
	return(c = ' ');
}

/*
 * Open p for reading.  On failure, quit if -m was given or p is
 * already /dev/null; otherwise complain and read /dev/null instead.
 * Resets linect.  Never returns nil.
 */
Biobuf*
opn(char *p)
{
	Biobuf *fd;

	while ((fd = Bopen(p, OREAD)) == 0) {
		if(msflag || p == devnull)
			fatal("Cannot open file %s - quitting\n", p);
		else {
			fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
			p = devnull;
		}
	}
	linect = 0;
	return(fd);
}

/*
 * End of the current input: close it (unless it is file descriptor 0),
 * then resume the including file, or start the next command-line file,
 * or exit.  Returns the next rune of the new input.
 */
int
eof(void)
{
	if(Bfildes(infile) != 0)
		Bterm(infile);
	if(filesp > files)
		infile = *--filesp;
	else
	if(*argv)
		/*
		 * Record the new file at the bottom of the stack too;
		 * otherwise the end of a later .so would resume the
		 * Biobuf that was just terminated.
		 */
		infile = *filesp = opn(*argv++);
	else
		exits(0);
	return(C);
}

/*
 * Read the file name argument of a .so or .nx request into fname and
 * consume the rest of the line.  fname is left empty when the name
 * does not fit, names one of the standard tmac files checked below,
 * fails the dirstat test below, or has already been used.
 */
void
getfname(void)
{
	char *p;
	Rune r;
	Dir *dir;
	int toolong;
	struct chain
	{
		struct	chain*	nextp;
		char*	datap;
	} *q;

	static struct chain *namechain= 0;

	while(C == ' ')
		;
	toolong = 0;
	for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C) {
		/* keep room for this rune and the terminating NUL */
		if(toolong || p + runelen(r) >= fname + sizeof fname) {
			toolong = 1;
			continue;
		}
		p += runetochar(p, &r);
	}
	*p = '\0';
	while(c != '\n')
		C;
	if(toolong) {
		fprint(2, "deroff: file name too long - ignoring %s...\n", fname);
		fname[0] = '\0';
		return;
	}
	if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
			|| !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
		fname[0] = '\0';
		return;
	}
	dir = dirstat(fname);
	if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
		free(dir);
		fname[0] = '\0';
		return;
	}
	free(dir);
	/*
	 * see if this name has already been used
	 */

	for(q = namechain; q; q = q->nextp)
		if( !strcmp(fname, q->datap)) {
			fname[0] = '\0';
			return;
		}
	q = (struct chain*)malloc(sizeof(struct chain));
	if(q == 0)
		fatal("Cannot allocate memory", (char*)0);
	q->nextp = namechain;
	q->datap = copys(fname);
	namechain = q;
}

/*
 * Print the usage message and exit.
 */
void
usage(void)
{
	fprint(2,"usage: deroff [-nw_pi] [-m (m s l)] [file ...] \n");
	exits("usage");
}

/*
 * Print a diagnostic built from format s and argument p, then exit
 * with s as the exit status.
 */
void
fatal(char *s, char *p)
{
	fprint(2, "deroff: ");
	fprint(2, s, p);
	exits(s);
}

/*
 * Main loop: a line starting with `.' or `'' is a request line,
 * anything else is text.  Ends only when eof() exits.
 */
void
work(void)
{

	for(;;) {
		eqnflag = 0;
		if(C == '.' || c == '\'')
			comline();
		else
			regline(NO, TWO);
	}
}

/*
 * Append r to line after advancing lp, unless the buffer is full;
 * runes of an over-long line are dropped rather than written past
 * the end of the array.
 */
static void
addrune(long r)
{
	if(lp < &line[LINESIZE-1])
		*++lp = r;
}

/*
 * Collect the rest of the current line (whose first rune is already
 * in c) into line, removing backslash constructions, and print it:
 * word by word with -w, through putmac if macline is set, else as is.
 * vconst is passed on to putmac.
 */
void
regline(int macline, int vconst)
{
	line[0] = c;
	lp = line;
	for(;;) {
		if(c == '\\') {
			*lp = ' ';
			backsl();
			if(c == '%' && lp >= line)	/* no blank for hyphenation char */
				lp--;
		}
		if(c == '\n')
			break;
		if(intable && c=='T') {
			addrune(C);
			if(c=='{' || c=='}') {
				if(lp > line)
					lp[-1] = ' ';
				*lp = C;
			}
		} else {
			if(msflag == 1 && eqnflag == 1) {
				eqnflag = 0;
				addrune('x');
			}
			addrune(C);
		}
	}
	/* lp can rest at line-1 after an escape at the start of the line */
	if(lp < line)
		lp = line;
	*lp = '\0';
	if(lp != line) {
		if(wordflag)
			putwords();
		else
		if(macline)
			putmac(line,vconst);
		else
			Bprint(&bout, "%S\n", line);
	}
}

/*
 * Print the arguments of a macro line.  A blank-separated field is
 * printed (without double quotes) when it is longer than vconst runes
 * and begins with two letters; a final lone punctuation rune is
 * printed once some field has been.  With -m, trailing punctuation is
 * repeated on a line of its own.
 */
void
putmac(Rune *rp, int vconst)
{
	Rune *t;
	int found;
	Rune last;

	found = 0;
	last = 0;
	while(*rp) {
		while(*rp == ' ' || *rp == '\t')
			Bputrune(&bout, *rp++);
		for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)
			;
		if(*rp == '\"')
			rp++;
		if(t > rp+vconst && charclass(*rp) == LETTER
				&& charclass(rp[1]) == LETTER) {
			while(rp < t)
				if(*rp == '\"')
					rp++;
				else
					Bputrune(&bout, *rp++);
			last = t[-1];
			found++;
		} else
		if(found && charclass(*rp) == PUNCT && rp[1] == '\0')
			Bputrune(&bout, *rp++);
		else {
			last = t[-1];
			rp = t;
		}
	}
	Bputc(&bout, '\n');
	if(msflag && charclass(last) == PUNCT)
		Bprint(&bout, " %C\n", last);
}

/*
 * break into words for -w option
 */
void
putwords(void)
{
	Rune *p, *p1;
	int i, nlet;

	for(p1 = line;;) {
		/*
		 * skip initial specials ampersands and apostrophes
		 */
		while((i = charclass(*p1)) != EXTENDED && i < DIGIT)
			if(*p1++ == '\0')
				return;
		nlet = 0;
		for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)
			if(i == LETTER || (underscoreflag && *p == '_'))
				nlet++;
		/*
		 * MDM definition of word
		 */
		if(nlet > 1) {
			/*
			 * delete trailing ampersands and apostrophes
			 */
			while(*--p == '\'' || *p == '&'
					   || charclass(*p) == PUNCT)
				;
			while(p1 <= p)
				Bputrune(&bout, *p1++);
			Bputc(&bout, '\n');
		} else
			p1 = p;
	}
}

/*
 * Process a request line: c1 and c2 are the two runes of the request
 * name.  Requests that introduce equations, tables, macro definitions,
 * included files, displays and so on are dispatched to their handlers;
 * anything else is treated as a macro call whose arguments are printed
 * through regline.
 */
void
comline(void)
{
	long c1, c2;

	while(C==' ' || c=='\t')
		;
comx:
	if((c1=c) == '\n')
		return;
	c2 = C;
	if(c1=='.' && c2!='.')
		inmacro = NO;
	if(msflag && c1 == '['){
		refer(c2);
		return;
	}
	if(c2 == '\n')
		return;
	if(c1 == '\\' && c2 == '\"')
		SKIP;
	else
	if (filesp==files && c1=='E' && c2=='Q')
		eqn();
	else
	if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
		if(msflag)
			stbl();
		else
			tbl();
	}
	else
	if(c1=='T' && c2=='E')
		intable = NO;
	else
	if (!inmacro &&
			((c1 == 'd' && c2 == 'e') ||
			 (c1 == 'i' && c2 == 'g') ||
			 (c1 == 'a' && c2 == 'm')))
		macro();
	else
	if(c1=='s' && c2=='o') {
		if(iflag)
			SKIP;
		else {
			getfname();
			if(fname[0]) {
				/* files[] is a fixed-size stack; do not push past its end */
				if(filesp >= &files[MAXFILES-1])
					fprint(2, "deroff: .so nesting too deep - ignoring %s\n", fname);
				else
				if(infile = opn(fname))
					*++filesp = infile;
				else
					infile = *filesp;
			}
		}
	}
	else
	if(c1=='n' && c2=='x') {
		if(iflag)
			SKIP;
		else {
			getfname();
			if(fname[0] == '\0')
				exits(0);
			if(Bfildes(infile) != 0)
				Bterm(infile);
			infile = *filesp = opn(fname);
		}
	}
	else
	if(c1 == 't' && c2 == 'm')
		SKIP;
	else
	if(c1=='h' && c2=='w')
		SKIP;
	else
	if(msflag && c1 == 'T' && c2 == 'L') {
		SKIP_TO_COM;
		goto comx;
	}
	else
	if(msflag && c1=='N' && c2 == 'R')
		SKIP;
	else
	if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
		if(mac==MM)
			SKIP;
		else {
			SKIP_TO_COM;
			goto comx;
		}
	}
	else
	if(msflag && c1=='F' && c2=='S') {
		SKIP_TO_COM;
		goto comx;
	}
	else
	if(msflag && (c1=='S' || c1=='N') && c2=='H') {
		SKIP_TO_COM;
		goto comx;
	}
	else
	if(c1 == 'U' && c2 == 'X') {
		if(wordflag)
			Bprint(&bout, "UNIX\n");
		else
			Bprint(&bout, "UNIX ");
	}
	else
	if(msflag && c1=='O' && c2=='K') {
		SKIP_TO_COM;
		goto comx;
	}
	else
	if(msflag && c1=='N' && c2=='D')
		SKIP;
	else
	if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
		SKIP;
	else
	if(msflag && mac==MM && c2=='L') {
		if(disp || c1=='R')
			sdis('L', 'E');
		else {
			SKIP;
			Bprint(&bout, " .");
		}
	}
	else
	if(!msflag && c1=='P' && c2=='S') {
		inpic();
	}
	else
	if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
		sdis(c1, 'E');
	}
	else
	if(msflag && (c1 == 'K' && c2 == 'F')) {
		sdis(c1,'E');
	}
	else
	if(msflag && c1=='n' && c2=='f')
		sdis('f','i');
	else
	if(msflag && c1=='c' && c2=='e')
		sce();
	else {
		if(c1=='.' && c2=='.') {
			if(msflag) {
				SKIP;
				return;
			}
			while(C == '.')
				;
		}
		inmacro++;
		if(c1 <= 'Z' && msflag)
			regline(YES,ONE);
		else {
			if(wordflag)
				C;
			regline(YES,TWO);
		}
		inmacro--;
	}
}

/*
 * Handle .de, .ig and .am.  With -m the body is skipped here, up to
 * a line that starts with exactly two dots; otherwise the request
 * line is skipped and inmacro is set.
 */
void
macro(void)
{
	if(msflag) {
		do {
			SKIP1;
		} while(C1 != '.' || C1 != '.' || C1 == '.');
		if(c != '\n')
			SKIP;
		return;
	}
	SKIP;
	inmacro = YES;
}

/*
 * Skip a display that was opened by a request starting with a1 and is
 * closed by the request a1 a2 (with nesting counted for lists).  An
 * embedded .EQ inside a .DS is handed to eqn().  When the closing
 * request is found a newline is printed, preceded by " ." unless an
 * equation was seen.
 */
void
sdis(char a1, char a2)
{
	int c1, c2;
	int eqnf;
	int lct;

	if(a1 == 'P'){
		while(C1 == ' ')
			;
		if(c == '<') {
			SKIP1;
			return;
		}
	}
	lct = 0;
	eqnf = 1;
	if(c != '\n')
		SKIP1;
	for(;;) {
		while(C1 != '.')
			if(c == '\n')
				continue;
			else
				SKIP1;
		if((c1=C1) == '\n')
			continue;
		if((c2=C1) == '\n') {
			if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
				return;
			continue;
		}
		if(c1==a1 && c2 == a2) {
			SKIP1;
			if(lct != 0){
				lct--;
				continue;
			}
			if(eqnf)
				Bprint(&bout, " .");
			Bputc(&bout, '\n');
			return;
		} else
		if(a1 == 'L' && c2 == 'L') {
			lct++;
			SKIP1;
		} else
		if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {
			eqn();
			eqnf = 0;
		} else
		if(a1 == 'f') {
			if((mac == MS && c2 == 'P') ||
				(mac == MM && c1 == 'H' && c2 == 'U')){
				SKIP1;
				return;
			}
			SKIP1;
		} else
			SKIP1;
	}
}

/*
 * Start of a table without -m: skip through the line holding the
 * first `.' and mark that we are inside a table.
 */
void
tbl(void)
{
	while(C != '.')
		;
	SKIP;
	intable = YES;
}

/*
 * Start of a table with -m: skip ahead, continuing to the .TE line
 * unless SKIP_TO_COM already stopped on it.
 */
void
stbl(void)
{
	while(C != '.')
		;
	SKIP_TO_COM;
	if(c != 'T' || C != 'E') {
		SKIP;
		pc = c;
		while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
			pc = c;
	}
}

/*
 * Skip a displayed equation up to .EN, tracking `delim' lines to set
 * or clear the in-line equation delimiters.  With -m, and if no delim
 * line was seen, print "x " followed by the punctuation that ended
 * the last equation line, if any.
 */
void
eqn(void)
{
	long c1, c2;
	int dflg;
	char last;

	last = 0;
	dflg = 1;
	SKIP;

	for(;;) {
		if(C1 == '.' || c == '\'') {
			while(C1==' ' || c=='\t')
				;
			if(c=='E' && C1=='N') {
				SKIP;
				if(msflag && dflg) {
					Bputc(&bout, 'x');
					Bputc(&bout, ' ');
					if(last) {
						Bputc(&bout, last);
						Bputc(&bout, '\n');
					}
				}
				return;
			}
		} else
		if(c == 'd') {
			if(C1=='e' && C1=='l')
				if(C1=='i' && C1=='m') {
					while(C1 == ' ')
						;
					if((c1=c)=='\n' || (c2=C1)=='\n' ||
					  (c1=='o' && c2=='f' && C1=='f')) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
			dflg = 0;
		}
		if(c != '\n')
			while(C1 != '\n') {
				if(charclass(c) == PUNCT)
					last = c;
				else
				if(c != ' ')
					last = 0;
			}
	}
}

/*
 * skip over a complete backslash construction
 *
 * lp is only ever moved back to line-1, so that the next store
 * lands on line[0] and never before the start of the buffer.
 */
void
backsl(void)
{
	int bdelim;

sw:
	switch(C1)
	{
	case '"':
		SKIP1;
		return;

	case 's':
		if(C1 == '\\')
			backsl();
		else {
			while(C1>='0' && c<='9')
				;
			Bungetrune(infile);
			c = '0';
		}
		if(lp >= line)
			lp--;
		return;

	case 'f':
	case 'n':
	case '*':
		if(C1 != '(')
			return;
		/* fall through */

	case '(':
		if(msflag) {
			if(C == 'e') {
				if(C1 == 'm') {
					if(lp >= line)
						*lp = '-';
					return;
				}
			} else
			if(c != '\n')
				C1;
			return;
		}
		if(C1 != '\n')
			C1;
		return;

	case '$':
		C1;	/* discard argument number */
		return;

	case 'b':
	case 'x':
	case 'v':
	case 'h':
	case 'w':
	case 'o':
	case 'l':
	case 'L':
		if((bdelim=C1) == '\n')
			return;
		while(C1!='\n' && c!=bdelim)
			if(c == '\\')
				backsl();
		return;

	case '\\':
		if(inmacro)
			goto sw;
		/* fall through */
	default:
		return;
	}
}

/*
 * Return a freshly allocated copy of s; quits if memory runs out.
 */
char*
copys(char *s)
{
	char *t, *t0;

	if((t0 = t = malloc((strlen(s)+1))) == 0)
		fatal("Cannot allocate memory", (char*)0);
	while(*t++ = *s++)
		;
	return(t0);
}

/*
 * Handle .ce with -m: skip the number of lines given as the argument
 * (one by default).  Request lines in the range are not counted; a
 * `.ce 0', or a request whose name begins or ends with P, ends the
 * skipping early.
 */
void
sce(void)
{
	int n = 1;

	while (C != L'\n' && !(L'0' <= c && c <= L'9'))
		;
	if (c != L'\n') {
		for (n = c-L'0';'0' <= C && c <= L'9';)
			n = n*10 + c-L'0';
	}
	while(n) {
		if(C == '.') {
			if(C == 'c') {
				if(C == 'e') {
					while(C == ' ')
						;
					if(c == '0') {
						SKIP;
						break;
					} else
						SKIP;
				} else
					SKIP;
			} else
			if(c == 'P' || C == 'P') {
				if(c != '\n')
					SKIP;
				break;
			} else
			if(c != '\n')
				SKIP;
		} else {
			SKIP;
			n--;
		}
	}
}

/*
 * Skip a reference (.[ ... .]) and print the punctuation, if any,
 * that ends the closing .] line.
 */
void
refer(int c1)
{
	int c2;

	if(c1 != '\n')
		SKIP;
	c2 = 0;
	for(;;) {
		if(C != '.')
			SKIP;
		else {
			if(C != ']')
				SKIP;
			else {
				while(C != '\n')
					c2 = c;
				if(charclass(c2) == PUNCT)
					Bprint(&bout, " %C",c2);
				return;
			}
		}
	}
}

/*
 * Handle a picture (.PS ... .PE) without -m: print only the text of
 * double-quoted strings, one input line's worth at a time.  A `<' on
 * the .PS line ends processing at once.
 */
void
inpic(void)
{
	int c1;
	Rune *p1;

	while(C1 != '\n')
		if(c == '<'){
			SKIP1;
			return;
		}
	p1 = line;
	c = '\n';
	for(;;) {
		c1 = c;
		if(C1 == '.' && c1 == '\n') {
			if(C1 != 'P' || C1 != 'E') {
				if(c != '\n'){
					SKIP1;
					c = '\n';
				}
				continue;
			}
			SKIP1;
			return;
		} else
		if(c == '\"') {
			while(C1 != '\"') {
				if(c == '\\') {
					if(C1 == '\"')
						continue;
					Bungetrune(infile);
					backsl();
				} else
				if(p1 < &line[LINESIZE-1])	/* drop text that does not fit */
					*p1++ = c;
			}
			if(p1 < &line[LINESIZE-1])
				*p1++ = ' ';
		} else
		if(c == '\n' && p1 != line) {
			*p1 = '\0';
			if(wordflag)
				putwords();
			else
				Bprint(&bout, "%S\n\n", line);
			p1 = line;
		}
	}
}

/*
 * Classify a rune: the table entry for ASCII, SPECIAL for the en and
 * em dashes, EXTENDED for everything else (including negative values).
 */
int
charclass(int c)
{
	if(c >= 0 && c < MAX_ASCII)
		return chars[c];
	switch(c){
	case 0x2013: case 0x2014:	/* en dash, em dash */
		return SPECIAL;
	}
	return EXTENDED;
}