ref: a7a315bff61bff58a9c918294f53c1e87d7ae7bb
dir: /sys/src/ape/cmd/expr/regexp.h/
#define CBRA 2 #define CCHR 4 #define CDOT 8 #define CCL 12 #define CDOL 20 #define CEOF 22 #define CKET 24 #define CBACK 36 #define STAR 01 #define RNGE 03 #define NBRA 9 #define PLACE(c) ep[c >> 3] |= bittab[c & 07] #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) char *braslist[NBRA]; char *braelist[NBRA]; int nbra, ebra; char *loc1, *loc2, *locs; int sed; int circf; int low; int size; char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; char * compile(char *instring, char *ep, char *endbuf, int seof) { INIT /* Dependent declarations and initializations */ register c; register eof = seof; char *lastep = instring; int cclcnt; char bracket[NBRA], *bracketp; int closed; char neg; int lc; int i, cflg; lastep = 0; if((c = GETC()) == eof) { if(*ep == 0 && !sed) ERROR(41); RETURN(ep); } bracketp = bracket; circf = closed = nbra = ebra = 0; if (c == '^') circf++; else UNGETC(c); for (;;) { if (ep >= endbuf) ERROR(50); if((c = GETC()) != '*' && ((c != '\\') || (PEEKC() != '{'))) lastep = ep; if (c == eof) { *ep++ = CEOF; RETURN(ep); } switch (c) { case '.': *ep++ = CDOT; continue; case '\n': ERROR(36); case '*': if (lastep==0 || *lastep==CBRA || *lastep==CKET) goto defchar; *lastep |= STAR; continue; case '$': if(PEEKC() != eof) goto defchar; *ep++ = CDOL; continue; case '[': if(&ep[17] >= endbuf) ERROR(50); *ep++ = CCL; lc = 0; for(i = 0; i < 16; i++) ep[i] = 0; neg = 0; if((c = GETC()) == '^') { neg = 1; c = GETC(); } do { if(c == '\0' || c == '\n') ERROR(49); if(c == '-' && lc != 0) { if ((c = GETC()) == ']') { PLACE('-'); break; } while(lc < c) { PLACE(lc); lc++; } } lc = c; PLACE(c); } while((c = GETC()) != ']'); if(neg) { for(cclcnt = 0; cclcnt < 16; cclcnt++) ep[cclcnt] ^= -1; ep[0] &= 0376; } ep += 16; continue; case '\\': switch(c = GETC()) { case '(': if(nbra >= NBRA) ERROR(43); *bracketp++ = nbra; *ep++ = CBRA; *ep++ = nbra++; continue; case ')': if(bracketp <= bracket || ++ebra != nbra) ERROR(42); *ep++ = CKET; *ep++ = *--bracketp; closed++; continue; case '{': if(lastep == (char *) (0)) goto defchar; *lastep |= RNGE; cflg = 0; nlim: c = GETC(); i = 0; do { if ('0' <= c && c <= '9') i = 10 * i + c - '0'; else ERROR(16); } while(((c = GETC()) != '\\') && (c != ',')); if (i > 255) ERROR(11); *ep++ = i; if (c == ',') { if(cflg++) ERROR(44); if((c = GETC()) == '\\') *ep++ = 255; else { UNGETC(c); goto nlim; /* get 2'nd number */ } } if(GETC() != '}') ERROR(45); if(!cflg) /* one number */ *ep++ = i; else if((ep[-1] & 0377) < (ep[-2] & 0377)) ERROR(46); continue; case '\n': ERROR(36); case 'n': c = '\n'; goto defchar; default: if(c >= '1' && c <= '9') { if((c -= '1') >= closed) ERROR(25); *ep++ = CBACK; *ep++ = c; continue; } } /* Drop through to default to use \ to turn off special chars */ defchar: default: lastep = ep; *ep++ = CCHR; *ep++ = c; } } } step(char *p1, char *p2) { register c; if (circf) { loc1 = p1; return(advance(p1, p2)); } /* fast check for first character */ if (*p2==CCHR) { c = p2[1]; do { if (*p1 != c) continue; if (advance(p1, p2)) { loc1 = p1; return(1); } } while (*p1++); return(0); } /* regular algorithm */ do { if (advance(p1, p2)) { loc1 = p1; return(1); } } while (*p1++); return(0); } advance(char *lp, char *ep) { register char *curlp; char c; char *bbeg; int ct; for (;;) switch (*ep++) { case CCHR: if (*ep++ == *lp++) continue; return(0); case CDOT: if (*lp++) continue; return(0); case CDOL: if (*lp==0) continue; return(0); case CEOF: loc2 = lp; return(1); case CCL: c = *lp++ & 0177; if(ISTHERE(c)) { ep += 16; continue; } return(0); case CBRA: braslist[*ep++] = lp; continue; case CKET: braelist[*ep++] = lp; continue; case CCHR|RNGE: c = *ep++; getrnge(ep); while(low--) if(*lp++ != c) return(0); curlp = lp; while(size--) if(*lp++ != c) break; if(size < 0) lp++; ep += 2; goto star; case CDOT|RNGE: getrnge(ep); while(low--) if(*lp++ == '\0') return(0); curlp = lp; while(size--) if(*lp++ == '\0') break; if(size < 0) lp++; ep += 2; goto star; case CCL|RNGE: getrnge(ep + 16); while(low--) { c = *lp++ & 0177; if(!ISTHERE(c)) return(0); } curlp = lp; while(size--) { c = *lp++ & 0177; if(!ISTHERE(c)) break; } if(size < 0) lp++; ep += 18; /* 16 + 2 */ goto star; case CBACK: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; if(ecmp(bbeg, lp, ct)) { lp += ct; continue; } return(0); case CBACK|STAR: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; curlp = lp; while(ecmp(bbeg, lp, ct)) lp += ct; while(lp >= curlp) { if(advance(lp, ep)) return(1); lp -= ct; } return(0); case CDOT|STAR: curlp = lp; while (*lp++); goto star; case CCHR|STAR: curlp = lp; while (*lp++ == *ep); ep++; goto star; case CCL|STAR: curlp = lp; do { c = *lp++ & 0177; } while(ISTHERE(c)); ep += 16; goto star; star: do { if(--lp == locs) break; if (advance(lp, ep)) return(1); } while (lp > curlp); return(0); } } getrnge(char *str) { low = *str++ & 0377; size = *str == 255 ? 20000 : (*str &0377) - low; } ecmp(char *a, char *b, int count) { while(count--) if(*a++ != *b++) return(0); return(1); }