shithub: libmujs

Download patch

ref: 557968a22dc65351d89ea0e1c34963b8faf22ed1
parent: 81388eb40d29f10599ac30dde90e683a3c254375
author: Tor Andersson <tor.andersson@artifex.com>
date: Mon Aug 14 10:37:14 EDT 2017

Reduce minimum stack use in regexec by using recursive implementation.

--- a/regexp.c
+++ b/regexp.c
@@ -14,7 +14,6 @@
 #define nelem(a) (int)(sizeof (a) / sizeof (a)[0])
 
 #define REPINF 255
-#define MAXTHREAD 1000
 #define MAXSUB REG_MAXSUB
 #define MAXPROG (32 << 10)
 
@@ -959,188 +958,155 @@
 	return 0;
 }
 
-struct Rethread {
-	Reinst *pc;
-	const char *sp;
-	Resub sub;
-};
-
-static void spawn(Rethread *t, Reinst *pc, const char *sp, Resub *sub)
-{
-	t->pc = pc;
-	t->sp = sp;
-	memcpy(&t->sub, sub, sizeof t->sub);
-}
-
 static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
 {
-	Rethread ready[MAXTHREAD];
 	Resub scratch;
-	Resub sub;
-	Rune c;
-	int nready;
 	int i;
+	Rune c;
 
-	/* queue initial thread */
-	spawn(ready + 0, pc, sp, out);
-	nready = 1;
-
-	/* run threads in stack order */
-	while (nready > 0) {
-		--nready;
-		pc = ready[nready].pc;
-		sp = ready[nready].sp;
-		memcpy(&sub, &ready[nready].sub, sizeof sub);
-		for (;;) {
-			switch (pc->opcode) {
-			case I_END:
-				for (i = 0; i < MAXSUB; ++i) {
-					out->sub[i].sp = sub.sub[i].sp;
-					out->sub[i].ep = sub.sub[i].ep;
-				}
+	for (;;) {
+		switch (pc->opcode) {
+		case I_END:
+			return 1;
+		case I_JUMP:
+			pc = pc->x;
+			break;
+		case I_SPLIT:
+			scratch = *out;
+			if (match(pc->x, sp, bol, flags, &scratch)) {
+				*out = scratch;
 				return 1;
-			case I_JUMP:
-				pc = pc->x;
-				continue;
-			case I_SPLIT:
-				if (nready >= MAXTHREAD) {
-					/* fprintf(stderr, "regexec: backtrack overflow!\n"); */
-					return 0;
-				}
-				spawn(&ready[nready++], pc->y, sp, &sub);
-				pc = pc->x;
-				continue;
+			}
+			pc = pc->y;
+			break;
 
-			case I_PLA:
-				if (!match(pc->x, sp, bol, flags, &sub))
-					goto dead;
-				pc = pc->y;
-				continue;
-			case I_NLA:
-				memcpy(&scratch, &sub, sizeof scratch);
-				if (match(pc->x, sp, bol, flags, &scratch))
-					goto dead;
-				pc = pc->y;
-				continue;
+		case I_PLA:
+			if (!match(pc->x, sp, bol, flags, out))
+				return 0;
+			pc = pc->y;
+			break;
+		case I_NLA:
+			scratch = *out;
+			if (match(pc->x, sp, bol, flags, &scratch))
+				return 0;
+			pc = pc->y;
+			break;
 
-			case I_ANYNL:
-				sp += chartorune(&c, sp);
-				if (c == 0)
-					goto dead;
+		case I_ANYNL:
+			sp += chartorune(&c, sp);
+			if (c == 0)
+				return 0;
+			pc = pc + 1;
+			break;
+		case I_ANY:
+			sp += chartorune(&c, sp);
+			if (c == 0)
+				return 0;
+			if (isnewline(c))
+				return 0;
+			pc = pc + 1;
+			break;
+		case I_CHAR:
+			sp += chartorune(&c, sp);
+			if (c == 0)
+				return 0;
+			if (flags & REG_ICASE)
+				c = canon(c);
+			if (c != pc->c)
+				return 0;
+			pc = pc + 1;
+			break;
+		case I_CCLASS:
+			sp += chartorune(&c, sp);
+			if (c == 0)
+				return 0;
+			if (flags & REG_ICASE) {
+				if (!incclasscanon(pc->cc, canon(c)))
+					return 0;
+			} else {
+				if (!incclass(pc->cc, c))
+					return 0;
+			}
+			pc = pc + 1;
+			break;
+		case I_NCCLASS:
+			sp += chartorune(&c, sp);
+			if (c == 0)
+				return 0;
+			if (flags & REG_ICASE) {
+				if (incclasscanon(pc->cc, canon(c)))
+					return 0;
+			} else {
+				if (incclass(pc->cc, c))
+					return 0;
+			}
+			pc = pc + 1;
+			break;
+		case I_REF:
+			i = out->sub[pc->n].ep - out->sub[pc->n].sp;
+			if (flags & REG_ICASE) {
+				if (strncmpcanon(sp, out->sub[pc->n].sp, i))
+					return 0;
+			} else {
+				if (strncmp(sp, out->sub[pc->n].sp, i))
+					return 0;
+			}
+			if (i > 0)
+				sp += i;
+			pc = pc + 1;
+			break;
+
+		case I_BOL:
+			if (sp == bol && !(flags & REG_NOTBOL)) {
 				pc = pc + 1;
-				continue;
-			case I_ANY:
-				sp += chartorune(&c, sp);
-				if (c == 0)
-					goto dead;
-				if (isnewline(c))
-					goto dead;
-				pc = pc + 1;
-				continue;
-			case I_CHAR:
-				sp += chartorune(&c, sp);
-				if (c == 0)
-					goto dead;
-				if (flags & REG_ICASE)
-					c = canon(c);
-				if (c != pc->c)
-					goto dead;
-				pc = pc + 1;
-				continue;
-			case I_CCLASS:
-				sp += chartorune(&c, sp);
-				if (c == 0)
-					goto dead;
-				if (flags & REG_ICASE) {
-					if (!incclasscanon(pc->cc, canon(c)))
-						goto dead;
-				} else {
-					if (!incclass(pc->cc, c))
-						goto dead;
+				break;
+			}
+			if (flags & REG_NEWLINE) {
+				if (sp > bol && isnewline(sp[-1])) {
+					pc = pc + 1;
+					break;
 				}
+			}
+			return 0;
+		case I_EOL:
+			if (*sp == 0) {
 				pc = pc + 1;
-				continue;
-			case I_NCCLASS:
-				sp += chartorune(&c, sp);
-				if (c == 0)
-					goto dead;
-				if (flags & REG_ICASE) {
-					if (incclasscanon(pc->cc, canon(c)))
-						goto dead;
-				} else {
-					if (incclass(pc->cc, c))
-						goto dead;
-				}
-				pc = pc + 1;
-				continue;
-			case I_REF:
-				i = sub.sub[pc->n].ep - sub.sub[pc->n].sp;
-				if (flags & REG_ICASE) {
-					if (strncmpcanon(sp, sub.sub[pc->n].sp, i))
-						goto dead;
-				} else {
-					if (strncmp(sp, sub.sub[pc->n].sp, i))
-						goto dead;
-				}
-				if (i > 0)
-					sp += i;
-				pc = pc + 1;
-				continue;
-
-			case I_BOL:
-				if (sp == bol && !(flags & REG_NOTBOL)) {
+				break;
+			}
+			if (flags & REG_NEWLINE) {
+				if (isnewline(*sp)) {
 					pc = pc + 1;
-					continue;
+					break;
 				}
-				if (flags & REG_NEWLINE) {
-					if (sp > bol && isnewline(sp[-1])) {
-						pc = pc + 1;
-						continue;
-					}
-				}
-				goto dead;
-			case I_EOL:
-				if (*sp == 0) {
-					pc = pc + 1;
-					continue;
-				}
-				if (flags & REG_NEWLINE) {
-					if (isnewline(*sp)) {
-						pc = pc + 1;
-						continue;
-					}
-				}
-				goto dead;
-			case I_WORD:
-				i = sp > bol && iswordchar(sp[-1]);
-				i ^= iswordchar(sp[0]);
-				if (!i)
-					goto dead;
-				pc = pc + 1;
-				continue;
-			case I_NWORD:
-				i = sp > bol && iswordchar(sp[-1]);
-				i ^= iswordchar(sp[0]);
-				if (i)
-					goto dead;
-				pc = pc + 1;
-				continue;
-
-			case I_LPAR:
-				sub.sub[pc->n].sp = sp;
-				pc = pc + 1;
-				continue;
-			case I_RPAR:
-				sub.sub[pc->n].ep = sp;
-				pc = pc + 1;
-				continue;
-			default:
-				goto dead;
 			}
+			return 0;
+		case I_WORD:
+			i = sp > bol && iswordchar(sp[-1]);
+			i ^= iswordchar(sp[0]);
+			if (!i)
+				return 0;
+			pc = pc + 1;
+			break;
+		case I_NWORD:
+			i = sp > bol && iswordchar(sp[-1]);
+			i ^= iswordchar(sp[0]);
+			if (i)
+				return 0;
+			pc = pc + 1;
+			break;
+
+		case I_LPAR:
+			out->sub[pc->n].sp = sp;
+			pc = pc + 1;
+			break;
+		case I_RPAR:
+			out->sub[pc->n].ep = sp;
+			pc = pc + 1;
+			break;
+		default:
+			return 0;
 		}
-dead: ;
 	}
-	return 0;
 }
 
 int regexec(Reprog *prog, const char *sp, Resub *sub, int eflags)
--- a/regexp.h
+++ b/regexp.h
@@ -28,7 +28,7 @@
 	REG_NOTBOL = 4,
 
 	/* limits */
-	REG_MAXSUB = 16
+	REG_MAXSUB = 10
 };
 
 struct Resub {