shithub: libmujs

Download patch

ref: fee803ad49d86d1e41e369bbe365fa5c6d3e3495
parent: b60c1cca9b93d4b7975d4e06bc0a633b6c2cb67e
author: Tor Andersson <tor@ccxvii.net>
date: Wed Feb 26 10:06:22 EST 2014

Check for infinite loops matching the empty string at parse time.

--- a/regex.c
+++ b/regex.c
@@ -41,7 +41,7 @@
 	const char *source;
 	unsigned int ncclass;
 	unsigned int ncap;
-	int nref[MAXSUB];
+	Renode *cap[MAXSUB];
 
 	int lookahead;
 	Rune yychar;
@@ -391,9 +391,25 @@
 	return node;
 }
 
+static int empty(Renode *node)
+{
+	if (!node) return 1;
+	switch (node->type) {
+	default: return 1;
+	case P_CAT: return empty(node->x) && empty(node->y);
+	case P_ALT: return empty(node->x) || empty(node->y);
+	case P_REP: return empty(node->x) || node->m == 0;
+	case P_PAR: return empty(node->x);
+	case P_REF: return empty(node->x);
+	case P_ANY: case P_CHAR: case P_CCLASS: case P_NCCLASS: return 0;
+	}
+}
+
 static Renode *newrep(struct cstate *g, Renode *atom, int ng, int min, int max)
 {
 	Renode *rep = newnode(g, P_REP);
+	if (max == REPINF && empty(atom))
+		die(g, "infinite loop matching the empty string");
 	rep->ng = ng;
 	rep->m = min;
 	rep->n = max;
@@ -440,9 +456,10 @@
 	}
 	if (g->lookahead == L_REF) {
 		atom = newnode(g, P_REF);
-		if (g->yychar == 0 || g->yychar > g->ncap || !g->nref[g->yychar])
+		if (g->yychar == 0 || g->yychar > g->ncap || !g->cap[g->yychar])
 			die(g, "invalid back-reference");
 		atom->n = g->yychar;
+		atom->x = g->cap[g->yychar];
 		next(g);
 		return atom;
 	}
@@ -453,9 +470,9 @@
 		if (++g->ncap == MAXSUB)
 			die(g, "too many captures");
 		atom->n = g->ncap;
-		g->nref[atom->n] = 0;
+		g->cap[atom->n] = NULL;
 		atom->x = parsealt(g);
-		g->nref[atom->n] = 1;
+		g->cap[atom->n] = atom;
 		if (!accept(g, ')'))
 			die(g, "unmatched '('");
 		return atom;
@@ -787,7 +804,7 @@
 	g.ncclass = 0;
 	g.ncap = 0;
 	for (i = 0; i < MAXSUB; ++i)
-		g.nref[i] = 0;
+		g.cap[i] = 0;
 
 	g.prog->flags = cflags;