shithub: libmujs

Download patch

ref: e17dcbba7cff082374c4bb83d20304fdd220dc33
parent: 9838735f9311cb9e134a561c68b6c4ac9be47263
author: Tor Andersson <tor@ccxvii.net>
date: Wed Feb 5 08:57:44 EST 2014

Encapsulate all regexp state in js_Regexp struct.

--- a/jsi.h
+++ b/jsi.h
@@ -12,6 +12,7 @@
 #include <math.h>
 #include <float.h>
 
+typedef struct js_Regexp js_Regexp;
 typedef struct js_Value js_Value;
 typedef struct js_Object js_Object;
 typedef struct js_Ast js_Ast;
@@ -38,13 +39,13 @@
 void js_newfunction(js_State *J, js_Function *function, js_Environment *scope);
 void js_newscript(js_State *J, js_Function *function);
 
-void *js_toregexp(js_State *J, int idx, int *flags);
+js_Regexp *js_toregexp(js_State *J, int idx);
 
 void js_dup(js_State *J);
 void js_rot2(js_State *J);
 void js_rot3(js_State *J);
 
-int js_RegExp_prototype_exec(js_State *J, int idx, const char *text);
+int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text);
 
 /* Exception handling */
 
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -6,89 +6,111 @@
 
 #include <regex.h>
 
-int js_RegExp_prototype_exec(js_State *J, int idx, const char *text)
+void js_newregexp(js_State *J, const char *pattern, int flags)
 {
+	char msg[256];
 	js_Object *obj;
-	int flags, opts, last;
 	regex_t *prog;
+	int opts, status;
+
+	obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
+
+	opts = REG_EXTENDED;
+	if (flags & JS_REGEXP_I) opts |= REG_ICASE;
+	if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+
+	prog = malloc(sizeof (regex_t));
+	status = regcomp(prog, pattern, opts);
+	if (status) {
+		free(prog);
+		regerror(status, prog, msg, sizeof msg);
+		js_syntaxerror(J, "%s", msg);
+	}
+
+	obj->u.r.prog = prog;
+	obj->u.r.source = pattern;
+	obj->u.r.flags = flags;
+	obj->u.r.last = 0;
+	js_pushobject(J, obj);
+}
+
+int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text)
+{
 	regmatch_t m[10];
+	int opts;
 	int i;
 
-	prog = js_toregexp(J, idx, &flags);
-	obj = js_toobject(J, idx);
-
 	opts = 0;
-	if (flags & JS_REGEXP_G) {
-		last = obj->u.r.last;
-		if (last < 0 || last > strlen(text)) {
-			obj->u.r.last = 0;
+	if (re->flags & JS_REGEXP_G) {
+		if (re->last < 0 || re->last > strlen(text)) {
+			re->last = 0;
 			js_pushnull(J);
 			return 1;
 		}
-		if (last > 0) {
-			text += last;
+		if (re->last > 0) {
+			text += re->last;
 			opts |= REG_NOTBOL;
 		}
 	}
 
-	if (!regexec(prog, text, nelem(m), m, opts)) {
+	if (!regexec(re->prog, text, nelem(m), m, opts)) {
 		js_newarray(J);
 		for (i = 0; i < nelem(m) && m[i].rm_so >= 0; ++i) {
 			js_pushlstring(J, text + m[i].rm_so, m[i].rm_eo - m[i].rm_so);
 			js_setindex(J, -2, i);
 		}
-		if (flags & JS_REGEXP_G)
-			obj->u.r.last = last + m[0].rm_eo;
+		if (re->flags & JS_REGEXP_G)
+			re->last = re->last + m[0].rm_eo;
 		return 1;
 	}
 
-	if (flags & JS_REGEXP_G)
-		obj->u.r.last = 0;
+	if (re->flags & JS_REGEXP_G)
+		re->last = 0;
 
 	js_pushnull(J);
 	return 1;
 }
 
-void js_newregexp(js_State *J, const char *pattern, int flags)
+static int Rp_test(js_State *J, int argc)
 {
-	char msg[256];
-	js_Object *obj;
-	regex_t *prog;
-	int opts, status;
+	js_Regexp *re;
+	const char *text;
+	regmatch_t m[10];
+	int opts;
 
-	obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
+	re = js_toregexp(J, 0);
+	text = js_tostring(J, 1);
 
-	opts = REG_EXTENDED;
-	if (flags & JS_REGEXP_I) opts |= REG_ICASE;
-	if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+	opts = 0;
+	if (re->flags & JS_REGEXP_G) {
+		if (re->last < 0 || re->last > strlen(text)) {
+			re->last = 0;
+			js_pushboolean(J, 0);
+			return 1;
+		}
+		if (re->last > 0) {
+			text += re->last;
+			opts |= REG_NOTBOL;
+		}
+	}
 
-	prog = malloc(sizeof (regex_t));
-	status = regcomp(prog, pattern, opts);
-	if (status) {
-		free(prog);
-		regerror(status, prog, msg, sizeof msg);
-		js_syntaxerror(J, "%s", msg);
+	if (!regexec(re->prog, text, nelem(m), m, opts)) {
+		if (re->flags & JS_REGEXP_G)
+			re->last = re->last + m[0].rm_eo;
+		js_pushboolean(J, 1);
+		return 1;
 	}
 
-	obj->u.r.prog = prog;
-	obj->u.r.flags = flags;
-	obj->u.r.last = 0;
-	js_pushobject(J, obj);
+	if (re->flags & JS_REGEXP_G)
+		re->last = 0;
 
-	js_pushstring(J, pattern);
-	js_defproperty(J, -2, "source", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
-	js_pushboolean(J, flags & JS_REGEXP_G);
-	js_defproperty(J, -2, "global", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
-	js_pushboolean(J, flags & JS_REGEXP_I);
-	js_defproperty(J, -2, "ignoreCase", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
-	js_pushboolean(J, flags & JS_REGEXP_M);
-	js_defproperty(J, -2, "multiline", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
-
-	// TODO: lastIndex
+	js_pushboolean(J, 0);
+	return 1;
 }
 
 static int jsB_new_RegExp(js_State *J, int argc)
 {
+	js_Regexp *old;
 	const char *pattern;
 	int flags;
 
@@ -95,10 +117,9 @@
 	if (js_isregexp(J, 1)) {
 		if (argc > 1)
 			js_typeerror(J, "cannot supply flags when creating one RegExp from another");
-		js_toregexp(J, 1, &flags);
-		js_getproperty(J, 1, "source");
-		pattern = js_tostring(J, -1);
-		js_pop(J, 1);
+		old = js_toregexp(J, 1);
+		pattern = old->source;
+		flags = old->flags;
 	} else if (js_isundefined(J, 1)) {
 		pattern = "";
 		flags = 0;
@@ -138,26 +159,18 @@
 
 static int Rp_toString(js_State *J, int argc)
 {
-	const char *source;
-	int flags;
+	js_Regexp *re;
 	char *out;
 
-	js_Object *self = js_toobject(J, 0);
-	if (self->type != JS_CREGEXP)
-		js_typeerror(J, "not a regexp");
+	re = js_toregexp(J, 0);
 
-	flags = self->u.r.flags;
-
-	js_getproperty(J, 0, "source");
-	source = js_tostring(J, -1);
-
-	out = malloc(strlen(source) + 6); /* extra space for //gim */
+	out = malloc(strlen(re->source) + 6); /* extra space for //gim */
 	strcpy(out, "/");
-	strcat(out, source);
+	strcat(out, re->source);
 	strcat(out, "/");
-	if (flags & JS_REGEXP_G) strcat(out, "g");
-	if (flags & JS_REGEXP_I) strcat(out, "i");
-	if (flags & JS_REGEXP_M) strcat(out, "m");
+	if (re->flags & JS_REGEXP_G) strcat(out, "g");
+	if (re->flags & JS_REGEXP_I) strcat(out, "i");
+	if (re->flags & JS_REGEXP_M) strcat(out, "m");
 
 	if (js_try(J)) {
 		free(out);
@@ -172,47 +185,7 @@
 
 static int Rp_exec(js_State *J, int argc)
 {
-	return js_RegExp_prototype_exec(J, 0, js_tostring(J, 1));
-}
-
-static int Rp_test(js_State *J, int argc)
-{
-	int flags, opts, last;
-	js_Object *obj;
-	regmatch_t m[10];
-	regex_t *prog;
-	const char *text;
-
-	prog = js_toregexp(J, 0, &flags);
-	obj = js_toobject(J, 0);
-	text = js_tostring(J, 1);
-
-	opts = 0;
-	if (flags & JS_REGEXP_G) {
-		last = obj->u.r.last;
-		if (last < 0 || last > strlen(text)) {
-			obj->u.r.last = 0;
-			js_pushboolean(J, 0);
-			return 1;
-		}
-		if (last > 0) {
-			text += last;
-			opts |= REG_NOTBOL;
-		}
-	}
-
-	if (!regexec(prog, text, nelem(m), m, opts)) {
-		if (flags & JS_REGEXP_G)
-			obj->u.r.last = last + m[0].rm_eo;
-		js_pushboolean(J, 1);
-		return 1;
-	}
-
-	if (flags & JS_REGEXP_G)
-		obj->u.r.last = 0;
-
-	js_pushboolean(J, 0);
-	return 1;
+	return js_RegExp_prototype_exec(J, js_toregexp(J, 0), js_tostring(J, 1));
 }
 
 void jsB_initregexp(js_State *J)
--- a/jsrun.c
+++ b/jsrun.c
@@ -207,11 +207,11 @@
 	return jsV_toprimitive(J, stackidx(J, idx), hint);
 }
 
-void *js_toregexp(js_State *J, int idx, int *flags)
+js_Regexp *js_toregexp(js_State *J, int idx)
 {
 	const js_Value *v = stackidx(J, idx);
 	if (v->type == JS_TOBJECT && v->u.object->type == JS_CREGEXP)
-		return *flags = v->u.object->u.r.flags, v->u.object->u.r.prog;
+		return &v->u.object->u.r;
 	js_typeerror(J, "not a regexp");
 }
 
--- a/jsstring.c
+++ b/jsstring.c
@@ -318,10 +318,9 @@
 
 static int Sp_match(js_State *J, int argc)
 {
+	js_Regexp *re;
+	regmatch_t m[10];
 	const char *text;
-	regmatch_t m;
-	regex_t *prog;
-	int flags;
 	unsigned int len, a, b, c, e;
 
 	text = js_tostring(J, 0);
@@ -333,9 +332,9 @@
 	else
 		js_newregexp(J, js_tostring(J, 1), 0);
 
-	prog = js_toregexp(J, -1, &flags);
-	if (!(flags & JS_REGEXP_G))
-		return js_RegExp_prototype_exec(J, -1, text);
+	re = js_toregexp(J, -1);
+	if (!(re->flags & JS_REGEXP_G))
+		return js_RegExp_prototype_exec(J, re, text);
 
 	js_newarray(J);
 
@@ -343,11 +342,11 @@
 	len = 0;
 	a = 0;
 	while (a <= e) {
-		if (regexec(prog, text + a, 1, &m, a > 0 ? REG_NOTBOL : 0))
+		if (regexec(re->prog, text + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
 			break;
 
-		b = a + m.rm_so;
-		c = a + m.rm_eo;
+		b = a + m[0].rm_so;
+		c = a + m[0].rm_eo;
 
 		js_pushlstring(J, text + b, c - b);
 		js_setindex(J, -2, len++);
@@ -362,10 +361,9 @@
 
 static int Sp_search(js_State *J, int argc)
 {
+	js_Regexp *re;
+	regmatch_t m[10];
 	const char *text;
-	regmatch_t m;
-	regex_t *prog;
-	int flags;
 
 	text = js_tostring(J, 0);
 
@@ -376,10 +374,10 @@
 	else
 		js_newregexp(J, js_tostring(J, 1), 0);
 
-	prog = js_toregexp(J, -1, &flags);
+	re = js_toregexp(J, -1);
 
-	if (!regexec(prog, text, 1, &m, 0))
-		js_pushnumber(J, m.rm_so); // TODO: convert to utf-8 index offset
+	if (!regexec(re->prog, text, nelem(m), m, 0))
+		js_pushnumber(J, m[0].rm_so); // TODO: convert to utf-8 index offset
 	else
 		js_pushnumber(J, -1);
 
@@ -388,17 +386,16 @@
 
 static int Sp_replace_regexp(js_State *J, int argc)
 {
-	const char *source, *s, *r;
+	js_Regexp *re;
 	regmatch_t m[10];
-	regex_t *prog;
-	int flags;
+	const char *source, *s, *r;
 	struct sbuffer *sb = NULL;
 	int n, x;
 
 	source = js_tostring(J, 0);
-	prog = js_toregexp(J, 1, &flags);
+	re = js_toregexp(J, 1);
 
-	if (regexec(prog, source, nelem(m), m, 0)) {
+	if (regexec(re->prog, source, nelem(m), m, 0)) {
 		js_copy(J, 0);
 		return 1;
 	}
@@ -453,7 +450,7 @@
 		}
 	}
 
-	if (flags & JS_REGEXP_G) {
+	if (re->flags & JS_REGEXP_G) {
 		source = source + m[0].rm_eo;
 		if (n == 0) {
 			if (*source)
@@ -461,7 +458,7 @@
 			else
 				goto end;
 		}
-		if (!regexec(prog, source, nelem(m), m, REG_NOTBOL))
+		if (!regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
 			goto loop;
 	}
 
@@ -548,15 +545,15 @@
 
 static int Sp_split_regexp(js_State *J, int argc)
 {
-	const char *str = js_tostring(J, 0);
-	unsigned int limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
+	js_Regexp *re;
 	regmatch_t m[10];
-	regex_t *prog;
-	int flags;
-	unsigned int len, k, e;
+	const char *str;
+	unsigned int limit, len, k, e;
 	unsigned int p, a, b, c;
 
-	prog = js_toregexp(J, 1, &flags);
+	str = js_tostring(J, 0);
+	re = js_toregexp(J, 1);
+	limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
 
 	js_newarray(J);
 	len = 0;
@@ -565,7 +562,7 @@
 
 	/* splitting the empty string */
 	if (e == 0) {
-		if (regexec(prog, str, nelem(m), m, 0)) {
+		if (regexec(re->prog, str, nelem(m), m, 0)) {
 			if (len == limit) return 1;
 			js_pushliteral(J, "");
 			js_setindex(J, -2, 0);
@@ -575,7 +572,7 @@
 
 	p = a = 0;
 	while (a < e) {
-		if (regexec(prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+		if (regexec(re->prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
 			break; /* no match */
 
 		b = a + m[0].rm_so;
--- a/jsvalue.h
+++ b/jsvalue.h
@@ -44,6 +44,14 @@
 	} u;
 };
 
+struct js_Regexp
+{
+	void *prog;
+	const char *source;
+	unsigned short flags;
+	unsigned short last;
+};
+
 struct js_Object
 {
 	js_Class type;
@@ -65,11 +73,7 @@
 			js_CFunction function;
 			js_CFunction constructor;
 		} c;
-		struct {
-			void *prog;
-			unsigned short flags;
-			unsigned short last;
-		} r;
+		js_Regexp r;
 		struct {
 			js_Object *target;
 			js_Iterator *head;