shithub: libmujs

Download patch

ref: 2ac601d07e226f0243125f2079c7af041b74fa6f
parent: 577585c2d1580e831e07dcfd1cd0d036583a4422
author: Tor Andersson <tor@ccxvii.net>
date: Thu Feb 20 10:07:37 EST 2014

Prepare for regular expression library.

Wrap the POSIX regex calls with the desired API.

--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-SRCS := $(wildcard js*.c utf*.c)
-HDRS := $(wildcard js*.h utf.h)
+SRCS := $(wildcard js*.c utf*.c regex.c)
+HDRS := $(wildcard js*.h utf.h regex.h)
 OBJS := $(SRCS:%.c=build/%.o)
 
 CFLAGS = -Wall -g
--- a/jsgc.c
+++ b/jsgc.c
@@ -3,7 +3,7 @@
 #include "jsvalue.h"
 #include "jsrun.h"
 
-#include <regex.h>
+#include "regex.h"
 
 static void jsG_markobject(js_State *J, int mark, js_Object *obj);
 
@@ -44,10 +44,8 @@
 {
 	if (obj->head)
 		jsG_freeproperty(J, obj->head);
-	if (obj->type == JS_CREGEXP) {
-		regfree(obj->u.r.prog);
-		free(obj->u.r.prog);
-	}
+	if (obj->type == JS_CREGEXP)
+		js_regfree(obj->u.r.prog);
 	if (obj->type == JS_CITERATOR)
 		jsG_freeiterator(J, obj->u.iter.head);
 	free(obj);
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -1,17 +1,16 @@
 #include "jsi.h"
 #include "jsvalue.h"
 #include "jsbuiltin.h"
+#include "regex.h"
 
 #define nelem(a) (sizeof (a) / sizeof (a)[0])
 
-#include <regex.h>
-
 void js_newregexp(js_State *J, const char *pattern, int flags)
 {
-	char msg[256];
+	const char *error;
 	js_Object *obj;
-	regex_t *prog;
-	int opts, status;
+	Reprog *prog;
+	int opts;
 
 	obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
 
@@ -19,13 +18,9 @@
 	if (flags & JS_REGEXP_I) opts |= REG_ICASE;
 	if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
 
-	prog = malloc(sizeof (regex_t));
-	status = regcomp(prog, pattern, opts);
-	if (status) {
-		free(prog);
-		regerror(status, prog, msg, sizeof msg);
-		js_syntaxerror(J, "%s", msg);
-	}
+	prog = js_regcomp(pattern, opts, &error);
+	if (!prog)
+		js_syntaxerror(J, "regular expression: %s", error);
 
 	obj->u.r.prog = prog;
 	obj->u.r.source = pattern;
@@ -36,7 +31,7 @@
 
 int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text)
 {
-	regmatch_t m[10];
+	Resub m[10];
 	int opts;
 	int i;
 
@@ -53,14 +48,14 @@
 		}
 	}
 
-	if (!regexec(re->prog, text, nelem(m), m, opts)) {
+	if (!js_regexec(re->prog, text, nelem(m), m, opts)) {
 		js_newarray(J);
-		for (i = 0; i < nelem(m) && m[i].rm_so >= 0; ++i) {
-			js_pushlstring(J, text + m[i].rm_so, m[i].rm_eo - m[i].rm_so);
+		for (i = 0; i < nelem(m) && m[i].sp; ++i) {
+			js_pushlstring(J, m[i].sp, m[i].ep - m[i].sp);
 			js_setindex(J, -2, i);
 		}
 		if (re->flags & JS_REGEXP_G)
-			re->last = re->last + m[0].rm_eo;
+			re->last = re->last + (m[0].ep - text);
 		return 1;
 	}
 
@@ -75,7 +70,7 @@
 {
 	js_Regexp *re;
 	const char *text;
-	regmatch_t m[10];
+	Resub m[10];
 	int opts;
 
 	re = js_toregexp(J, 0);
@@ -94,9 +89,9 @@
 		}
 	}
 
-	if (!regexec(re->prog, text, nelem(m), m, opts)) {
+	if (!js_regexec(re->prog, text, nelem(m), m, opts)) {
 		if (re->flags & JS_REGEXP_G)
-			re->last = re->last + m[0].rm_eo;
+			re->last = re->last + (m[0].ep - text);
 		js_pushboolean(J, 1);
 		return 1;
 	}
--- a/jsstring.c
+++ b/jsstring.c
@@ -2,9 +2,8 @@
 #include "jsvalue.h"
 #include "jsbuiltin.h"
 #include "utf.h"
+#include "regex.h"
 
-#include <regex.h>
-
 #define nelem(a) (sizeof (a) / sizeof (a)[0])
 
 int js_runeat(js_State *J, const char *s, int i)
@@ -323,9 +322,10 @@
 static int Sp_match(js_State *J, int argc)
 {
 	js_Regexp *re;
-	regmatch_t m[10];
+	Resub m[10];
 	const char *text;
-	unsigned int len, a, b, c, e;
+	unsigned int len;
+	const char *a, *b, *c, *e;
 
 	text = js_tostring(J, 0);
 
@@ -344,17 +344,17 @@
 
 	js_newarray(J);
 
-	e = strlen(text);
 	len = 0;
-	a = 0;
+	a = text;
+	e = text + strlen(text);
 	while (a <= e) {
-		if (regexec(re->prog, text + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+		if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0))
 			break;
 
-		b = a + m[0].rm_so;
-		c = a + m[0].rm_eo;
+		b = m[0].sp;
+		c = m[0].ep;
 
-		js_pushlstring(J, text + b, c - b);
+		js_pushlstring(J, b, c - b);
 		js_setindex(J, -2, len++);
 
 		a = c;
@@ -368,7 +368,7 @@
 static int Sp_search(js_State *J, int argc)
 {
 	js_Regexp *re;
-	regmatch_t m[10];
+	Resub m[10];
 	const char *text;
 
 	text = js_tostring(J, 0);
@@ -382,8 +382,8 @@
 
 	re = js_toregexp(J, -1);
 
-	if (!regexec(re->prog, text, nelem(m), m, 0))
-		js_pushnumber(J, js_utfptrtoidx(text, text + m[0].rm_so));
+	if (!js_regexec(re->prog, text, nelem(m), m, 0))
+		js_pushnumber(J, js_utfptrtoidx(text, m[0].sp));
 	else
 		js_pushnumber(J, -1);
 
@@ -393,7 +393,7 @@
 static int Sp_replace_regexp(js_State *J, int argc)
 {
 	js_Regexp *re;
-	regmatch_t m[10];
+	Resub m[10];
 	const char *source, *s, *r;
 	js_Buffer *sb = NULL;
 	int n, x;
@@ -401,7 +401,7 @@
 	source = js_tostring(J, 0);
 	re = js_toregexp(J, 1);
 
-	if (regexec(re->prog, source, nelem(m), m, 0)) {
+	if (js_regexec(re->prog, source, nelem(m), m, 0)) {
 		js_copy(J, 0);
 		return 1;
 	}
@@ -409,14 +409,14 @@
 	re->last = 0;
 
 loop:
-	s = source + m[0].rm_so;
-	n = m[0].rm_eo - m[0].rm_so;
+	s = m[0].sp;
+	n = m[0].ep - m[0].sp;
 
 	if (js_iscallable(J, 2)) {
 		js_copy(J, 2);
 		js_pushglobal(J);
-		for (x = 0; m[x].rm_so >= 0; ++x) /* arg 0..x: substring and subexps that matched */
-			js_pushlstring(J, source + m[x].rm_so, m[x].rm_eo - m[x].rm_so);
+		for (x = 0; x < nelem(m) && m[x].sp; ++x) /* arg 0..x: substring and subexps that matched; never read past the end of m */
+			js_pushlstring(J, m[x].sp, m[x].ep - m[x].sp);
 		js_pushnumber(J, s - source); /* arg x+2: offset within search string */
 		js_copy(J, 0); /* arg x+3: search string */
 		js_call(J, 2 + x);
@@ -439,8 +439,8 @@
 				case '0': case '1': case '2': case '3': case '4':
 				case '5': case '6': case '7': case '8': case '9':
 					x = *r - '0';
-					if (m[x].rm_so >= 0) {
-						sb_putm(&sb, source + m[x].rm_so, source + m[x].rm_eo);
+					if (m[x].sp) {
+						sb_putm(&sb, m[x].sp, m[x].ep);
 					} else {
 						sb_putc(&sb, '$');
 						sb_putc(&sb, '0'+x);
@@ -459,7 +459,7 @@
 	}
 
 	if (re->flags & JS_REGEXP_G) {
-		source = source + m[0].rm_eo;
+		source = m[0].ep;
 		if (n == 0) {
 			if (*source)
 				sb_putc(&sb, *source++);
@@ -466,7 +466,7 @@
 			else
 				goto end;
 		}
-		if (!regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
+		if (!js_regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
 			goto loop;
 	}
 
@@ -554,12 +554,12 @@
 static int Sp_split_regexp(js_State *J, int argc)
 {
 	js_Regexp *re;
-	regmatch_t m[10];
-	const char *str;
-	unsigned int limit, len, k, e;
-	unsigned int p, a, b, c;
+	Resub m[10];
+	const char *text;
+	unsigned int limit, len, k;
+	const char *p, *a, *b, *c, *e;
 
-	str = js_tostring(J, 0);
+	text = js_tostring(J, 0);
 	re = js_toregexp(J, 1);
 	limit = js_isdefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
 
@@ -566,11 +566,11 @@
 	js_newarray(J);
 	len = 0;
 
-	e = strlen(str);
+	e = text + strlen(text);
 
-	/* splitting the empty string */
-	if (e == 0) {
-		if (regexec(re->prog, str, nelem(m), m, 0)) {
+	/* splitting the empty string: e is an end pointer now, so it equals text when the string is empty */
+	if (e == text) {
+		if (js_regexec(re->prog, text, nelem(m), m, 0)) {
 			if (len == limit) return 1;
 			js_pushliteral(J, "");
 			js_setindex(J, -2, 0);
@@ -578,13 +578,13 @@
 		return 1;
 	}
 
-	p = a = 0;
+	p = a = text;
 	while (a < e) {
-		if (regexec(re->prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+		if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0))
 			break; /* no match */
 
-		b = a + m[0].rm_so;
-		c = a + m[0].rm_eo;
+		b = m[0].sp;
+		c = m[0].ep;
 
 		/* empty string at end of last match */
 		if (b == p) {
@@ -593,12 +593,12 @@
 		}
 
 		if (len == limit) return 1;
-		js_pushlstring(J, str + p, b - p);
+		js_pushlstring(J, p, b - p);
 		js_setindex(J, -2, len++);
 
-		for (k = 1; k < nelem(m) && m[k].rm_so >= 0; ++k) {
+		for (k = 1; k < nelem(m) && m[k].sp; ++k) {
 			if (len == limit) return 1;
-			js_pushlstring(J, str + a + m[k].rm_so, m[k].rm_eo - m[k].rm_so);
+			js_pushlstring(J, m[k].sp, m[k].ep - m[k].sp);
 			js_setindex(J, -2, len++);
 		}
 
@@ -606,7 +606,7 @@
 	}
 
 	if (len == limit) return 1;
-	js_pushstring(J, str + p);
+	js_pushstring(J, p);
 	js_setindex(J, -2, len);
 
 	return 1;
--- /dev/null
+++ b/regex.c
@@ -1,0 +1,75 @@
+#include <stdlib.h>
+#include "regex.h"
+
+/*
+ * Compile pattern with the POSIX regcomp flags in cflags.
+ * Returns the compiled program, or NULL on failure. When errorp is not NULL
+ * it is set to NULL on success and to an error message on failure; messages
+ * from regerror live in a static buffer that the next failing call overwrites.
+ * Release the returned program with js_regfree.
+ */
+Reprog *js_regcomp(const char *pattern, int cflags, const char **errorp)
+{
+	static char msg[256];
+	regex_t *prog;
+	int status;
+
+	prog = malloc(sizeof *prog);
+	if (!prog) {
+		if (errorp)
+			*errorp = "out of memory";
+		return NULL;
+	}
+
+	status = regcomp(prog, pattern, cflags);
+	if (status) {
+		/* regerror reads prog, so build the message before freeing it */
+		if (errorp) {
+			regerror(status, prog, msg, sizeof msg);
+			*errorp = msg;
+		}
+		free(prog);
+		return NULL;
+	}
+
+	if (errorp)
+		*errorp = NULL;
+	return (Reprog*)prog;
+}
+
+/*
+ * Match string against prog, passing eflags through to regexec.
+ * Returns zero on a match, otherwise the non-zero regexec status.
+ * The first nmatch entries of pmatch are always written. On a match, sp/ep
+ * delimit each captured substring inside string. Both are NULL for captures
+ * regexec reports as unset, for entries past the 10 tracked here, and for
+ * every entry when there is no match.
+ */
+int js_regexec(Reprog *prog, const char *string, int nmatch, Resub *pmatch, int eflags)
+{
+	regmatch_t m[10];
+	const int limit = sizeof m / sizeof m[0];
+	int i, status;
+
+	status = regexec((regex_t*)prog, string, limit, m, eflags);
+	for (i = 0; i < nmatch; ++i) {
+		/* m holds valid offsets only after a match, and only limit of them */
+		if (status == 0 && i < limit && m[i].rm_so >= 0) {
+			pmatch[i].sp = string + m[i].rm_so;
+			pmatch[i].ep = string + m[i].rm_eo;
+		} else {
+			pmatch[i].sp = NULL;
+			pmatch[i].ep = NULL;
+		}
+	}
+	return status;
+}
+
+/* Free a program returned by js_regcomp; a NULL prog is ignored. */
+void js_regfree(Reprog *prog)
+{
+	if (prog) {
+		regfree((regex_t*)prog);
+		free(prog);
+	}
+}
--- /dev/null
+++ b/regex.h
@@ -1,0 +1,16 @@
+#ifndef regex_h
+#define regex_h
+
+#include <regex.h>
+
+typedef struct Reprog Reprog;
+typedef struct {
+	const char *sp;
+	const char *ep;
+} Resub;
+
+Reprog *js_regcomp(const char *pattern, int cflags, const char **errorp);
+int js_regexec(Reprog *prog, const char *string, int nmatch, Resub *pmatch, int eflags);
+void js_regfree(Reprog *prog);
+
+#endif