ref: e17dcbba7cff082374c4bb83d20304fdd220dc33
parent: 9838735f9311cb9e134a561c68b6c4ac9be47263
author: Tor Andersson <tor@ccxvii.net>
date: Wed Feb 5 08:57:44 EST 2014
Encapsulate all regexp state in js_Regexp struct.
--- a/jsi.h
+++ b/jsi.h
@@ -12,6 +12,7 @@
#include <math.h>
#include <float.h>
+typedef struct js_Regexp js_Regexp;
typedef struct js_Value js_Value;
typedef struct js_Object js_Object;
typedef struct js_Ast js_Ast;
@@ -38,13 +39,13 @@
void js_newfunction(js_State *J, js_Function *function, js_Environment *scope);
void js_newscript(js_State *J, js_Function *function);
-void *js_toregexp(js_State *J, int idx, int *flags);
+js_Regexp *js_toregexp(js_State *J, int idx);
void js_dup(js_State *J);
void js_rot2(js_State *J);
void js_rot3(js_State *J);
-int js_RegExp_prototype_exec(js_State *J, int idx, const char *text);
+int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text);
/* Exception handling */
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -6,89 +6,111 @@
#include <regex.h>
-int js_RegExp_prototype_exec(js_State *J, int idx, const char *text)
+void js_newregexp(js_State *J, const char *pattern, int flags) /* compile pattern with regcomp and push a new JS_CREGEXP object */
{
+ char msg[256];
js_Object *obj;
- int flags, opts, last;
regex_t *prog;
+ int opts, status;
+
+ obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
+
+ opts = REG_EXTENDED;
+ if (flags & JS_REGEXP_I) opts |= REG_ICASE;
+ if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+
+ prog = malloc(sizeof (regex_t)); /* NOTE(review): result is not checked for NULL */
+ status = regcomp(prog, pattern, opts);
+ if (status) {
+ regerror(status, prog, msg, sizeof msg); /* build the message while prog is still allocated */
+ free(prog);
+ js_syntaxerror(J, "%s", msg); /* NOTE(review): presumably does not return; otherwise the freed prog is stored below */
+ }
+
+ obj->u.r.prog = prog;
+ obj->u.r.source = pattern; /* pointer is stored, not copied -- NOTE(review): confirm pattern outlives the object */
+ obj->u.r.flags = flags;
+ obj->u.r.last = 0;
+ js_pushobject(J, obj);
+}
+
+int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text) /* run re on text; pushes an array of matches or null, returns 1 */
+{
regmatch_t m[10];
+ int opts;
int i;
- prog = js_toregexp(J, idx, &flags);
- obj = js_toobject(J, idx);
-
opts = 0;
- if (flags & JS_REGEXP_G) {
- last = obj->u.r.last;
- if (last < 0 || last > strlen(text)) {
- obj->u.r.last = 0;
+ if (re->flags & JS_REGEXP_G) { /* only global regexps resume from re->last */
+ if (re->last < 0 || re->last > strlen(text)) { /* NOTE(review): re->last is unsigned short, so the < 0 test can never be true */
+ re->last = 0;
js_pushnull(J);
return 1;
}
- if (last > 0) {
- text += last;
+ if (re->last > 0) {
+ text += re->last; /* skip the part already consumed by earlier matches */
opts |= REG_NOTBOL;
}
}
- if (!regexec(prog, text, nelem(m), m, opts)) {
+ if (!regexec(re->prog, text, nelem(m), m, opts)) { /* regexec returns 0 on a match */
js_newarray(J);
for (i = 0; i < nelem(m) && m[i].rm_so >= 0; ++i) {
js_pushlstring(J, text + m[i].rm_so, m[i].rm_eo - m[i].rm_so);
js_setindex(J, -2, i);
}
- if (flags & JS_REGEXP_G)
- obj->u.r.last = last + m[0].rm_eo;
+ if (re->flags & JS_REGEXP_G)
+ re->last = re->last + m[0].rm_eo; /* rm_eo is relative to the advanced text, so add the old offset */
return 1;
}
- if (flags & JS_REGEXP_G)
- obj->u.r.last = 0;
+ if (re->flags & JS_REGEXP_G)
+ re->last = 0; /* no match: the next global search starts from the beginning */
js_pushnull(J);
return 1;
}
-void js_newregexp(js_State *J, const char *pattern, int flags)
+static int Rp_test(js_State *J, int argc) /* pushes a boolean: whether the regexp at stack index 0 matches the string at index 1 */
{
- char msg[256];
- js_Object *obj;
- regex_t *prog;
- int opts, status;
+ js_Regexp *re;
+ const char *text;
+ regmatch_t m[10];
+ int opts;
- obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
+ re = js_toregexp(J, 0);
+ text = js_tostring(J, 1);
- opts = REG_EXTENDED;
- if (flags & JS_REGEXP_I) opts |= REG_ICASE;
- if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+ opts = 0;
+ if (re->flags & JS_REGEXP_G) { /* only global regexps resume from re->last */
+ if (re->last < 0 || re->last > strlen(text)) { /* NOTE(review): re->last is unsigned short, so the < 0 test can never be true */
+ re->last = 0;
+ js_pushboolean(J, 0);
+ return 1;
+ }
+ if (re->last > 0) {
+ text += re->last; /* skip the part already consumed by earlier matches */
+ opts |= REG_NOTBOL;
+ }
+ }
- prog = malloc(sizeof (regex_t));
- status = regcomp(prog, pattern, opts);
- if (status) {
- free(prog);
- regerror(status, prog, msg, sizeof msg);
- js_syntaxerror(J, "%s", msg);
+ if (!regexec(re->prog, text, nelem(m), m, opts)) { /* regexec returns 0 on a match */
+ if (re->flags & JS_REGEXP_G)
+ re->last = re->last + m[0].rm_eo; /* rm_eo is relative to the advanced text, so add the old offset */
+ js_pushboolean(J, 1);
+ return 1;
}
- obj->u.r.prog = prog;
- obj->u.r.flags = flags;
- obj->u.r.last = 0;
- js_pushobject(J, obj);
+ if (re->flags & JS_REGEXP_G)
+ re->last = 0; /* no match: the next global search starts from the beginning */
- js_pushstring(J, pattern);
- js_defproperty(J, -2, "source", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
- js_pushboolean(J, flags & JS_REGEXP_G);
- js_defproperty(J, -2, "global", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
- js_pushboolean(J, flags & JS_REGEXP_I);
- js_defproperty(J, -2, "ignoreCase", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
- js_pushboolean(J, flags & JS_REGEXP_M);
- js_defproperty(J, -2, "multiline", JS_READONLY | JS_DONTENUM | JS_DONTDELETE);
-
- // TODO: lastIndex
+ js_pushboolean(J, 0);
+ return 1;
}
static int jsB_new_RegExp(js_State *J, int argc)
{
+ js_Regexp *old;
const char *pattern;
int flags;
@@ -95,10 +117,9 @@
if (js_isregexp(J, 1)) {
if (argc > 1)
js_typeerror(J, "cannot supply flags when creating one RegExp from another");
- js_toregexp(J, 1, &flags);
- js_getproperty(J, 1, "source");
- pattern = js_tostring(J, -1);
- js_pop(J, 1);
+ old = js_toregexp(J, 1);
+ pattern = old->source;
+ flags = old->flags;
} else if (js_isundefined(J, 1)) {
pattern = "";
flags = 0;
@@ -138,26 +159,18 @@
static int Rp_toString(js_State *J, int argc)
{
- const char *source;
- int flags;
+ js_Regexp *re;
char *out;
- js_Object *self = js_toobject(J, 0);
- if (self->type != JS_CREGEXP)
- js_typeerror(J, "not a regexp");
+ re = js_toregexp(J, 0);
- flags = self->u.r.flags;
-
- js_getproperty(J, 0, "source");
- source = js_tostring(J, -1);
-
- out = malloc(strlen(source) + 6); /* extra space for //gim */
+ out = malloc(strlen(re->source) + 6); /* extra space for //gim */
strcpy(out, "/");
- strcat(out, source);
+ strcat(out, re->source);
strcat(out, "/");
- if (flags & JS_REGEXP_G) strcat(out, "g");
- if (flags & JS_REGEXP_I) strcat(out, "i");
- if (flags & JS_REGEXP_M) strcat(out, "m");
+ if (re->flags & JS_REGEXP_G) strcat(out, "g");
+ if (re->flags & JS_REGEXP_I) strcat(out, "i");
+ if (re->flags & JS_REGEXP_M) strcat(out, "m");
if (js_try(J)) {
free(out);
@@ -172,47 +185,7 @@
static int Rp_exec(js_State *J, int argc)
{
- return js_RegExp_prototype_exec(J, 0, js_tostring(J, 1));
-}
-
-static int Rp_test(js_State *J, int argc)
-{
- int flags, opts, last;
- js_Object *obj;
- regmatch_t m[10];
- regex_t *prog;
- const char *text;
-
- prog = js_toregexp(J, 0, &flags);
- obj = js_toobject(J, 0);
- text = js_tostring(J, 1);
-
- opts = 0;
- if (flags & JS_REGEXP_G) {
- last = obj->u.r.last;
- if (last < 0 || last > strlen(text)) {
- obj->u.r.last = 0;
- js_pushboolean(J, 0);
- return 1;
- }
- if (last > 0) {
- text += last;
- opts |= REG_NOTBOL;
- }
- }
-
- if (!regexec(prog, text, nelem(m), m, opts)) {
- if (flags & JS_REGEXP_G)
- obj->u.r.last = last + m[0].rm_eo;
- js_pushboolean(J, 1);
- return 1;
- }
-
- if (flags & JS_REGEXP_G)
- obj->u.r.last = 0;
-
- js_pushboolean(J, 0);
- return 1;
+ return js_RegExp_prototype_exec(J, js_toregexp(J, 0), js_tostring(J, 1)); /* stack index 0 must be a regexp, index 1 is taken as the text; C does not specify which of the two argument calls runs first */
}
void jsB_initregexp(js_State *J)
--- a/jsrun.c
+++ b/jsrun.c
@@ -207,11 +207,11 @@
return jsV_toprimitive(J, stackidx(J, idx), hint);
}
-void *js_toregexp(js_State *J, int idx, int *flags)
+js_Regexp *js_toregexp(js_State *J, int idx) /* regexp state of the stack value at idx; raises a type error for anything that is not a JS_CREGEXP object */
{
const js_Value *v = stackidx(J, idx);
if (v->type == JS_TOBJECT && v->u.object->type == JS_CREGEXP)
- return *flags = v->u.object->u.r.flags, v->u.object->u.r.prog;
+ return &v->u.object->u.r; /* address of the object's own state, not a copy: writes through it update the object */
js_typeerror(J, "not a regexp");
}
--- a/jsstring.c
+++ b/jsstring.c
@@ -318,10 +318,9 @@
static int Sp_match(js_State *J, int argc)
{
+ js_Regexp *re;
+ regmatch_t m[10];
const char *text;
- regmatch_t m;
- regex_t *prog;
- int flags;
unsigned int len, a, b, c, e;
text = js_tostring(J, 0);
@@ -333,9 +332,9 @@
else
js_newregexp(J, js_tostring(J, 1), 0);
- prog = js_toregexp(J, -1, &flags);
- if (!(flags & JS_REGEXP_G))
- return js_RegExp_prototype_exec(J, -1, text);
+ re = js_toregexp(J, -1);
+ if (!(re->flags & JS_REGEXP_G))
+ return js_RegExp_prototype_exec(J, re, text);
js_newarray(J);
@@ -343,11 +342,11 @@
len = 0;
a = 0;
while (a <= e) {
- if (regexec(prog, text + a, 1, &m, a > 0 ? REG_NOTBOL : 0))
+ if (regexec(re->prog, text + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
break;
- b = a + m.rm_so;
- c = a + m.rm_eo;
+ b = a + m[0].rm_so;
+ c = a + m[0].rm_eo;
js_pushlstring(J, text + b, c - b);
js_setindex(J, -2, len++);
@@ -362,10 +361,9 @@
static int Sp_search(js_State *J, int argc)
{
+ js_Regexp *re;
+ regmatch_t m[10];
const char *text;
- regmatch_t m;
- regex_t *prog;
- int flags;
text = js_tostring(J, 0);
@@ -376,10 +374,10 @@
else
js_newregexp(J, js_tostring(J, 1), 0);
- prog = js_toregexp(J, -1, &flags);
+ re = js_toregexp(J, -1);
- if (!regexec(prog, text, 1, &m, 0))
- js_pushnumber(J, m.rm_so); // TODO: convert to utf-8 index offset
+ if (!regexec(re->prog, text, nelem(m), m, 0))
+ js_pushnumber(J, m[0].rm_so); // TODO: convert to utf-8 index offset
else
js_pushnumber(J, -1);
@@ -388,17 +386,16 @@
static int Sp_replace_regexp(js_State *J, int argc)
{
- const char *source, *s, *r;
+ js_Regexp *re;
regmatch_t m[10];
- regex_t *prog;
- int flags;
+ const char *source, *s, *r;
struct sbuffer *sb = NULL;
int n, x;
source = js_tostring(J, 0);
- prog = js_toregexp(J, 1, &flags);
+ re = js_toregexp(J, 1);
- if (regexec(prog, source, nelem(m), m, 0)) {
+ if (regexec(re->prog, source, nelem(m), m, 0)) {
js_copy(J, 0);
return 1;
}
@@ -453,7 +450,7 @@
}
}
- if (flags & JS_REGEXP_G) {
+ if (re->flags & JS_REGEXP_G) {
source = source + m[0].rm_eo;
if (n == 0) {
if (*source)
@@ -461,7 +458,7 @@
else
goto end;
}
- if (!regexec(prog, source, nelem(m), m, REG_NOTBOL))
+ if (!regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
goto loop;
}
@@ -548,15 +545,15 @@
static int Sp_split_regexp(js_State *J, int argc)
{
- const char *str = js_tostring(J, 0);
- unsigned int limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
+ js_Regexp *re;
regmatch_t m[10];
- regex_t *prog;
- int flags;
- unsigned int len, k, e;
+ const char *str;
+ unsigned int limit, len, k, e;
unsigned int p, a, b, c;
- prog = js_toregexp(J, 1, &flags);
+ str = js_tostring(J, 0);
+ re = js_toregexp(J, 1);
+ limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
js_newarray(J);
len = 0;
@@ -565,7 +562,7 @@
/* splitting the empty string */
if (e == 0) {
- if (regexec(prog, str, nelem(m), m, 0)) {
+ if (regexec(re->prog, str, nelem(m), m, 0)) {
if (len == limit) return 1;
js_pushliteral(J, "");
js_setindex(J, -2, 0);
@@ -575,7 +572,7 @@
p = a = 0;
while (a < e) {
- if (regexec(prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+ if (regexec(re->prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
break; /* no match */
b = a + m[0].rm_so;
--- a/jsvalue.h
+++ b/jsvalue.h
@@ -44,6 +44,14 @@
} u;
};
+struct js_Regexp /* per-object regexp state, embedded in js_Object as u.r */
+{
+ void *prog; /* compiled pattern; js_newregexp stores a malloc'd regex_t here and callers pass it to regexec */
+ const char *source; /* pattern text as given to js_newregexp (the pointer is stored, not a copy) */
+ unsigned short flags; /* JS_REGEXP_G / JS_REGEXP_I / JS_REGEXP_M bits */
+ unsigned short last; /* offset into the subject text where the next global match starts; NOTE(review): offsets above USHRT_MAX do not fit */
+};
+
struct js_Object
{
js_Class type;
@@ -65,11 +73,7 @@
js_CFunction function;
js_CFunction constructor;
} c;
- struct {
- void *prog;
- unsigned short flags;
- unsigned short last;
- } r;
+ js_Regexp r;
struct {
js_Object *target;
js_Iterator *head;