ref: 2ac601d07e226f0243125f2079c7af041b74fa6f
parent: 577585c2d1580e831e07dcfd1cd0d036583a4422
author: Tor Andersson <tor@ccxvii.net>
date: Thu Feb 20 10:07:37 EST 2014
Prepare for regular expression library. Wrap the posix regex calls with the desired API.
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-SRCS := $(wildcard js*.c utf*.c)
-HDRS := $(wildcard js*.h utf.h)
+SRCS := $(wildcard js*.c utf*.c regex.c)
+HDRS := $(wildcard js*.h utf.h regex.h)
OBJS := $(SRCS:%.c=build/%.o)
CFLAGS = -Wall -g
--- a/jsgc.c
+++ b/jsgc.c
@@ -3,7 +3,7 @@
#include "jsvalue.h"
#include "jsrun.h"
-#include <regex.h>
+#include "regex.h"
static void jsG_markobject(js_State *J, int mark, js_Object *obj);
@@ -44,10 +44,8 @@
{
if (obj->head)
jsG_freeproperty(J, obj->head);
- if (obj->type == JS_CREGEXP) {
- regfree(obj->u.r.prog);
- free(obj->u.r.prog);
- }
+ if (obj->type == JS_CREGEXP)
+ js_regfree(obj->u.r.prog);
if (obj->type == JS_CITERATOR)
jsG_freeiterator(J, obj->u.iter.head);
free(obj);
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -1,17 +1,16 @@
#include "jsi.h"
#include "jsvalue.h"
#include "jsbuiltin.h"
+#include "regex.h"
#define nelem(a) (sizeof (a) / sizeof (a)[0])
-#include <regex.h>
-
void js_newregexp(js_State *J, const char *pattern, int flags)
{
- char msg[256];
+ const char *error;
js_Object *obj;
- regex_t *prog;
- int opts, status;
+ Reprog *prog;
+ int opts;
obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
@@ -19,13 +18,9 @@
if (flags & JS_REGEXP_I) opts |= REG_ICASE;
if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
- prog = malloc(sizeof (regex_t));
- status = regcomp(prog, pattern, opts);
- if (status) {
- free(prog);
- regerror(status, prog, msg, sizeof msg);
- js_syntaxerror(J, "%s", msg);
- }
+ prog = js_regcomp(pattern, opts, &error);
+ if (!prog)
+ js_syntaxerror(J, "regular expression: %s", error);
obj->u.r.prog = prog;
obj->u.r.source = pattern;
@@ -36,7 +31,7 @@
int js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text)
{
- regmatch_t m[10];
+ Resub m[10];
int opts;
int i;
@@ -53,14 +48,14 @@
}
}
- if (!regexec(re->prog, text, nelem(m), m, opts)) {
+ if (!js_regexec(re->prog, text, nelem(m), m, opts)) {
js_newarray(J);
- for (i = 0; i < nelem(m) && m[i].rm_so >= 0; ++i) {
- js_pushlstring(J, text + m[i].rm_so, m[i].rm_eo - m[i].rm_so);
+ for (i = 0; i < nelem(m) && m[i].sp; ++i) {
+ js_pushlstring(J, m[i].sp, m[i].ep - m[i].sp);
js_setindex(J, -2, i);
}
if (re->flags & JS_REGEXP_G)
- re->last = re->last + m[0].rm_eo;
+ re->last = re->last + (m[0].ep - text);
return 1;
}
@@ -75,7 +70,7 @@
{
js_Regexp *re;
const char *text;
- regmatch_t m[10];
+ Resub m[10];
int opts;
re = js_toregexp(J, 0);
@@ -94,9 +89,9 @@
}
}
- if (!regexec(re->prog, text, nelem(m), m, opts)) {
+ if (!js_regexec(re->prog, text, nelem(m), m, opts)) {
if (re->flags & JS_REGEXP_G)
- re->last = re->last + m[0].rm_eo;
+ re->last = re->last + (m[0].ep - text);
js_pushboolean(J, 1);
return 1;
}
--- a/jsstring.c
+++ b/jsstring.c
@@ -2,9 +2,8 @@
#include "jsvalue.h"
#include "jsbuiltin.h"
#include "utf.h"
+#include "regex.h"
-#include <regex.h>
-
#define nelem(a) (sizeof (a) / sizeof (a)[0])
int js_runeat(js_State *J, const char *s, int i)
@@ -323,9 +322,10 @@
static int Sp_match(js_State *J, int argc)
{
js_Regexp *re;
- regmatch_t m[10];
+ Resub m[10];
const char *text;
- unsigned int len, a, b, c, e;
+ unsigned int len;
+ const char *a, *b, *c, *e;
text = js_tostring(J, 0);
@@ -344,17 +344,17 @@
js_newarray(J);
- e = strlen(text);
len = 0;
- a = 0;
+ a = text;
+ e = text + strlen(text);
while (a <= e) {
- if (regexec(re->prog, text + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+ if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0))
break;
- b = a + m[0].rm_so;
- c = a + m[0].rm_eo;
+ b = m[0].sp;
+ c = m[0].ep;
- js_pushlstring(J, text + b, c - b);
+ js_pushlstring(J, b, c - b);
js_setindex(J, -2, len++);
a = c;
@@ -368,7 +368,7 @@
static int Sp_search(js_State *J, int argc)
{
js_Regexp *re;
- regmatch_t m[10];
+ Resub m[10];
const char *text;
text = js_tostring(J, 0);
@@ -382,8 +382,8 @@
re = js_toregexp(J, -1);
- if (!regexec(re->prog, text, nelem(m), m, 0))
- js_pushnumber(J, js_utfptrtoidx(text, text + m[0].rm_so));
+ if (!js_regexec(re->prog, text, nelem(m), m, 0))
+ js_pushnumber(J, js_utfptrtoidx(text, m[0].sp));
else
js_pushnumber(J, -1);
@@ -393,7 +393,7 @@
static int Sp_replace_regexp(js_State *J, int argc)
{
js_Regexp *re;
- regmatch_t m[10];
+ Resub m[10];
const char *source, *s, *r;
js_Buffer *sb = NULL;
int n, x;
@@ -401,7 +401,7 @@
source = js_tostring(J, 0);
re = js_toregexp(J, 1);
- if (regexec(re->prog, source, nelem(m), m, 0)) {
+ if (js_regexec(re->prog, source, nelem(m), m, 0)) {
js_copy(J, 0);
return 1;
}
@@ -409,14 +409,14 @@
re->last = 0;
loop:
- s = source + m[0].rm_so;
- n = m[0].rm_eo - m[0].rm_so;
+ s = m[0].sp;
+ n = m[0].ep - m[0].sp;
if (js_iscallable(J, 2)) {
js_copy(J, 2);
js_pushglobal(J);
- for (x = 0; m[x].rm_so >= 0; ++x) /* arg 0..x: substring and subexps that matched */
- js_pushlstring(J, source + m[x].rm_so, m[x].rm_eo - m[x].rm_so);
+ for (x = 0; m[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
+ js_pushlstring(J, m[x].sp, m[x].ep - m[x].sp);
js_pushnumber(J, s - source); /* arg x+2: offset within search string */
js_copy(J, 0); /* arg x+3: search string */
js_call(J, 2 + x);
@@ -439,8 +439,8 @@
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
x = *r - '0';
- if (m[x].rm_so >= 0) {
- sb_putm(&sb, source + m[x].rm_so, source + m[x].rm_eo);
+ if (m[x].sp) {
+ sb_putm(&sb, m[x].sp, m[x].ep);
} else {
sb_putc(&sb, '$');
sb_putc(&sb, '0'+x);
@@ -459,7 +459,7 @@
}
if (re->flags & JS_REGEXP_G) {
- source = source + m[0].rm_eo;
+ source = m[0].ep;
if (n == 0) {
if (*source)
sb_putc(&sb, *source++);
@@ -466,7 +466,7 @@
else
goto end;
}
- if (!regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
+ if (!js_regexec(re->prog, source, nelem(m), m, REG_NOTBOL))
goto loop;
}
@@ -554,12 +554,12 @@
static int Sp_split_regexp(js_State *J, int argc)
{
js_Regexp *re;
- regmatch_t m[10];
- const char *str;
- unsigned int limit, len, k, e;
- unsigned int p, a, b, c;
+ Resub m[10];
+ const char *text;
+ unsigned int limit, len, k;
+ const char *p, *a, *b, *c, *e;
- str = js_tostring(J, 0);
+ text = js_tostring(J, 0);
re = js_toregexp(J, 1);
limit = js_isdefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
@@ -566,11 +566,11 @@
js_newarray(J);
len = 0;
- e = strlen(str);
+ e = text + strlen(text);
/* splitting the empty string */
if (e == 0) {
- if (regexec(re->prog, str, nelem(m), m, 0)) {
+ if (js_regexec(re->prog, text, nelem(m), m, 0)) {
if (len == limit) return 1;
js_pushliteral(J, "");
js_setindex(J, -2, 0);
@@ -578,13 +578,13 @@
return 1;
}
- p = a = 0;
+ p = a = text;
while (a < e) {
- if (regexec(re->prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
+ if (js_regexec(re->prog, a, nelem(m), m, a > text ? REG_NOTBOL : 0))
break; /* no match */
- b = a + m[0].rm_so;
- c = a + m[0].rm_eo;
+ b = m[0].sp;
+ c = m[0].ep;
/* empty string at end of last match */
if (b == p) {
@@ -593,12 +593,12 @@
}
if (len == limit) return 1;
- js_pushlstring(J, str + p, b - p);
+ js_pushlstring(J, p, b - p);
js_setindex(J, -2, len++);
- for (k = 1; k < nelem(m) && m[k].rm_so >= 0; ++k) {
+ for (k = 1; k < nelem(m) && m[k].sp; ++k) {
if (len == limit) return 1;
- js_pushlstring(J, str + a + m[k].rm_so, m[k].rm_eo - m[k].rm_so);
+ js_pushlstring(J, m[k].sp, m[k].ep - m[k].sp);
js_setindex(J, -2, len++);
}
@@ -606,7 +606,7 @@
}
if (len == limit) return 1;
- js_pushstring(J, str + p);
+ js_pushstring(J, p);
js_setindex(J, -2, len);
return 1;
--- /dev/null
+++ b/regex.c
@@ -1,0 +1,45 @@
+#include <stdlib.h>
+#include "regex.h"
+
+/* Compile 'pattern' with regcomp() and 'cflags'; returns the program, or NULL on failure.
+ * When 'errorp' is non-NULL it receives NULL on success, or on failure a message held in a
+ * static buffer that the next failing call overwrites. Free the result with js_regfree(). */
+Reprog *js_regcomp(const char *pattern, int cflags, const char **errorp)
+{
+	static char msg[256];
+	regex_t *prog = malloc(sizeof *prog);
+	int status = prog ? regcomp(prog, pattern, cflags) : REG_ESPACE; /* allocation failure */
+	if (status) {
+		if (errorp) regerror(status, prog, msg, sizeof msg); /* reads prog: format before freeing */
+		free(prog);
+		prog = NULL;
+	}
+	if (errorp)
+		*errorp = status ? msg : NULL;
+	return (Reprog*)prog;
+}
+
+/* Match 'string' against 'prog' with regexec() and fill pmatch[0..nmatch-1] with start/end
+ * pointers into 'string'. Unset groups, slots beyond the internal 10-entry buffer, and every
+ * slot after a failed match are set to NULL/NULL. Returns regexec()'s status (0 on a match). */
+int js_regexec(Reprog *prog, const char *string, int nmatch, Resub *pmatch, int eflags)
+{
+	regmatch_t m[10];
+	int i, cap = sizeof m / sizeof m[0];
+	int status = regexec((regex_t*)prog, string, cap, m, eflags);
+	for (i = 0; i < nmatch; ++i) {
+		/* regexec() only fills m[] on a successful match, and m[] has just 'cap' entries */
+		int hit = status == 0 && i < cap && m[i].rm_so >= 0;
+		pmatch[i].sp = hit ? string + m[i].rm_so : NULL;
+		pmatch[i].ep = hit ? string + m[i].rm_eo : NULL;
+	}
+	return status;
+}
+
+/* Dispose of a program from js_regcomp(): regfree() its contents, then free its storage. NULL is ignored. */
+void js_regfree(Reprog *prog)
+{
+	if (!prog) return;
+	regfree((regex_t*)prog);
+	free(prog);
+}
--- /dev/null
+++ b/regex.h
@@ -1,0 +1,16 @@
+#ifndef regex_h
+#define regex_h
+
+#include <regex.h>
+/* Regular expression wrapper API; regex.c implements it on top of the POSIX <regex.h> calls. */
+typedef struct Reprog Reprog; /* opaque compiled program; regex.c casts it to and from regex_t */
+typedef struct {
+ const char *sp; /* start of the match inside the searched string; NULL when the group is unset */
+ const char *ep; /* end of the match inside the searched string; NULL when the group is unset */
+} Resub;
+/* js_regcomp returns NULL on failure; a non-NULL errorp receives the error message, or NULL on success. */
+Reprog *js_regcomp(const char *pattern, int cflags, const char **errorp);
+int js_regexec(Reprog *prog, const char *string, int nmatch, Resub *pmatch, int eflags); /* returns regexec()'s status; fills pmatch[0..nmatch-1] */
+void js_regfree(Reprog *prog); /* a NULL prog is ignored */
+
+#endif