ref: 107e78e61117bdadc667fb5bdece373b90a26015
parent: 709e7754dcec96e08464e52bb4c1551e427b5c06
author: Ori Bernstein <ori@eigenstate.org>
date: Wed May 11 12:03:33 EDT 2016
Implement regex.search()
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -4,6 +4,7 @@
pkg regex =
const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+ const search : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
const matchfree : (pat : byte[:][:] -> void)
/*
FIXME: implement. This should scan for a possible start char in the
@@ -21,7 +22,7 @@
re.str = str
re.strp = 0
- thr = run(re)
+ thr = run(re, true)
if thr != Zthr
m = getmatches(re, thr)
thrfree(re, thr)
@@ -33,6 +34,26 @@
;;
}
+/*
+  Looks for a match of re anywhere in str.
+
+  Every start offset is tried in increasing order: re.str is pointed at
+  the tail of str beginning at that offset, re.strp is reset to 0, and
+  run() is called with wholestr = false so that a match does not have to
+  reach the end of the string.
+
+  Returns `std.Some of the getmatches() result for the first offset at
+  which run() yields a thread, or `std.None when no offset does.
+  cleanup(re) is called after every attempt, successful or not.
+*/
+const search = {re, str
+	var thr
+	var m
+
+	/*
+	  The bound is inclusive: offset str.len gives an empty tail, which
+	  must still be tried so that empty input, or a pattern that can only
+	  match the empty string at the very end, is not reported as `std.None.
+	*/
+	for var i = 0; i <= str.len; i++
+		re.str = str[i:]
+		re.strp = 0
+		thr = run(re, false)
+		if thr != Zthr
+			/* collect the captures before the thread is released */
+			m = getmatches(re, thr)
+			thrfree(re, thr)
+			cleanup(re)
+			-> `std.Some m
+		else
+			cleanup(re)
+		;;
+	;;
+	-> `std.None
+}
+
const cleanup = {re
var thr, next
@@ -66,12 +87,14 @@
/* returns a matching thread, or Zthr if no threads matched */
-const run = {re
- var ip
+const run = {re, wholestr
+ var bestmatch
var consumed
- var thr
var states
+ var thr
+ var ip
+ bestmatch = Zthr
states = std.mkbs()
re.runq = mkthread(re, 0)
re.runq.mstart = std.slalloc(re.nmatch)
@@ -99,9 +122,18 @@
if thr.dead
thrfree(re, thr)
- elif thr.matched && re.strp == re.str.len
- std.bsfree(states)
- -> thr
+ elif thr.matched
+ trace(re, thr, "new bestmatch\n")
+ if bestmatch != Zthr
+ thrfree(re, bestmatch)
+ ;;
+
+ if re.strp == re.str.len
+ bestmatch = thr
+ goto done
+ elif !wholestr
+ bestmatch = thr
+ ;;
elif !thr.matched
std.bsput(states, thr.ip)
if re.expired == Zthr
@@ -122,8 +154,9 @@
re.expiredtail = Zthr
re.strp++
;;
+:done
std.bsfree(states)
- -> Zthr
+ -> bestmatch
}
/*
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -47,3 +47,11 @@
lib @/lib/sys:sys
lib @/lib/regex:regex
;;
+
+# Test program for regex.search(): search.myr holds the test cases and
+# testmatch.myr provides the testsearch helper that search.myr calls.
+test search =
+ search.myr
+ testmatch.myr
+ lib @/lib/std:std
+ lib @/lib/sys:sys
+ lib @/lib/regex:regex
+;;
--- /dev/null
+++ b/lib/regex/test/search.myr
@@ -1,0 +1,27 @@
+use std
+
+use "testmatch"
+
+/*
+  Runs the regex.search() test cases. Each testsearch call supplies a
+  pattern, the text to search, and the expected result; the comparison
+  itself is carried out by testsearch from testmatch.myr.
+*/
+const main = {
+	/* pattern occurs in the middle of the text */
+	testsearch("bc", "Abcd", `std.Some [][:])
+	/* pattern is identical to the text */
+	testsearch("Abc", "Abc", `std.Some [][:])
+	/* single capture group */
+	testsearch("(bc)", "Abc", `std.Some ["bc"][:])
+	/* capture group ending in .* */
+	testsearch("(bc.*)", "Abcde", `std.Some ["bcde"][:])
+	/* capture group with .* between two literals */
+	testsearch("(b.*c)", "ABbasdfcrap", `std.Some ["basdfc"][:])
+}
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -3,22 +3,32 @@
+/*
+  Exported test helpers. Each takes a pattern, the text to run it
+  against, and the expected result.
+*/
pkg =
const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
+ const testsearch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
;;
+/*
+  Compiles pat with regex.compile and hands the result to run() together
+  with the text and the expected result. The trailing false is run()'s
+  search flag, so the pattern is applied with regex.exec.
+*/
const testmatch = {pat, text, expected
- run(regex.compile(pat), pat, text, expected)
+ run(regex.compile(pat), pat, text, expected, false)
}
+/*
+  Search-mode counterpart of testmatch: compiles pat with regex.compile
+  and passes it to run() with the search flag set to true, so the
+  pattern is applied with regex.search.
+*/
+const testsearch = {pat, text, expected
+	var compiled
+
+	compiled = regex.compile(pat)
+	run(compiled, pat, text, expected, true)
+}
+
+/*
+  Same as testmatch, except that the pattern is compiled with
+  regex.dbgcompile (presumably a debugging variant of compile; its
+  definition is not in this file). The trailing false is run()'s search
+  flag, so the pattern is applied with regex.exec.
+*/
const dbgmatch = {pat, text, expected
- run(regex.dbgcompile(pat), pat, text, expected)
+ run(regex.dbgcompile(pat), pat, text, expected, false)
}
-const run = {regex, pat, text, expected
- var i, re
+const run = {regex, pat, text, expected, search
+ var i, re, r
re = std.try(regex)
- match regex.exec(re, text)
+ if search
+ r = regex.search(re, text)
+ else
+ r = regex.exec(re, text)
+ ;;
+ match r
| `std.Some res:
match expected
| `std.None:
@@ -27,7 +37,7 @@
std.put("\t{}: {}\n", i, res[i])
;;
| `std.Some exp:
- if !std.sleq(res[0], text)
+ if !search && !std.sleq(res[0], text)
std.put("whole match does not match text!\n")
std.fatal("failed matching {} over {}\n", pat, text)
;;