shithub: mc

Download patch

ref: 107e78e61117bdadc667fb5bdece373b90a26015
parent: 709e7754dcec96e08464e52bb4c1551e427b5c06
author: Ori Bernstein <ori@eigenstate.org>
date: Wed May 11 12:03:33 EDT 2016

Implement regex.search()

--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -4,6 +4,7 @@
 
 pkg regex =
 	const exec	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+	const search	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
 	const matchfree	: (pat : byte[:][:] -> void)
 	/*
 	FIXME: implement. This should scan for a possible start char in the
@@ -21,7 +22,7 @@
 
 	re.str = str
 	re.strp = 0
-	thr = run(re)
+	thr = run(re, true)
 	if thr != Zthr
 		m = getmatches(re, thr)
 		thrfree(re, thr)
@@ -33,6 +34,26 @@
 	;;
 }
 
+/* search: runs re at each offset of str; yields `std.Some getmatches() for the first hit, else `std.None */
+const search = {re, str
+	var thr, m
+	/* offset str.len is tried too, so an empty match or end anchor can hit on "" or at the end of str */
+	for var i = 0; i <= str.len; i++
+		re.str = str[i:]
+		re.strp = 0
+		thr = run(re, false)	/* false: the match does not have to reach the end of re.str */
+		if thr != Zthr
+			m = getmatches(re, thr)
+			thrfree(re, thr)
+			cleanup(re)
+			-> `std.Some m
+		else
+			cleanup(re)
+		;;
+	;;
+	-> `std.None
+}
+
 const cleanup = {re
 	var thr, next
 
@@ -66,12 +87,14 @@
 
 
 /* returns a matching thread, or Zthr if no threads matched */
-const run = {re
-	var ip
+const run = {re, wholestr
+	var bestmatch
 	var consumed
-	var thr
 	var states
+	var thr
+	var ip
 
+	bestmatch = Zthr
 	states = std.mkbs()
 	re.runq = mkthread(re, 0)
 	re.runq.mstart = std.slalloc(re.nmatch)
@@ -99,9 +122,18 @@
 
 			if thr.dead
 				thrfree(re, thr)
-			elif thr.matched && re.strp == re.str.len
-				std.bsfree(states)
-				-> thr
+			elif thr.matched
+				trace(re, thr, "new bestmatch\n")
+				if bestmatch != Zthr
+					thrfree(re, bestmatch)
+				;;
+
+				if re.strp == re.str.len
+					bestmatch = thr
+					goto done
+				elif !wholestr
+					bestmatch = thr
+				;;
 			elif !thr.matched
 				std.bsput(states, thr.ip)
 				if re.expired == Zthr
@@ -122,8 +154,9 @@
 		re.expiredtail = Zthr
 		re.strp++
 	;;
+:done
 	std.bsfree(states)
-	-> Zthr
+	-> bestmatch
 }
 
 /* 
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -47,3 +47,11 @@
 	lib @/lib/sys:sys
 	lib @/lib/regex:regex
 ;;
+
+test search =
+	search.myr
+	testmatch.myr
+	lib @/lib/std:std
+	lib @/lib/sys:sys
+	lib @/lib/regex:regex
+;;
--- /dev/null
+++ b/lib/regex/test/search.myr
@@ -1,0 +1,12 @@
+use std
+
+use "testmatch"
+
+/* runs testsearch() on patterns whose match starts at or after the beginning of the text */
+const main = {
+	testsearch("bc", "Abcd", `std.Some [][:])
+	testsearch("Abc", "Abc", `std.Some [][:])
+	testsearch("(bc)", "Abc", `std.Some ["bc"][:])
+	testsearch("(bc.*)", "Abcde", `std.Some ["bcde"][:])
+	testsearch("(b.*c)", "ABbasdfcrap", `std.Some ["basdfc"][:])
+}
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -3,22 +3,32 @@
 
 pkg =
 	const testmatch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
+	const testsearch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
 	const dbgmatch	: (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
 ;;
 
 const testmatch = {pat, text, expected
-	run(regex.compile(pat), pat, text, expected)
+	run(regex.compile(pat), pat, text, expected, false)	/* false: run() calls regex.exec(), not regex.search() */
 }
 
+const testsearch = {pat, text, expected
+	run(regex.compile(pat), pat, text, expected, true)	/* true: run() calls regex.search(), not regex.exec() */
+}
+
 const dbgmatch = {pat, text, expected
-	run(regex.dbgcompile(pat), pat, text, expected)
+	run(regex.dbgcompile(pat), pat, text, expected, false)	/* false: run() calls regex.exec(), not regex.search() */
 }
 
-const run = {regex, pat, text, expected
-	var i, re
+const run = {regex, pat, text, expected, search
+	var i, re, r
 
 	re = std.try(regex)
-	match regex.exec(re, text)
+	if search
+		r = regex.search(re, text)
+	else
+		r = regex.exec(re, text)
+	;;
+	match r
 	| `std.Some res:
 		match expected
 		| `std.None:
@@ -27,7 +37,7 @@
 				std.put("\t{}: {}\n", i, res[i])
 			;;
 		| `std.Some exp:
-			if !std.sleq(res[0], text)
+			if !search && !std.sleq(res[0], text)
 				std.put("whole match does not match text!\n")
 				std.fatal("failed matching {} over {}\n", pat, text)
 			;;