shithub: mc

Download patch

ref: cefdbe00dfad4086e2f3ba7cd0007d729e77e137
parent: 830f28c844022a71f6c7ad1caf1bcfb7ca9397dc
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Jun 8 08:09:30 EDT 2016

Add matching that returns indexes.

--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -3,12 +3,20 @@
 use "types"
 
 pkg regex =
+	/* regex execution: a successful match is returned as a list of byte slices */
 	const exec	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
 	const search	: (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+
+	/* regex execution returning indexes: (start, end) pairs, (-1, -1) for a group with no recorded bounds */
+	const iexec	: (re : regex#, str : byte[:] -> std.option((std.size, std.size)[:]))
+	const isearch	: (re : regex#, str : byte[:] -> std.option((std.size, std.size)[:]))
+
+	/* substitution; the sb* variants write into a caller-supplied std.strbuf */
 	const sub	: (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:]))
 	const sbsub	: (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool)
 	const suball	: (re : regex#, str : byte[:], subst : byte[:][:] -> byte[:])
 	const sbsuball	: (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> void)
+
 	const matchfree	: (pat : byte[:][:] -> void)
 ;;
 
@@ -16,17 +24,23 @@
 const Zthr = (0 : rethread#)
 
 const exec = {re, str
-	var thr
-	var m
+	var thr, m
 
-	re.str = str
-	re.strp = 0
-	thr = run(re, true)
+	thr = run(re, str, 0, true)	/* run() sets re.str = str and re.strp = 0 itself; 0 is idx, true is wholestr */
 	m = getmatches(re, thr)
 	cleanup(re)
 	-> m
 }
 
+const iexec = {re, str
+	var winner, idxs
+	/* same sequence as exec, but the result is (start, end) pairs rather than byte slices */
+	winner = run(re, str, 0, true)
+	idxs = getidxmatches(re, winner)
+	cleanup(re)
+	-> idxs
+}
+
 const search = {re, str
 	var thr
 	var m
@@ -33,9 +47,7 @@
 
 	m = `std.None
 	for var i = 0; i < str.len; i++
-		re.str = str[i:]
-		re.strp = 0
-		thr = run(re, false)
+		thr = run(re, str[i:], 0, false)
 		m = getmatches(re, thr)
 		match m
 		| `std.Some _:	break
@@ -46,6 +58,38 @@
 	->  m
 }
 
+/*
+ * Like search, but yields (start, end) index pairs. The pattern is tried
+ * at each offset i of str in turn, and the first offset that matches wins.
+ * run() only ever sees str[i:], so the pairs it produces count from i;
+ * they are shifted by i before returning so that they index into str.
+ * Pairs that getidxmatches left as (-1, -1) are not shifted.
+ */
+const isearch = {re, str
+	var thr, m
+	var lo, hi
+
+	m = `std.None
+	for var i = 0; i < str.len; i++
+		thr = run(re, str[i:], 0, false)
+		m = getidxmatches(re, thr)
+		/* clean up after every attempt, the winning one included, as iexec does */
+		cleanup(re)
+		match m
+		| `std.Some idx:
+			for var j = 0; j < idx.len; j++
+				(lo, hi) = idx[j]
+				if lo != -1 && hi != -1
+					idx[j] = (lo + i, hi + i)
+				;;
+			;;
+			break
+		| `std.None:	/* nothing */
+		;;
+	;;
+	->  m
+}
+
 const sub = {re, str, subst
 	var sb
 
@@ -65,9 +94,7 @@
 		-> false
 	;;
 
-	re.str = str
-	re.strp = 0
-	thr = run(re, true)
+	thr = run(re, str, 0, true)
 	if thr == Zthr
 		m = false
 	else
@@ -95,9 +122,7 @@
 
 	i = 0
 	while i < str.len
-		re.str = str[i:]
-		re.strp = 0
-		thr = run(re, false)
+		thr = run(re, str[i:], 0, false)
 		if thr == Zthr
 			std.sbputb(sb, str[i])
 			i++
@@ -164,14 +189,34 @@
 	-> `std.Some ret
 }
 
+const getidxmatches = {re, thr
+	var pairs	/* one (start, end) tuple per entry, re.nmatch entries in all */
 
+	if thr == Zthr
+		-> `std.None
+	;;
+	pairs = std.slalloc(re.nmatch)
+	for var k = 0; k < re.nmatch; k++
+		if thr.mstart[k] == -1 || thr.mend[k] == -1
+			pairs[k] = (-1, -1)
+		else
+			pairs[k] = (thr.mstart[k], thr.mend[k])
+		;;
+	;;
+	thrfree(re, thr)
+	-> `std.Some pairs
+}
+
 /* returns a matching thread, or Zthr if no threads matched */
-const run = {re, wholestr
+const run = {re, str, idx, wholestr
 	var bestmatch
 	var consumed
 	var states
 	var thr
 	var ip
+
+	re.str = str
+	re.strp = 0
 
 	bestmatch = Zthr
 	states = std.mkbs()
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -5,6 +5,15 @@
 	lib @/lib/sys:sys
 	lib @/lib/regex:regex
 ;;
+
+test idxmatch =
+	idxmatch.myr
+	testmatch.myr
+	lib @/lib/std:std
+	lib @/lib/sys:sys
+	lib @/lib/regex:regex
+;;
+
 test boundaries =
 	boundaries.myr
 	testmatch.myr
--- /dev/null
+++ b/lib/regex/test/idxmatch.myr
@@ -1,0 +1,48 @@
+use std
+
+use "testmatch"
+
+const main = {
+	var s : byte[:]
+	/* 12 identical runs of 'a' joined with no separator; the expectations below assume 408 bytes in total */
+	s = std.strjoin([
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+	][:], "")
+
+	testidxmatch(".*bc", "Abc", `std.Some [(0, 3)][:])
+	testidxmatch("(a*)*", "a", `std.Some [(0,1), (0, 1)][:])
+	testidxmatch("(aa|aab?)*", s, `std.Some [(0, 408), (406, 408)][:])
+	/* greedy matches: the group is expected to run to the last '>' */
+	testidxmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [
+			(0, 18),
+			(0, 18),
+		][:])
+	testidxmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [
+			(0, 18),
+			(0, 18),
+		][:])
+	/* reluctant matches: the group is expected to stop at the first '>' it can */
+	testidxmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [
+			(0, 18),
+			(0, 7),
+		][:])
+	testidxmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [
+			(0, 18),
+			(0, 7),
+		][:])
+	testidxmatch(".*(<b.+?>).*", "<a foo> blah <bar>", `std.Some [
+			(0, 18),
+			(13, 18),
+		][:])
+}
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -8,6 +8,12 @@
 		expected : std.option(byte[:][:]) \
 		-> void)
 
+	const testidxmatch	: (\
+		pat : byte[:], \
+		text : byte[:], \
+		expected : std.option((std.size, std.size)[:]) \
+		-> void)
+
 	const testsearch	: ( \
 		pat : byte[:], \
 		text : byte[:], \
@@ -39,6 +45,10 @@
 	run(regex.compile(pat), pat, text, expected, false)
 }
 
+const testidxmatch = {pat, text, expected
+	runidx(regex.compile(pat), pat, text, expected, false)	/* false: runidx uses regex.iexec, not regex.isearch */
+}
+
 const testsearch = {pat, text, expected
 	run(regex.compile(pat), pat, text, expected, true)
 }
@@ -84,6 +94,61 @@
 	;;
 }
 
+/*
+ * Test driver for the index-returning matchers. `regex` is the result of
+ * regex.compile(pat); it is unwrapped with std.try. When `search` is true
+ * the pattern is run with regex.isearch, otherwise with regex.iexec. The
+ * (start, end) pairs that come back are compared one by one with
+ * `expected`; any disagreement is printed and then reported via std.fatal.
+ */
+const runidx = {regex, pat, text, expected : std.option((std.size, std.size)[:]), search
+	var re, r
+	var lo, elo, hi, ehi
+
+	re = std.try(regex)
+	if search
+		r = regex.isearch(re, text)
+	else
+		r = regex.iexec(re, text)
+	;;
+	match r
+	| `std.Some res:
+		match expected
+		| `std.None:
+			/* print the unexpected matches first: std.fatal ends the test */
+			std.put("expected no match, got:\n")
+			for var i = 0; i < res.len; i++
+				std.put("\t{}: {}\n", i, res[i])
+			;;
+			std.fatal("failed matching {} over {}\n", pat, text)
+		| `std.Some exp:
+			if res.len != exp.len
+				std.put("mismatch: expected {} matches, got {}\n",  exp.len, res.len)
+				std.fatal("failed matching {} over {}\n", pat, text)
+			;;
+			for var i = 0; i < exp.len; i++
+				(elo, ehi) = exp[i]
+				(lo, hi) = res[i]
+				if lo != elo || hi != ehi
+					std.put("mismatch on {}: expected {}, got {}\n", i, exp[i], res[i])
+					std.fatal("failed matching {} over {}\n", pat, text)
+				;;
+			;;
+		;;
+	| `std.None:
+		match expected
+		| `std.None:	/* expected failure */
+		| `std.Some matches:
+			std.put("expected matches:\n")
+			for var i = 0; i < matches.len; i++
+				std.put("\t{}: {}\n", i, matches[i])
+			;;
+			std.fatal("no match found\n")
+		;;
+	;;
+	regex.free(re)
+}
+
 const run = {regex, pat, text, expected, search
 	var i, re, r