ref: 11d385224d630ae234c7e8c4212cfefac8fd167d
dir: /lib/regex/redump.myr/
use std
use bio
use regex
/*
 * Entry point for the regex dump tool.
 *
 * The first positional argument is the regex to compile; any further
 * positional arguments are input files whose lines are matched against
 * it. When no input files are given, lines are read from fd 0 (stdin).
 * The -v flag is forwarded as the second argument of regex.dbgcompile.
 */
const main = {args
	var cmd, comp
	var verbose
	var fd

	verbose = false
	cmd = std.optparse(args, &[
		.argdesc = "regex [inputs...]",
		.minargs = 1,
		.opts = [
			[.opt='v', .desc="dump verbose regex output"]
		][:],
	])
	for opt : cmd.opts
		match opt
		| ('v', _):	verbose = true
		| _:	std.fatal("Unknown argument")
		;;
	;;

	/* the flag is already a boolean, so hand it over directly */
	comp = regex.dbgcompile(cmd.args[0], verbose)
	match comp
	| `std.Err m:
		std.fatal("unable to compile regex: {}\n", m)
	| `std.Ok re:
		if cmd.args.len > 1
			/*
			 * cmd.args[0] is the pattern we just compiled, not an
			 * input file: only the remaining arguments are opened.
			 */
			runall(re, cmd.args[1:])
		else
			fd = bio.mkfile(0, bio.Rd)
			dump(re, fd)
			bio.close(fd)
		;;
	;;
}
/*
 * Walks `files` in order, opening each path for reading and dumping
 * the match results for every line it contains. The file is closed
 * once it has been dumped. A path that cannot be opened aborts the
 * program through std.fatal.
 */
const runall = {re, files
	for var i = 0; i < files.len; i++
		match bio.open(files[i], bio.Rd)
		| `std.Err msg:
			std.fatal("failed to open {}: {}\n", files[i], msg)
		| `std.Ok input:
			dump(re, input)
			bio.close(input)
		;;
	;;
}
/*
 * Reads `fd` line by line, runs `re` against each line and prints
 * the result with show(). Stops quietly at end of file; any other
 * read error is reported and also ends the loop.
 */
const dump = {re, fd
	var mg

	while true
		match bio.readln(fd)
		| `std.Ok ln:
			mg = regex.exec(re, ln)
			show(re, ln, mg)
			/*
			 * Release the match slice before the line it was
			 * produced from, so nothing accumulates per input line.
			 */
			match mg
			| `std.Some rl:	regex.matchfree(rl)
			| `std.None:	/* nothing to release */
			;;
			std.slfree(ln)
		| `std.Err `bio.Eof:
			break
		| `std.Err e:
			/* terminate the message so later output starts on a fresh line */
			std.put("error reading from input: {}\n", e)
			break
		;;
	;;
}
/*
* Renders a match in a way that's pleasant to read. There are
* two cases here.
*
* 1) The pattern matched. In this case, we want to show the
* regions of the pattern that contributed to the match.
*
* 2) The pattern did not match. In this case, we want to show
* the location of the failed match.
*
* In both cases, we render a caret that describes the position
* of the match. Unfortunately, for the coverage code we don't
* have a great way of mapping whole subranges, so the caret can
* be slightly truncated. Fixing this isn't worth the complexity.
*/
const show = {re, ln, mg
	match mg
	| `std.None:
		/*
		 * Failure: print the pattern with a caret under
		 * re.pcidx[re.lastip], then the input line with a
		 * caret under re.strp - 1.
		 */
		std.put("Match failed at {}:\n", re.lastip)
		std.put("\t{}\n", re.pat)
		showpos(re, re.pcidx[re.lastip])
		std.put("\t{}\n", ln)
		showpos(re, re.strp - 1)
	| `std.Some groups:
		/* Success: entry 0 is printed as the match, the rest as numbered groups. */
		std.put("Matched: {}\n", groups[0])
		for var n = 1; n < groups.len; n++
			std.put("\tgroup {}: {}\n", n, groups[n])
		;;
		std.put("coverage:\n")
		std.put("\t{}\n", re.pat)
		showcoverage(re)
	;;
}
/*
* Simple position carets for failures: Draws out
* an arrow of the form:
*
* ~~~~~^
*
* from the start of the line.
*/
const showpos = {re, idx
	var col

	/* leading tab, so the caret line starts like the tab-prefixed lines show() prints */
	std.put("\t")
	/* one '~' for every column before idx; `re` is not consulted here */
	col = 0
	while col < idx
		std.put("~")
		col++
	;;
	std.put("^\n")
}
/*
* Coverage carets for success. This tries to output
* a '^' for every section of the string that matched.
*
* (this|that)
* ^^^^
*/
const showcoverage = {re
	var hit
	var idx

	/* one zero-initialized flag per byte of the pattern */
	hit = std.slzalloc(re.pat.len)
	for var ip = 0; ip < re.proglen; ip++
		/* only instructions recorded in the last thread's trace count */
		if !std.bshas(re.traces[re.lastthr], ip)
			continue
		;;
		/* pcidx gives the pattern offset for this instruction; skip out-of-range ones */
		idx = re.pcidx[ip]
		if idx >= 0 && idx < hit.len
			hit[idx] = true
		;;
	;;

	std.put("\t")
	for h : hit
		if h
			std.put("^")
		else
			std.put(" ")
		;;
	;;
	std.put("\n")

	/* the flags are scratch space for this call only; don't leak them */
	std.slfree(hit)
}