ref: f507c7ee81391e004d492d1095ca4263d3ee1830
dir: /compile.myr/
use std
use "types.use"
pkg regex =
const compile : (re : byte[:] -> std.error(regex#, status))
;;
type tree = union
/* basic string building */
`Alt [tree#, tree#]
`Cat [tree#, tree#]
/* repetition */
`Star tree#
`Plus tree#
`Quest tree#
/* end matches */
`Byte byte
`Chr char
`Class [char, char][:]
`Dot
/* meta */
`Cap tree#
`Astart
`Aend
;;
type parseresult = union
`Some tree#
`None
`Fail status
;;
const compile = {pat
var re : regex#
re = std.zalloc()
re.pat = pat
match parse(re)
`None: std.put("Empty parse\n");;
`Fail f: -> `std.Failure f;;
`Some t:
dump(t, 0)
gen(re, t)
append(re, `Imatch)
idump(re)
-> `std.Success re
;;
;;
-> `std.Failure (`Noimpl)
}
const gen = {re, t
match t#
`Alt (a, b): genalt(re, a, b);;
`Cat (a, b): gen(re, a); gen(re, b);;
/* repetition */
`Star a: genstar(re, a);;
`Plus a: gen(re, a); genstar(re, a);;
`Quest a: genquest(re, a);;
/* end matches */
`Class sl:
std.die("Can't gen class\n")
;;
`Byte b: append(re, `Ibyte b);;
`Chr c: genchar(re, c);;
`Dot: append(re, `Idot);;
/* meta */
`Cap a:
std.put("WARNING: Capture not implemented")
gen(re, a)
;;
;;
-> re.proglen
}
const genalt = {re, l, r
var alt
var jmp
var l0
var l1
var l2
alt = re.proglen
l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
gen(re, l)
jmp = re.proglen
l1 = append(re, `Ijmp -1) /* needs to be replaced */
l2 = gen(re, l)
re.prog[alt] = `Ifork(l0, l1)
re.prog[jmp] = `Ijmp l2
-> re.proglen
}
const genstar = {re, rep
var alt
var jmp
var l0
var l1
var l2
l0 = re.proglen
alt = re.proglen
l1 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
jmp = gen(re, rep)
l2 = append(re, `Ijmp -1)
re.prog[alt] = `Ifork (l1, l2)
re.prog[jmp] = `Ijmp l0
-> re.proglen
}
const genquest = {re, q
var alt
var l0
var l1
alt = re.proglen
l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */
l1 = gen(re, q)
re.prog[alt] = `Ifork (l0, l1)
-> re.proglen
}
const genchar = {re, c
var b : byte[4]
var n
var i
n = std.encode(b[:], c)
for i = 0; i < n; i++
append(re, `Ibyte b[i])
;;
-> re.proglen
}
const append = {re, insn
if re.proglen == re.prog.len
re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen))
;;
re.prog[re.proglen] = insn
re.proglen++
-> re.proglen
}
const idump = {re
var i
for i = 0; i < re.proglen; i++
std.put("%i:\t", i)
match re.prog[i]
/* Char matching. Consume exactly one byte from the string. */
`Ibyte b:
std.put("`Ibyte %b (%c)\n", b, b castto(char))
;;
`Irange (start, end):
std.put("`Irange (%b,%b)\n", start, end)
;;
`Idot:
std.put("`Idot\n")
;;
/*
Control flow. All of these recursively call step() until
exactly one byte is consumed from the string.
*/
`Ifork (lip, rip):
std.put("`Ifork (%z,%z)\n", lip, rip)
;;
`Ijmp ip:
std.put("`Ijmp %z\n", ip)
;;
`Imatch:
std.put("`Imatch\n")
;;
;;
;;
}
const dump = {t, indent
var i
for i = 0; i < indent; i++
std.put(" ")
;;
match t#
`Alt (a, b):
std.put("Alt\n")
dump(a, indent + 1)
dump(b, indent + 1)
;;
`Cat (a, b):
std.put("Cat\n")
dump(a, indent + 1)
dump(b, indent + 1)
;;
/* repetition */
`Star a:
std.put("Star\n")
dump(a, indent + 1)
;;
`Plus a:
std.put("Plus\n")
dump(a, indent + 1)
;;
`Quest a:
std.put("Quest\n")
dump(a, indent + 1)
;;
/* end matches */
`Class sl:
std.put("Class [a..b]\n")
;;
`Byte b:
std.put("Byte %b\n", b)
;;
`Chr c:
std.put("Char %c\n", c)
;;
`Dot:
std.put("Dot\n")
;;
/* meta */
`Cap a:
std.put("Cap\n")
dump(a, indent + 1)
;;
;;
}
const parse = {re
match altexpr(re)
`Some t:
if re.pat.len == 0
-> `Some t
else
free(t)
-> `Fail (`Earlystop)
;;
;;
`None:
-> `None
;;
;;
}
const altexpr = {re
var ret : tree#
std.put("-> alt\n")
match catexpr(re)
`Some t:
ret = t
if matchc(re, '|')
match altexpr(re)
`Some rhs:
ret = mk(`Alt (ret, rhs))
;;
`None:
free(ret)
-> `Fail (`Earlystop)
;;
`Fail f:
-> `Fail f
;;
;;
;;
;;
other: -> other;;
;;
-> `Some ret
}
const catexpr = {re
var ret
std.put("-> cat\n")
match repexpr(re)
`Some t:
ret = t
match catexpr(re)
`Some rhs:
ret = mk(`Cat (t, rhs))
;;
`Fail f: -> `Fail f;;
`None: /* nothing */;;
;;
;;
other: -> other;;
;;
-> `Some ret
}
const repexpr = {re
var ret
std.put("-> rep\n")
match baseexpr(re)
`Some t:
if matchc(re, '*')
ret = mk(`Star t)
elif matchc(re, '+')
ret = mk(`Plus t)
elif matchc(re, '?')
ret = mk(`Quest t)
else
ret = t
;;
;;
other: -> other;;
;;
-> `Some ret
}
const baseexpr = {re
var ret
std.put("-> base\n")
if re.pat.len == 0
-> `None
;;
match getc(re)
'[': ret = chrclass(re);;
'.': ret = mk(`Dot);;
'^': ret = mk(`Astart);;
'$': ret = mk(`Aend);;
'(':
match altexpr(re)
`Some s: ret = mk(`Cap s);;
`None: -> `Fail (`Emptyparen);;
;;
if !matchc(re, ')')
free(ret)
-> `Fail (`Unbalanced)
;;
;;
'*': -> `Fail (`Badrep);;
'+': -> `Fail (`Badrep);;
'?': -> `Fail (`Badrep);;
c:
if c == '\\'
if re.pat.len == 0
-> `Fail (`Earlystop)
;;
c = getc(re)
;;
ret = mk(`Chr c)
;;
;;
std.put("<- base\n")
dump(ret, 0)
-> `Some ret
}
const chrclass = {pat
var t
t = std.alloc()
t# = `Class [][:]
-> t
}
const matchc = {re, c
var str
var chr
(chr, str) = std.striter(re.pat)
if chr != c
-> false
;;
re.pat = str
-> true
}
const getc = {re
var c
(c, re.pat) = std.striter(re.pat)
-> c
}
const mk = {v
var t
t = std.alloc()
t# = v
-> t
}
const free = {t
match t#
`Alt (a, b): free(a); free(b);;
`Cat (a, b): free(a); free(b);;
/* repetition */
`Star a: free(a);;
`Plus a: free(a);;
`Quest a: free(a);;
/* end matches */
`Class sl: std.slfree(sl);;
`Byte b: ;;
`Chr c: ;;
`Dot: ;;
/* meta */
`Cap a: free(a);;
;;
std.free(t)
}