ref: ac2d8aa21e1e6ae45573b268151fda57cada110d
dir: /lib/std/striter.myr/
use "die"
use "types"
use "utf"
use "strfind"
use "option"
use "chartype"
use "slpush"
use "alloc"
pkg std =
/*
 * State for iterating over the codepoints of a string.
 * `rest` holds the bytes that have not been decoded yet.
 */
type chariter = struct
rest : byte[:]
;;
/*
 * State for iterating over the graphemes of a string.
 * `rest` holds the bytes that have not been consumed yet.
 */
type graphemeiter = struct
rest : byte[:]
;;
/*
 * State for iterating over codepoints together with an offset.
 * `str` is the string being walked and `idx` is the byte index
 * at which the next codepoint will be decoded.
 */
type charoffiter = struct
str : byte[:]
idx : size
;;
/*
 * State for iterating over the pieces of a string separated by
 * a delimiter. `rest` is the part not yet split, and `split` is
 * the delimiter that is searched for.
 */
type splititer = struct
rest : byte[:]
split : byte[:]
;;
/*
 * State for iterating over whitespace separated tokens. `str`
 * is the string being tokenized and `idx` is the byte index at
 * which the scan for the next token resumes.
 */
type tokiter = struct
str : byte[:]
idx : size
;;
/* the element type produced by each iterator */
impl iterable chariter -> char
impl iterable graphemeiter -> char[:]
impl iterable charoffiter -> (char, size)
impl iterable splititer -> byte[:]
impl iterable tokiter -> byte[:]
/* constructors: each one wraps a string in the matching iterator state */
const bychar : (str : byte[:] -> chariter)
const bygrapheme : (str : byte[:] -> graphemeiter)
const bycharoff : (str : byte[:] -> charoffiter)
const bysplit : (str : byte[:], split : byte[:] -> splititer)
const bytok : (str : byte[:] -> tokiter)
;;
/*
* Iterate through a string char by char,
* decoding the utf8 bytes into a single
* codepoint.
*/
impl iterable chariter -> char =
	/*
	 * Produces the next codepoint, if any bytes remain.
	 * On success the decoded char is stored through `out`
	 * and the iterator is advanced past it.
	 */
	__iternext__ = {it, out
		var ch, tail

		if it.rest.len > 0
			(ch, tail) = charstep(it.rest)
			out# = ch
			it.rest = tail
			-> true
		;;
		-> false
	}
	/* a char owns no resources, so there is nothing to clean up */
	__iterfin__ = {it, out
	}
;;
/* Wraps `str` in a chariter positioned at the start of the string. */
const bychar = {str
	var it : chariter

	it = [.rest = str]
	-> it
}
/*
* Iterate through a string grapheme by grapheme,
* returning a slice of characters composing the
* grapheme.
*/
impl iterable graphemeiter -> char[:] =
	/*
	 * Produces the next grapheme, if any bytes remain, as a
	 * freshly built slice of chars stored through `out`.
	 */
	__iternext__ = {it, out : char[:]#
		var cluster, chars : char[:]

		if it.rest.len > 0
			(cluster, it.rest) = graphemestep(it.rest)
			/*
			 * graphemestep hands back the bytes of the
			 * grapheme, but callers want a slice of chars,
			 * so decode the bytes one codepoint at a time.
			 */
			chars = [][:]
			for ch : std.bychar(cluster)
				std.slpush(&chars, ch)
			;;
			out# = chars
			-> true
		;;
		-> false
	}
	/* frees the char slice that __iternext__ built */
	__iterfin__ = {it, out
		std.slfree(out#)
	}
;;
/* Wraps `str` in a graphemeiter positioned at the start of the string. */
const bygrapheme = {str
	var it : graphemeiter

	it = [.rest = str]
	-> it
}
/*
* Iterates through a string character by
* character, similar to chariter, but returns
* the offset into the string of the codepoint.
* For example,
* "ὐbὐc"
* would return the sequence:
* (ὐ, 0), (b, 3), (ὐ, 4), (c, 7)
*/
impl iterable charoffiter -> (char, size) =
	/*
	 * Decodes the codepoint at the current byte index and
	 * yields it paired with the byte offset at which that
	 * codepoint starts. Returns false once the index has
	 * reached the end of the string.
	 */
	__iternext__ = {ci, cv
		var c

		if ci.idx == ci.str.len
			-> false
		;;
		c = std.decode(ci.str[ci.idx:])
		/*
		 * Store the result before advancing, so that the
		 * offset handed out is where the codepoint begins
		 * and not where the following one begins.
		 */
		cv# = (c, ci.idx)
		ci.idx += std.charlen(c)
		-> true
	}
	/* the yielded tuple owns no resources, so there is nothing to clean up */
	__iterfin__ = {ci, c
	}
;;
/* Wraps `str` in a charoffiter whose index starts at byte 0. */
const bycharoff = {str
	var it : charoffiter

	it = [.str = str, .idx = 0]
	-> it
}
/*
* Iterates through the splits of a string by a
* delimiter, skipping the delimiter.
*/
impl iterable splititer -> byte[:] =
	/*
	 * Produces the next piece of the string. When the
	 * delimiter is found, the piece is everything before it
	 * and the iterator resumes just after it. When it is not
	 * found, any non-empty remainder is yielded as the last
	 * piece, and after that the iteration ends.
	 */
	__iternext__ = {it, piece
		match std.strfind(it.rest, it.split)
		| `None:
			if it.rest.len == 0
				-> false
			;;
			piece# = it.rest
			it.rest = ""
		| `Some pos:
			piece# = it.rest[:pos]
			it.rest = it.rest[pos + it.split.len:]
		;;
		-> true
	}
	/* the pieces are views into the original string; nothing to clean up */
	__iterfin__ = {it, piece
	}
;;
/* Wraps `str` in a splititer that cuts it at each occurrence of `split`. */
const bysplit = {str, split
	var it : splititer

	it = [.rest = str, .split = split]
	-> it
}
/*
* Tokenizes a string by spaces, iterating over
* the results.
*/
impl iterable tokiter -> byte[:] =
	/*
	 * Produces the next token: skips any run of space
	 * characters starting at the saved index, then collects
	 * characters up to the next space or the end of the
	 * string. Returns false when the collected token is empty.
	 */
	__iternext__ = {it, tok
		var buf, start, stop, ch

		buf = it.str

		/* step over the spaces in front of the token */
		start = it.idx
		while start < buf.len
			ch = std.decode(buf[start:])
			if isspace(ch)
				start += charlen(ch)
			else
				break
			;;
		;;

		/* extend the token until a space or the end of the string */
		stop = start
		while stop < buf.len
			ch = std.decode(buf[stop:])
			if !isspace(ch)
				stop += charlen(ch)
			else
				break
			;;
		;;

		tok# = buf[start:stop]
		it.idx = stop
		-> start < stop
	}
	/* the tokens are views into the original string; nothing to clean up */
	__iterfin__ = {it, tok
	}
;;
/* Wraps `str` in a tokiter that starts scanning at byte 0. */
const bytok = {str
	var it : tokiter

	it = [.str = str, .idx = 0]
	-> it
}