shithub: mc

diff: cannot open b/mparse//null: file does not exist: 'b/mparse//null'

--- /dev/null

+++ b/mparse/bld.proj

@@ -1,0 +1,7 @@

+bin tok =

+	main.myr

+	types.myr

+	tok.myr

+	tokdefs.myr

+	util.myr

+;;

--- /dev/null

+++ b/mparse/main.myr

@@ -1,0 +1,15 @@

+use std

+use "tok.use"

+/*

+Test driver for the tokenizer: builds a token stream from file

+descriptor 0 (presumably standard input -- confirm) and prints each

+token on its own line until `parse.Teof is returned.

+*/

+const main = {

+	var ts

+	ts = parse.tokinitf(0)

+	while true

+		match parse.toknext(ts)

+		| `parse.Teof:	break

+		| tok:	std.put("{}\n", tok)

+		;;

+	;;

+}

--- /dev/null

+++ b/mparse/tok.myr

@@ -1,0 +1,540 @@

+use std

+use "types.use"

+use "tokdefs.use"

+use "util.use"

+/* Exported tokenizer interface. */

+pkg parse =

+	/*

+	State of one tokenizer run.

+	  next: one token of lookahead, filled by tokpeek and drained by toknext

+	  rest: the input that has not been consumed yet

+	  data: the whole input buffer; freed by tokclose

+	  loc:  source location, updated as newlines are consumed

+	*/

+	type tokstream = struct

+		next	: std.option(tok)

+		rest	: byte[:]

+		data	: byte[:]

+		loc	: srcloc

+	;;

+	/* tokinit reads from a path, tokinitf from an open file descriptor */

+	const tokinit	: (path : byte[:]	-> tokstream#)

+	const tokinitf	: (path : std.fd	-> tokstream#)

+	const tokclose	: (ts : tokstream#	-> void)

+	/* toknext consumes a token; tokpeek returns it without consuming */

+	const toknext	: (ts : tokstream#	-> tok)

+	const tokpeek	: (ts : tokstream#	-> tok)

+;;

+/* value matched against the result of takec in skipcomment and strlit to detect end of input */

+const Eof = std.Badchar

+/*

+Reads the file at `path` with std.slurp and returns a token stream

+allocated with std.mk. The stream starts with no lookahead token and

+with both `rest` and `data` covering the whole buffer. A read failure

+is reported through std.fatal.

+NOTE(review): `loc` is not set in the struct literal -- confirm what

+line/col the stream starts at.

+*/

+const tokinit = {path

+	match std.slurp(path)

+	| `std.Ok data:	-> std.mk([.next=`std.None, .rest=data, .data=data])

+	| `std.Fail e:	std.fatal("could not read file {}: {}\n", path, e)

+	;;

+}

+/*

+Same as tokinit, but reads the input from the already open file

+descriptor `fd` with std.fslurp. A read failure is reported through

+std.fatal.

+NOTE(review): `loc` is not set in the struct literal -- confirm what

+line/col the stream starts at.

+*/

+const tokinitf = {fd

+	match std.fslurp(fd)

+	| `std.Ok data:	-> std.mk([.next=`std.None, .rest=data, .data=data])

+	| `std.Fail e:	std.fatal("could not read file {}: {}\n", fd, e)

+	;;

+}

+/*

+Releases the input buffer and then the stream itself. `ts` must not

+be used afterwards. A token still held in ts.next is not released here.

+*/

+const tokclose = {ts

+	std.slfree(ts.data)

+	std.free(ts)

+}

+/*

+Returns the next token and consumes it. If tokpeek left a lookahead

+token in ts.next, that token is handed out and the slot is cleared;

+otherwise a fresh token is read from the input.

+

+The tokenizer itself writes nothing to stdout: printing tokens is

+left to the caller (main already does so).

+*/

+const toknext = {ts

+	match ts.next

+	| `std.Some tok:

+		ts.next = `std.None

+		-> tok

+	| `std.None:

+		-> tokread(ts)

+	;;

+}

+/*

+Returns the next token without consuming it. A token that is already

+buffered in ts.next is returned as is; otherwise one is read from the

+input and stored in ts.next so that the following toknext or tokpeek

+sees the same token.

+*/

+const tokpeek = {ts

+	var fresh

+	match ts.next

+	| `std.Some pending:	-> pending

+	| `std.None:

+	;;

+	/* nothing buffered: read one token and remember it */

+	fresh = tokread(ts)

+	ts.next = `std.Some fresh

+	-> fresh

+}

+/*

+Reads one token directly from the input, ignoring ts.next. Leading

+whitespace and comments are skipped first, then the token kind is

+chosen from the first remaining character. Returns `Teof once the

+input is exhausted.

+*/

+const tokread : (ts : tokstream# -> tok) = {ts

+	var c

+	skipspace(ts)

+	c = peekc(ts)

+	if ts.rest.len == 0

+		-> `Teof

+	elif c == '\n'

+		takec(ts)

+		ts.loc.line++

+		ts.loc.col = 1

+		-> `Tendln

+	elif c == '\''

+		-> chrlit(ts)

+	elif c == '"'

+		-> strlit(ts)

+	elif c == '@'

+		-> typaram(ts)

+	/*

+	digits must be tested before isident: isident also accepts

+	'0'..'9', and identstr refuses to consume a leading digit, so

+	the other order never reaches numlit and never advances.

+	*/

+	elif std.isdigit(c)

+		-> numlit(ts)

+	elif isident(c)

+		-> kwident(ts)

+	else

+		-> oper(ts)

+	;;

+}

+/*

+Skips whitespace, `//` line comments and `/* */` block comments.

+A newline is significant (tokread turns it into `Tendln), so the

+scan stops in front of it unless a preceding '\' asked for the line

+to be continued. Stops in front of the first character that is none

+of the above.

+*/

+const skipspace = {ts

+	var ignorenl

+	ignorenl = false

+	while true

+		match peekc(ts)

+		| '\n':

+			if ignorenl

+				takec(ts)

+				ts.loc.line++

+				ts.loc.col = 1

+				/* a '\' continues only the line it ends */

+				ignorenl = false

+			else

+				break

+			;;

+		| '\\':

+			ignorenl = true

+			takec(ts)

+		| '/':

+			/* the '/' is still unconsumed, so the next character is at offset 1 */

+			match npeekc(ts, 1)

+			| '/':	skipto(ts, '\n')

+			| '*':	skipcomment(ts)

+			| _:	break

+			;;

+		| c:

+			if std.isspace(c)

+				takec(ts)

+			else

+				break

+			;;

+		;;

+	;;

+}

+/*

+Consumes a block comment, including nested ones. It is called by

+skipspace with the opening "/" and "*" still unconsumed, so the first

+iteration raises `depth` to 1; the loop ends when the matching

+closing pair brings it back to 0. Newlines inside the comment update

+ts.loc. Reaching the end of input first is reported through err,

+naming the line the comment started on.

+*/

+const skipcomment = {ts

+	var depth, startln

+	depth = 0

+	startln = ts.loc.line

+	while true

+		match takec(ts)

+		| '/':

+			/* "/" followed by "*" opens one more nesting level */

+			if matchc(ts, '*')

+				depth++

+			;;

+		| '*':

+			/* "*" followed by "/" closes one nesting level */

+			if matchc(ts, '/')

+				depth--

+			;;

+		| '\n':

+			ts.loc.line++

+			ts.loc.col = 1

+		| Eof:

+			err(ts.loc, "file ended in comment starting on line {}\n", startln)

+		| _:

+		;;

+		if depth == 0

+			break

+		;;

+	;;

+}

+/*

+Reads a character literal. The opening quote is consumed first, then

+one character (or one escape sequence, decoded by unescape), then the

+closing quote. A missing closing quote is reported through err.

+Returns `Tchrlit with the decoded character.

+*/

+const chrlit = {ts

+	var c, close

+	/* opening ' */

+	takec(ts)

+	c = takec(ts)

+	if c == '\\'

+		c = unescape(ts)

+	;;

+	close = takec(ts)

+	if close != '\''

+		err(ts.loc, "expected closing ' in character literal, got {}\n", close)

+	;;

+	-> `Tchrlit c

+}

+/*

+Reads a string literal. The opening quote is consumed, then characters

+are collected in a string buffer until the closing quote; escape

+sequences are decoded by unescape. End of input or a raw newline

+inside the literal is reported through err. Returns `Tstrlit with the

+buffer contents produced by std.sbfin.

+*/

+const strlit = {ts

+	var sb

+	/* opening " */

+	takec(ts)

+	sb = std.mksb()

+	while true

+		match takec(ts)

+		| Eof:

+			err(ts.loc, "unexpected EOF within string literal\n")

+		| '\n':

+			err(ts.loc, "unexpected \\n within string literal\n")

+		| '"':

+			break

+		| '\\':

+			std.sbputc(sb, unescape(ts))

+		| c:

+			std.sbputc(sb, c)

+		;;

+	;;

+	-> `Tstrlit std.sbfin(sb)

+}

+/*

+Decodes one escape sequence and returns the character it stands for.

+The caller has already consumed the leading '\'; this consumes the

+escape code and its operands: two hex digits for \x, a braced

+codepoint (read by utfesc) for \u. Bad hex digits and unknown escape

+codes are reported through err.

+*/

+const unescape = {ts

+	var c, c1, c2

+	c = takec(ts)

+	/* we've already seen the '\' */

+	match c

+	| 'n':	-> '\n'

+	| 'r':	-> '\r'

+	| 't':	-> '\t'

+	| 'b':	-> '\b'

+	| '"':	-> '\"'

+	| '\'':	-> '\''

+	| 'v':	-> '\v'

+	| '\\':	-> '\\'

+	| '0':	-> '\0'

+	| 'u':	-> utfesc(ts)

+	| 'x':

+		c1 = takec(ts)

+		if !std.isxdigit(c1)

+			err(ts.loc, "expected hex digit, got {}\n", c1)

+		;;

+		c2 = takec(ts)

+		if !std.isxdigit(c2)

+			err(ts.loc, "expected hex digit, got {}\n", c2)

+		;;

+		/* exactly two digits belong to the escape; nothing more is consumed */

+		-> 16*std.charval(c1, 16) + std.charval(c2, 16)

+	| esc:

+		err(ts.loc, "unknown escape code \\{}\n", esc)

+	;;

+}

+/*

+Reads the braced part of a \u escape: an opening brace, hex digits,

+a closing brace. The caller has already consumed the "\u". Returns

+the accumulated codepoint value. A missing brace or a value above

+0x10FFFF is reported through err.

+

+Literal braces in the messages are doubled, the same way tokfmt

+writes them, so they are not read as format specifiers.

+*/

+const utfesc = {ts

+	var c, v

+	if takec(ts) != '{'

+		err(ts.loc, "\\u escape sequence without initial '{{'\n")

+	;;

+	v = 0

+	c = std.Badchar

+	while true

+		c = takec(ts)

+		if std.isxdigit(c)

+			v *= 16

+			v += std.charval(c, 16)

+		else

+			break

+		;;

+		/* checked after every digit, so v cannot grow without bound */

+		if v > 0x10FFFF

+			err(ts.loc, "invalid codepoint in \\u escape sequence\n")

+		;;

+	;;

+	/* c is the first non-hex character; it has to be the closing brace */

+	if c != '}'

+		err(ts.loc, "\\u escape sequence without closing '}}'\n")

+	;;

+	-> v

+}

+/*

+Reads a type parameter: consumes the leading character (tokread calls

+this when it sees '@') and then reads a word with kwident. A plain

+identifier becomes `Ttyparam; any other token is reported through err.

+*/

+const typaram = {ts

+	takec(ts)

+	match kwident(ts)

+	| `Tident id:

+		-> `Ttyparam id

+	| kw:

+		err(ts.loc, "'{}' used as type parameter\n", kw)

+	;;

+}

+/*

+Reads a numeric literal. A leading "0x", "0b" or "0o" selects base

+16, 2 or 8; everything else is read in base 10. The prefix is

+consumed here and the digits are left to number.

+*/

+const numlit = {ts

+	var t

+	if matchc(ts, '0')

+		if matchc(ts, 'x')

+			t = number(ts, 16)

+		elif matchc(ts, 'b')

+			t = number(ts, 2)

+		elif matchc(ts, 'o')

+			t = number(ts, 8)

+		else

+			/* a plain leading 0 has already been consumed at this point */

+			t = number(ts, 10)

+		;;

+	else

+		t = number(ts, 10)

+	;;

+	-> t

+}

+/*

+Only deals with the body of the number. If we reach

+this code, then it's guaranteed that we already have

+a numerical value.

+

+`base` is the radix chosen by numlit (2, 8, 10 or 16).

+TODO: the body is still empty, so nothing is consumed and no

+token is produced yet.

+*/

+const number = {ts, base

+}

+/*

+Reads one word with identstr and maps it to its keyword, attribute or

+literal token; any word that is not listed becomes `Tident carrying

+the word itself.

+NOTE(review): identstr returns a std.sldup copy; for the keyword arms

+that copy is not stored in the token -- confirm whether it should be

+freed here.

+*/

+const kwident = {ts

+	match identstr(ts)

+	| "$": 	-> `Tidxlen

+	| "_": 	-> `Tgap

+	| "$noret": 	-> `Tattr `Attrnoret

+	| "break": 	-> `Tbreak

+	| "castto": 	-> `Tcast

+	| "const": 	-> `Tconst

+	| "continue": 	-> `Tcontinue

+	| "elif": 	-> `Telif

+	| "else": 	-> `Telse

+	| "extern": 	-> `Tattr `Attrextern

+	| "false": 	-> `Tboollit false

+	| "for": 	-> `Tfor

+	| "generic": 	-> `Tgeneric

+	| "goto": 	-> `Tgoto

+	| "if": 	-> `Tif

+	| "impl": 	-> `Timpl

+	| "in": 	-> `Tin

+	| "match": 	-> `Tmatch

+	| "pkg": 	-> `Tpkg

+	| "pkglocal": 	-> `Tattr `Attrpkglocal

+	| "sizeof": 	-> `Tsizeof

+	| "struct": 	-> `Tstruct

+	| "trait": 	-> `Ttrait

+	| "true": 	-> `Tboollit true

+	| "type": 	-> `Ttype

+	| "union": 	-> `Tunion

+	| "use": 	-> `Tuse

+	| "var": 	-> `Tvar

+	| "void": 	-> `Tvoidlit

+	| "while": 	-> `Twhile

+	| ident:	-> `Tident ident

+	;;

+}

+/*

+Reads one operator or punctuation token. The first character is

+consumed unconditionally; multi-character operators are then

+recognised by trying the possible continuations with matchc, longest

+form first. A character that starts no known operator yields `Terror

+and is reported through err.

+

+The tokenizer itself writes nothing to stdout.

+*/

+const oper = {ts

+	var t, chr

+	chr = takec(ts)

+	/* placeholder so that t is initialized on every path */

+	t = `Tobrace

+	match chr

+	| '{': t = `Tobrace

+	| '}': t = `Tcbrace

+	| '(': t = `Toparen

+	| ')': t = `Tcparen

+	| '[': t = `Tosqbrac

+	| ']': t = `Tcsqbrac

+	| ',': t = `Tcomma

+	| '`': t = `Ttick

+	| '#': t = `Tderef

+	| '~': t = `Tbnot

+	| ':':

+		if matchc(ts, ':')

+			t = `Twith

+		else

+			t = `Tcolon

+		;;

+	| ';':

+		if matchc(ts, ';')

+			t = `Tendblk

+		else

+			t = `Tendln

+		;;

+	| '.':

+		/*

+		the first '.' is already consumed, so the other two dots of

+		"..." are the next character and the one after it. A lone

+		".." stays two `Tdot tokens.

+		*/

+		if peekc(ts) == '.' && npeekc(ts, 1) == '.'

+			takec(ts)

+			takec(ts)

+			t = `Tellipsis

+		else

+			t = `Tdot

+		;;

+	| '+':

+		if matchc(ts, '=')

+			t = `Taddeq

+		elif matchc(ts, '+')

+			t = `Tinc

+		else

+			t = `Tplus

+		;;

+	| '-':

+		if matchc(ts, '=')

+			t = `Tsubeq

+		elif matchc(ts, '-')

+			t = `Tdec

+		elif matchc(ts, '>')

+			t = `Tret

+		else

+			t = `Tminus

+		;;

+	| '*':

+		if matchc(ts, '=')

+			t = `Tmuleq

+		else

+			t = `Tmul

+		;;

+	| '/':

+		if matchc(ts, '=')

+			t = `Tdiveq

+		else

+			t = `Tdiv

+		;;

+	| '%':

+		if matchc(ts, '=')

+			t = `Tmodeq

+		else

+			t = `Tmod

+		;;

+	| '=':

+		if matchc(ts, '=')

+			t = `Teq

+		else

+			t = `Tasn

+		;;

+	| '|':

+		if matchc(ts, '=')

+			t = `Tboreq

+		elif matchc(ts, '|')

+			t = `Tlor

+		else

+			t = `Tbor

+		;;

+	| '&':

+		if matchc(ts, '=')

+			t = `Tbandeq

+		elif matchc(ts, '&')

+			t = `Tland

+		else

+			t = `Tband

+		;;

+	| '^':

+		if matchc(ts, '=')

+			t = `Tbxoreq

+		else

+			t = `Tbxor

+		;;

+	| '<':

+		if matchc(ts, '=')

+			t = `Tle

+		elif matchc(ts, '<')

+			if matchc(ts, '=')

+				t = `Tbsleq

+			else

+				t = `Tbsl

+			;;

+		else

+			t = `Tlt

+		;;

+	| '>':

+		if matchc(ts, '=')

+			t = `Tge

+		elif matchc(ts, '>')

+			if matchc(ts, '=')

+				t = `Tbsreq

+			else

+				t = `Tbsr

+			;;

+		else

+			t = `Tgt

+		;;

+	| '!':

+		if matchc(ts, '=')

+			t = `Tne

+		else

+			t = `Tlnot

+		;;

+	| c:

+		t = `Terror

+		err(ts.loc, "junk character {}", c)

+	;;

+	-> t

+}

+/*

+Consumes the longest run of identifier bytes (see isident) at the

+start of the input and returns a std.sldup copy of it. Returns ""

+without consuming anything when the input is empty or starts with a

+digit. Works byte by byte, so only ASCII identifiers are recognised.

+*/

+const identstr = {ts

+	var i, str

+	/* ASCII */

+	if ts.rest.len == 0 || std.isdigit(ts.rest[0] castto(char))

+		-> ""

+	;;

+	/* i ends up as the index of the first non-identifier byte */

+	for i = 0; i < ts.rest.len; i++

+		if !isident(ts.rest[i] castto(char))

+			break

+		;;

+	;;

+	str = ts.rest[:i]

+	ts.rest = ts.rest[i:]

+	-> std.sldup(str)

+}

+/*

+True when `c` may appear in an identifier: an ASCII letter, an ASCII

+digit, '_' or '$'. Anything with bit 0x80 set is rejected.

+*/

+const isident = {c

+	var alpha, digit, punct

+	alpha = c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'

+	digit = c >= '0' && c <= '9'

+	punct = c == '_' || c == '$'

+	-> c & 0x80 == 0 && (alpha || digit || punct)

+}

+/* Returns the character at the start of the remaining input (via std.decode) without consuming it. */

+const peekc = {ts

+	-> std.decode(ts.rest)

+}

+/*

+Returns the character `n` characters ahead of the current position

+without consuming anything; n = 0 is the same character peekc returns.

+Only a local copy of the slice is advanced.

+*/

+const npeekc = {ts, n

+	var c, s

+	s = ts.rest

+	for var i = 0; i < n; i++

+		(c, s) = std.strstep(s)

+	;;

+	-> std.decode(s)

+}

+/*

+Consumes one character from the input and returns it. ts.loc is not

+touched here; callers that care about newlines update it themselves.

+*/

+const takec = {ts

+	var c, s

+	(c, s) = std.strstep(ts.rest)

+	ts.rest = s

+	-> c

+}

+/*

+Advances the stream until the next character is `chr` or the input

+is exhausted. `chr` itself is left unconsumed: skipspace uses this to

+drop a `//` comment, and the newline that ends it still has to be

+seen by the caller.

+*/

+const skipto = {ts, chr

+	while ts.rest.len != 0 && peekc(ts) != chr

+		takec(ts)

+	;;

+}

+/*

+Consumes the next character only if it equals `chr`. Returns true

+when the character was consumed, false (with the stream untouched)

+otherwise.

+*/

+const matchc = {ts, chr

+	var head, tail

+	(head, tail) = std.strstep(ts.rest)

+	if head != chr

+		-> false

+	;;

+	ts.rest = tail

+	-> true

+}

--- /dev/null

+++ b/mparse/tokdefs.myr

@@ -1,0 +1,210 @@

+use std

+use "types.use"

+pkg parse =

+	/*

+	Every kind of token the tokenizer can produce. The comment next

+	to a tag shows the source text it stands for; tags with a payload

+	carry the literal value or the name.

+	*/

+	type tok = union

+		`Terror

+		`Teof

+		`Tplus    /* + */

+		`Tminus   /* - */

+		`Tmul     /* * */

+		`Tdiv     /* / */

+		`Tinc     /* ++ */

+		`Tdec     /* -- */

+		`Tmod     /* % */

+		`Tasn     /* = */

+		`Taddeq   /* += */

+		`Tsubeq   /* -= */

+		`Tmuleq   /* *= */

+		`Tdiveq   /* /= */

+		`Tmodeq   /* %= */

+		`Tboreq   /* |= */

+		`Tbxoreq  /* ^= */

+		`Tbandeq  /* &= */

+		`Tbsleq   /* <<= */

+		`Tbsreq   /* >>= */

+		`Tbor     /* | */

+		`Tbxor    /* ^ */

+		`Tband    /* & */

+		`Tbsl     /* << */

+		`Tbsr     /* >> */

+		`Tbnot    /* ~ */

+		`Teq      /* == */

+		`Tgt      /* > */

+		`Tlt      /* < */

+		`Tge      /* >= */

+		`Tle      /* <= */

+		`Tne      /* != */

+		`Tlor     /* || */

+		`Tland    /* && */

+		`Tlnot    /* ! */

+		`Tobrace  /* { */

+		`Tcbrace  /* } */

+		`Toparen  /* ( */

+		`Tcparen  /* ) */

+		`Tosqbrac /* [ */

+		`Tcsqbrac /* ] */

+		`Tat      /* @ */

+		`Ttick    /* ` */

+		`Tderef   /* # */

+		`Tidxlen  /* $ */

+		`Ttype    /* type */

+		`Tfor     /* for */

+		`Tin      /* in */

+		`Twhile   /* while */

+		`Tif      /* if */

+		`Telse    /* else */

+		`Telif    /* elif */

+		`Tmatch   /* match */

+		`Tgoto    /* goto */

+		`Tbreak   /* break */

+		`Tcontinue   /* continue */

+		`Tintlit int64

+		`Tstrlit byte[:]

+		`Tfltlit flt64

+		`Tchrlit char

+		`Tboollit bool

+		`Tvoidlit

+		`Ttrait   /* trait */

+		`Timpl   /* impl */

+		`Tstruct  /* struct */

+		`Tunion   /* union */

+		`Ttyparam byte[:] /* @typename */

+		`Tconst   /* const */

+		`Tvar     /* var */

+		`Tgeneric /* generic */

+		`Tcast    /* castto */

+		`Tgap     /* _ */

+		`Tellipsis/* ... */

+		`Tendln   /* ; or \n */

+		`Tendblk  /* ;; */

+		`Tcolon   /* : */

+		`Twith    /* :: */

+		`Tdot     /* . */

+		`Tcomma   /* , */

+		`Tret     /* -> */

+		`Tuse     /* use */

+		`Tpkg     /* pkg */

+		`Tsizeof  /* sizeof */

+		`Tattr attr   /* $attr */

+		`Tident byte[:]

+	;;

+;;

+/*

+Installs tokfmt as the formatter for values of type tok, so that "{}"

+in a format string can print a token. `dummy` exists only to hand

+std.typeof a value of the right type.

+*/

+const __init__ = {

+	var dummy : tok

+	dummy = `Terror

+	std.fmtinstall(std.typeof(dummy), tokfmt, [][:])

+}

+/*

+Formatter installed for tok by __init__. Takes the token from the

+argument list and appends its source spelling to `sb`: operators and

+keywords as they are written, literals and names through their

+payload. Braces are doubled because std.sbfmt treats a single brace

+as the start of a format specifier. `opts` is not used.

+*/

+const tokfmt = {sb, ap, opts

+	var tok

+	tok = std.vanext(ap)

+	match tok

+	| `Terror:	std.sbfmt(sb, "ERROR")

+	| `Teof:	std.sbfmt(sb, "EOF")

+	| `Tplus:	std.sbfmt(sb, "+")

+	| `Tminus:	std.sbfmt(sb, "-")

+	| `Tmul:	std.sbfmt(sb, "*")

+	| `Tdiv:	std.sbfmt(sb, "/")

+	| `Tinc:	std.sbfmt(sb, "++")

+	| `Tdec:	std.sbfmt(sb, "--")

+	| `Tmod:	std.sbfmt(sb, "%")

+	| `Tasn:	std.sbfmt(sb, "=")

+	| `Taddeq:	std.sbfmt(sb, "+=")

+	| `Tsubeq:	std.sbfmt(sb, "-=")

+	| `Tmuleq:	std.sbfmt(sb, "*=")

+	| `Tdiveq:	std.sbfmt(sb, "/=")

+	| `Tmodeq:	std.sbfmt(sb, "%=")

+	| `Tboreq:	std.sbfmt(sb, "|=")

+	| `Tbxoreq:	std.sbfmt(sb, "^=")

+	| `Tbandeq:	std.sbfmt(sb, "&=")

+	| `Tbsleq:	std.sbfmt(sb, "<<=")

+	| `Tbsreq:	std.sbfmt(sb, ">>=")

+	| `Tbor:	std.sbfmt(sb, "|")

+	| `Tbxor:	std.sbfmt(sb, "^")

+	| `Tband:	std.sbfmt(sb, "&")

+	| `Tbsl:	std.sbfmt(sb, "<<")

+	| `Tbsr:	std.sbfmt(sb, ">>")

+	| `Tbnot:	std.sbfmt(sb, "~")

+	| `Teq:		std.sbfmt(sb, "==")

+	| `Tgt:		std.sbfmt(sb, ">")

+	| `Tlt:		std.sbfmt(sb, "<")

+	| `Tge:		std.sbfmt(sb, ">=")

+	| `Tle:		std.sbfmt(sb, "<=")

+	| `Tne:		std.sbfmt(sb, "!=")

+	| `Tlor:	std.sbfmt(sb, "||")

+	| `Tland:	std.sbfmt(sb, "&&")

+	| `Tlnot:	std.sbfmt(sb, "!")

+	| `Tobrace:	std.sbfmt(sb, "{{")

+	| `Tcbrace:	std.sbfmt(sb, "}}")

+	| `Toparen:	std.sbfmt(sb, "(")

+	| `Tcparen:	std.sbfmt(sb, ")")

+	| `Tosqbrac:	std.sbfmt(sb, "[")

+	| `Tcsqbrac:	std.sbfmt(sb, "]")

+	| `Tat:		std.sbfmt(sb, "@")

+	| `Ttick:	std.sbfmt(sb, "`")

+	| `Tderef:	std.sbfmt(sb, "#")

+	| `Tidxlen:	std.sbfmt(sb, "$")

+	| `Ttype:	std.sbfmt(sb, "type")

+	| `Tfor:	std.sbfmt(sb, "for")

+	| `Tin:		std.sbfmt(sb, "in")

+	| `Twhile:	std.sbfmt(sb, "while")

+	| `Tif:		std.sbfmt(sb, "if")

+	| `Telse:	std.sbfmt(sb, "else")

+	| `Telif:	std.sbfmt(sb, "elif")

+	| `Tmatch:	std.sbfmt(sb, "match")

+	| `Tgoto:	std.sbfmt(sb, "goto")

+	| `Tbreak:	std.sbfmt(sb, "break")

+	| `Tcontinue:	std.sbfmt(sb, "continue")

+	| `Tintlit v:	std.sbfmt(sb, "{}", v)

+	| `Tstrlit v:	std.sbfmt(sb, "{e}", v)

+	| `Tfltlit v:	std.sbfmt(sb, "{}", v)

+	| `Tchrlit v:	std.sbfmt(sb, "{}", v)

+	| `Tboollit v:	std.sbfmt(sb, "{}", v)

+	| `Tvoidlit:	std.sbfmt(sb, "void")

+	| `Ttrait:	std.sbfmt(sb, "trait")

+	| `Timpl:	std.sbfmt(sb, "impl")

+	| `Tstruct:	std.sbfmt(sb, "struct")

+	| `Tunion:	std.sbfmt(sb, "union")

+	| `Ttyparam tp:	std.sbfmt(sb, "@{}", tp)

+	| `Tconst:	std.sbfmt(sb, "const")

+	| `Tvar:	std.sbfmt(sb, "var")

+	| `Tgeneric:	std.sbfmt(sb, "generic")

+	| `Tcast:	std.sbfmt(sb, "castto")

+	| `Tgap:	std.sbfmt(sb, "_")

+	| `Tellipsis:	std.sbfmt(sb, "...")

+	| `Tendln:	std.sbfmt(sb, ";")

+	| `Tendblk:	std.sbfmt(sb, ";;")

+	| `Tcolon:	std.sbfmt(sb, ":")

+	| `Twith:	std.sbfmt(sb, "::")

+	| `Tdot:	std.sbfmt(sb, ".")

+	| `Tcomma:	std.sbfmt(sb, ",")

+	| `Tret:	std.sbfmt(sb, "->")

+	| `Tuse:	std.sbfmt(sb, "use")

+	| `Tpkg:	std.sbfmt(sb, "pkg")

+	| `Tattr a:	std.sbfmt(sb, "{}", a)

+	| `Tsizeof:	std.sbfmt(sb, "sizeof")

+	| `Tident str:	std.sbfmt(sb, "{}", str)

+	;;

+}

--- /dev/null

+++ b/mparse/types.myr

@@ -1,0 +1,13 @@

+/* Types shared by the tokenizer modules. */

+pkg parse =

+	/* a position in a source file: file name, line and column */

+	type srcloc = struct

+		file	: byte[:]

+		line	: int

+		col	: int

+	;;

+	/* payload of `Tattr; kwident maps "pkglocal", "extern" and "$noret" to these */

+	type attr = union

+		`Attrpkglocal

+		`Attrextern

+		`Attrnoret

+	;;

+;;

--- /dev/null

+++ b/mparse/util.myr

@@ -1,0 +1,14 @@

+use std

+use "types.use"

+/*

+Error reporting used by the tokenizer. Both functions are declared

+$noret: callers rely on them not returning. err takes its format

+arguments directly, verr takes them as a std.valist.

+*/

+pkg parse =

+	$noret const err	: (loc : srcloc, msg : byte[:], args : ... -> void)

+	$noret const verr	: (loc : srcloc, msg : byte[:], args : std.valist -> void)

+;;

+/*

+TODO: empty stub. It is declared $noret, but as written it reports

+nothing and returns to the caller.

+*/

+const err = {loc, msg, args

+}

+/*

+TODO: empty stub. It is declared $noret, but as written it reports

+nothing and returns to the caller.

+*/

+const verr = {loc, msg, ap

+}