shithub: femtolisp

Download patch

ref: f510f5f6eaa66e5f6a399f6bc4fa7091e00a6151
parent: 4dc8cff4fd04e8536f5577522ecb95032064f295
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Thu Dec 12 21:05:16 EST 2024

ios, read: better parse error location logic

--- a/ios.c
+++ b/ios.c
@@ -542,9 +542,9 @@
 		MEM_FREE(s->buf);
 	s->buf = nil;
 	s->size = s->maxsize = s->bpos = 0;
-	if(s->filename != emptystr){
-		MEM_FREE(s->filename);
-		s->filename = emptystr;
+	if(s->loc.filename != emptystr){
+		MEM_FREE(s->loc.filename);
+		s->loc.filename = emptystr;
 	}
 }
 
@@ -718,7 +718,7 @@
 	s->fpos = -1;
 	s->fd = -1;
 	s->ownbuf = 1;
-	s->lineno = 1;
+	s->loc.lineno = 1;
 }
 
 /* stream object initializers. we do no allocation. */
@@ -744,7 +744,7 @@
 		goto open_file_err;
 	if(!wr)
 		s->readonly = 1;
-	s->filename = MEM_STRDUP(fname);
+	s->loc.filename = MEM_STRDUP(fname);
 	return s;
 open_file_err:
 	s->fd = -1;
@@ -756,7 +756,7 @@
 {
 	_ios_init(s);
 	s->bm = bm_mem;
-	s->filename = emptystr;
+	s->loc.filename = emptystr;
 	_buf_realloc(s, initsize);
 	return s;
 }
@@ -801,17 +801,17 @@
 {
 	ios_stdin = MEM_ALLOC(sizeof(ios_t));
 	ios_fd(ios_stdin, STDIN_FILENO, 0, 0);
-	ios_stdin->filename = MEM_STRDUP("*stdin*");
+	ios_stdin->loc.filename = MEM_STRDUP("*stdin*");
 
 	ios_stdout = MEM_ALLOC(sizeof(ios_t));
 	ios_fd(ios_stdout, STDOUT_FILENO, 0, 0);
 	ios_stdout->bm = bm_line;
-	ios_stdout->filename = MEM_STRDUP("*stdout*");
+	ios_stdout->loc.filename = MEM_STRDUP("*stdout*");
 
 	ios_stderr = MEM_ALLOC(sizeof(ios_t));
 	ios_fd(ios_stderr, STDERR_FILENO, 0, 0);
 	ios_stderr->bm = bm_none;
-	ios_stderr->filename = MEM_STRDUP("*stderr*");
+	ios_stderr->loc.filename = MEM_STRDUP("*stderr*");
 }
 
 /* higher level interface */
@@ -831,22 +831,17 @@
 	return ios_write(s, &ch, 1);
 }
 
-int
-ios_getc(ios_t *s)
+static void
+ios_loc(ios_t *s, uint8_t ch)
 {
-	uint8_t ch;
-	if(s->state == bst_rd && s->bpos < s->size)
-		ch = s->buf[s->bpos++];
-	else if(s->_eof || ios_read(s, &ch, 1) < 1)
-		return IOS_EOF;
 	if(ch == '\n'){
-		s->lineno++;
-		s->colno = 0;
+		s->loc.lineno++;
+		s->loc.colno = 0;
 		s->colnowait = 0;
 	}else if(s->colnowait > 0){
 		s->colnowait--;
 	}else{
-		s->colno++;
+		s->loc.colno++;
 		if(ch & 0x80){
 			if((ch & 0xe0) == 0xc0)
 				s->colnowait = 1;
@@ -856,6 +851,17 @@
 				s->colnowait = 3;
 		}
 	}
+}
+
+int
+ios_getc(ios_t *s)
+{
+	uint8_t ch;
+	if(s->state == bst_rd && s->bpos < s->size)
+		ch = s->buf[s->bpos++];
+	else if(s->_eof || ios_read(s, &ch, 1) < 1)
+		return IOS_EOF;
+	ios_loc(s, ch);
 	return ch;
 }
 
@@ -918,9 +924,9 @@
 	if(*r == Runeerror)
 		return 0;
 	if(*r == '\n')
-		s->colno = 0;
+		s->loc.colno = 0;
 	else
-		s->colno++;
+		s->loc.colno++;
 	return 1;
 }
 
@@ -934,8 +940,10 @@
 void
 ios_purge(ios_t *s)
 {
-	if(s->state == bst_rd)
-		s->bpos = s->size;
+	if(s->state == bst_rd){
+		for(; s->bpos < s->size; s->bpos++)
+			ios_loc(s, s->buf[s->bpos]);
+	}
 }
 
 int
--- a/ios.h
+++ b/ios.h
@@ -18,13 +18,17 @@
 #define IOS_BUFSIZE 32768
 
 typedef struct {
+	char *filename;
+	uint32_t lineno;
+	uint32_t colno;
+}ios_loc_t;
+
+typedef struct {
 	uint8_t *buf;		// start of buffer
 	size_t maxsize;   // space allocated to buffer
 	size_t size;	  // length of valid data in buf, >=ndirty
 	size_t bpos;	  // current position in buffer
 	size_t ndirty;	// # bytes at &buf[0] that need to be written
-	size_t lineno;
-	size_t colno;
 	off_t fpos;	   // cached file pos
 	bufmode_t bm;
 	int colnowait;
@@ -54,7 +58,7 @@
 	// request durable writes (fsync)
 	// uint8_t durable:1;
 
-	char *filename;
+	ios_loc_t loc;
 
 	// todo: mutex
 	uint8_t local[IOS_INLSIZE];
--- a/iostream.c
+++ b/iostream.c
@@ -10,9 +10,9 @@
 {
 	USED(v);
 	fl_print_str("#<io stream", f);
-	if(*f->filename){
+	if(*f->loc.filename){
 		fl_print_chr(' ', f);
-		fl_print_str(f->filename, f);
+		fl_print_str(f->loc.filename, f);
 	}
 	fl_print_chr('>', f);
 }
@@ -351,19 +351,19 @@
 BUILTIN("io-filename", io_filename)
 {
 	argcount(nargs, 1);
-	return cvalue_static_cstring(toiostream(args[0])->filename);
+	return cvalue_static_cstring(toiostream(args[0])->loc.filename);
 }
 
 BUILTIN("io-line", io_line)
 {
 	argcount(nargs, 1);
-	return size_wrap(toiostream(args[0])->lineno);
+	return size_wrap(toiostream(args[0])->loc.lineno);
 }
 
 BUILTIN("io-set-line!", io_set_line)
 {
 	argcount(nargs, 2);
-	toiostream(args[0])->lineno = tosize(args[1]);
+	toiostream(args[0])->loc.lineno = tosize(args[1]);
 	return FL_t;
 }
 
@@ -370,13 +370,13 @@
 BUILTIN("io-column", io_column)
 {
 	argcount(nargs, 1);
-	return size_wrap(toiostream(args[0])->colno);
+	return size_wrap(toiostream(args[0])->loc.colno);
 }
 
 BUILTIN("io-set-column!", io_set_column)
 {
 	argcount(nargs, 2);
-	toiostream(args[0])->colno = tosize(args[1]);
+	toiostream(args[0])->loc.colno = tosize(args[1]);
 	return FL_t;
 }
 
--- a/read.c
+++ b/read.c
@@ -10,11 +10,14 @@
 	TOK_OPENC, TOK_CLOSEC,
 };
 
+#define PAtLoc "at %"PRIu32":%"PRIu32
+
 typedef struct Rctx Rctx;
 
 struct Rctx {
 	uint32_t toktype;
 	value_t tokval;
+	ios_loc_t loc;
 	char buf[1024];
 };
 
@@ -110,7 +113,7 @@
 }
 
 static _Noreturn void
-parse_error(const char *format, ...)
+parse_error(ios_loc_t *loc, const char *format, ...)
 {
 	char msgbuf[512];
 	va_list args;
@@ -117,7 +120,7 @@
 	int n;
 
 	n = snprintf(msgbuf, sizeof(msgbuf), "%s:%"PRIu64":%"PRIu64": ",
-		RS->filename, (uint64_t)RS->lineno, (uint64_t)RS->colno);
+		loc->filename, (uint64_t)loc->lineno, (uint64_t)loc->colno);
 	if(n >= (int)sizeof(msgbuf))
 		n = 0;
 	va_start(args, format);
@@ -133,7 +136,7 @@
 {
 	ctx->buf[(*pi)++] = c;
 	if(*pi >= (int)(sizeof(ctx->buf)-1))
-		parse_error("token too long");
+		parse_error(&ctx->loc, "token too long");
 }
 
 // return: 1 if escaped (forced to be symbol)
@@ -194,6 +197,7 @@
 	if(ctx->toktype != TOK_NONE)
 		return ctx->toktype;
 	c = nextchar();
+	ctx->loc = RS->loc;
 	if(ios_eof(RS))
 		return TOK_NONE;
 	if(c == '(')
@@ -217,7 +221,7 @@
 	else if(c == '#'){
 		ch = ios_getc(RS); c = (char)ch;
 		if(ch == IOS_EOF)
-			parse_error("invalid read macro");
+			parse_error(&ctx->loc, "invalid read macro");
 		if(c == '.')
 			ctx->toktype = TOK_SHARPDOT;
 		else if(c == '\'')
@@ -225,12 +229,12 @@
 		else if(c == '\\'){
 			Rune cval;
 			if(ios_getutf8(RS, &cval) == IOS_EOF)
-				parse_error("end of input in character constant");
+				parse_error(&ctx->loc, "end of input in character constant");
 			if(cval == 'u' || cval == 'U' || cval == 'x'){
 				read_token(ctx, 'u', 0);
 				if(ctx->buf[1] != '\0'){ // not a solitary 'u','U','x'
 					if(!fl_read_numtok(&ctx->buf[1], &ctx->tokval, 16))
-						parse_error("invalid hex character constant");
+						parse_error(&ctx->loc, "invalid hex character constant");
 					cval = numval(ctx->tokval);
 				}
 			}else if(cval >= 'a' && cval <= 'z'){
@@ -250,7 +254,7 @@
 				else if(ctx->tokval == FL(spacesym))     cval = 0x20;
 				else if(ctx->tokval == FL(deletesym))    cval = 0x7F;
 				else
-					parse_error("unknown character #\\%s", ctx->buf);
+					parse_error(&ctx->loc, "unknown character #\\%s", ctx->buf);
 			}
 			ctx->toktype = TOK_NUM;
 			ctx->tokval = mk_rune(cval);
@@ -257,7 +261,7 @@
 		}else if(c == '('){
 			ctx->toktype = TOK_SHARPOPEN;
 		}else if(c == '<'){
-			parse_error("unreadable object");
+			parse_error(&ctx->loc, "unreadable object");
 		}else if(isdigit(c)){
 			read_token(ctx, c, 1);
 			c = (char)ios_getc(RS);
@@ -266,10 +270,10 @@
 			else if(c == '=')
 				ctx->toktype = TOK_LABEL;
 			else
-				parse_error("invalid label");
+				parse_error(&ctx->loc, "invalid label");
 			x = strtoll(ctx->buf, &end, 10);
 			if(*end != '\0')
-				parse_error("invalid label");
+				parse_error(&ctx->loc, "invalid label");
 			ctx->tokval = fixnum(x);
 		}else if(c == '!'){
 			// #! single line comment for shbang script support
@@ -284,7 +288,7 @@
 				ch = ios_getc(RS);
 			hashpipe_gotc:
 				if(ch == IOS_EOF)
-					parse_error("eof within comment");
+					parse_error(&ctx->loc, "eof within comment");
 				if((char)ch == '|'){
 					ch = ios_getc(RS);
 					if((char)ch == '#'){
@@ -317,7 +321,7 @@
 			read_token(ctx, (char)ch, 0);
 			x = strtol(ctx->buf, &end, 10);
 			if(*end != '\0' || ctx->buf[0] == '\0')
-				parse_error("invalid gensym label");
+				parse_error(&ctx->loc, "invalid gensym label");
 			ctx->toktype = TOK_GENSYM;
 			ctx->tokval = fixnum(x);
 		}else if(symchar(c)){
@@ -328,7 +332,7 @@
 			    (c == 'd' && (base = 10)) ||
 			    (c == 'x' && (base = 16))) && (isdigit_base(ctx->buf[1], base) || ctx->buf[1] == '-')){
 				if(!fl_read_numtok(&ctx->buf[1], &ctx->tokval, base))
-					parse_error("invalid base %d constant", base);
+					parse_error(&ctx->loc, "invalid base %d constant", base);
 				return (ctx->toktype = TOK_NUM);
 			}
 
@@ -335,7 +339,7 @@
 			ctx->toktype = TOK_SHARPSYM;
 			ctx->tokval = symbol(ctx->buf, true);
 		}else{
-			parse_error("unknown read macro");
+			parse_error(&ctx->loc, "unknown read macro");
 		}
 	}else if(c == ','){
 		ctx->toktype = TOK_COMMA;
@@ -396,7 +400,7 @@
 		ptrhash_put(&FL(readstate)->backrefs, (void*)label, (void*)v);
 	while(peek(ctx) != closer){
 		if(ios_eof(RS))
-			parse_error("unexpected end of input");
+			parse_error(&ctx->loc, "unexpected end of input");
 		v = FL(stack)[FL(sp)-1]; // reload after possible alloc in peek()
 		if(i >= vector_size(v)){
 			v = FL(stack)[FL(sp)-1] = vector_grow(v, label != UNBOUND);
@@ -438,7 +442,7 @@
 			if(temp == nil){
 				if(buf == ctx->buf)
 					MEM_FREE(buf);
-				parse_error("out of memory reading string");
+				parse_error(&ctx->loc, "out of memory reading string");
 			}
 			buf = temp;
 		}
@@ -446,7 +450,7 @@
 		if(c == IOS_EOF){
 			if(buf != ctx->buf)
 				MEM_FREE(buf);
-			parse_error("unexpected end of input in string");
+			parse_error(&ctx->loc, "unexpected end of input in string");
 		}
 		if(c == '"')
 			break;
@@ -455,7 +459,7 @@
 			if(c == IOS_EOF){
 				if(buf != ctx->buf)
 					MEM_FREE(buf);
-				parse_error("end of input in escape sequence");
+				parse_error(&ctx->loc, "end of input in escape sequence");
 			}
 			j = 0;
 			if(octal_digit(c)){
@@ -484,7 +488,7 @@
 				if(!j || r > Runemax){
 					if(buf != ctx->buf)
 						MEM_FREE(buf);
-					parse_error("invalid escape sequence");
+					parse_error(&ctx->loc, "invalid escape sequence");
 				}
 				if(ndig == 2)
 					buf[i++] = (char)r;
@@ -497,7 +501,14 @@
 				if(esc == (char)c && !strchr("\\'\"`", esc)){
 					if(buf != ctx->buf)
 						MEM_FREE(buf);
-					parse_error("invalid escape sequence: \\%c", (char)c);
+					ios_loc_t *l = &RS->loc;
+					parse_error(
+						&ctx->loc,
+						"invalid escape sequence \\%c "PAtLoc,
+						(char)c,
+						l->lineno,
+						l->colno
+					);
 				}
 				buf[i++] = esc;
 			}
@@ -520,16 +531,16 @@
 {
 	value_t c, *pc;
 	uint32_t t;
-	uint64_t lineno0, colno0;
+	ios_loc_t loc0;
 
-	lineno0 = RS->lineno;
-	colno0 = RS->colno - 1;
+	loc0 = RS->loc;
+	loc0.colno--;
 	PUSH(FL_nil);
 	pc = &FL(stack)[FL(sp)-1];  // to keep track of current cons cell
 	t = peek(ctx);
 	while(t != closer){
 		if(ios_eof(RS))
-			parse_error("unexpected end of input: %"PRIu64":%"PRIu64" not closed", lineno0, colno0);
+			parse_error(&loc0, "not closed: unexpected EOI "PAtLoc, ctx->loc.lineno, ctx->loc.colno);
 		c = mk_cons(); car_(c) = cdr_(c) = FL_nil;
 		if(iscons(*pc))
 			cdr_(*pc) = c;
@@ -549,10 +560,11 @@
 			cdr_(*pc) = c;
 			t = peek(ctx);
 			if(ios_eof(RS))
-				parse_error("unexpected end of input");
+				parse_error(&ctx->loc, "unexpected end of input");
 			if(t != closer){
 				take(ctx);
 				parse_error(
+					&ctx->loc,
 					"expected '%c'",
 					closer == TOK_CLOSEB ? ']' : (closer == TOK_CLOSEC ? '}' : ')')
 				);
@@ -623,9 +635,10 @@
 			return FL_f;
 		// constructor notation
 		c = nextchar();
+		ctx->loc = RS->loc;
 		if(c != '('){
 			take(ctx);
-			parse_error("expected argument list for %s", symbol_name(ctx->tokval));
+			parse_error(&ctx->loc, "expected argument list for %s", symbol_name(ctx->tokval));
 		}
 		PUSH(FL_nil);
 		read_list(ctx, &FL(stack)[FL(sp)-1], UNBOUND, TOK_CLOSE);
@@ -658,7 +671,7 @@
 	case TOK_LABEL:
 		// create backreference label
 		if(ptrhash_has(&FL(readstate)->backrefs, (void*)ctx->tokval))
-			parse_error("label %"PRIdPTR" redefined", numval(ctx->tokval));
+			parse_error(&ctx->loc, "label %"PRIdPTR" redefined", numval(ctx->tokval));
 		oldtokval = ctx->tokval;
 		v = do_read_sexpr(ctx, ctx->tokval);
 		ptrhash_put(&FL(readstate)->backrefs, (void*)oldtokval, (void*)v);
@@ -667,7 +680,7 @@
 		// look up backreference
 		v = (value_t)ptrhash_get(&FL(readstate)->backrefs, (void*)ctx->tokval);
 		if(v == (value_t)HT_NOTFOUND)
-			parse_error("undefined label %"PRIdPTR, numval(ctx->tokval));
+			parse_error(&ctx->loc, "undefined label %"PRIdPTR, numval(ctx->tokval));
 		return v;
 	case TOK_GENSYM:
 		pv = (value_t*)ptrhash_bp(&FL(readstate)->gensyms, (void*)ctx->tokval);
@@ -677,13 +690,13 @@
 	case TOK_DOUBLEQUOTE:
 		return read_string(ctx);
 	case TOK_CLOSE:
-		parse_error("unexpected ')'");
+		parse_error(&ctx->loc, "unexpected ')'");
 	case TOK_CLOSEB:
-		parse_error("unexpected ']'");
+		parse_error(&ctx->loc, "unexpected ']'");
 	case TOK_CLOSEC:
-		parse_error("unexpected '}'");
+		parse_error(&ctx->loc, "unexpected '}'");
 	case TOK_DOT:
-		parse_error("unexpected '.'");
+		parse_error(&ctx->loc, "unexpected '.'");
 	}
 	return FL_unspecified;
 }