shithub: sl

Download patch

ref: 4ba6e67e51377bf0824573f6445c4714b41d8e7a
parent: 70c36896d20df1971a65a800acbe3872157a7910
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Mar 24 23:39:50 EDT 2025

add «verbatim strings»

--- a/README.md
+++ b/README.md
@@ -24,21 +24,20 @@
 Some of the changes from the original include:
 
  * aggressive clean up, removal, renaming and refactoring
- * seamless bignums
+ * bignums
+ * previously available (but not merged) patches from the community and [Julia](https://github.com/JuliaLang/julia) are applied
  * `[` and `]`, `{` and `}` are synonyms to `(` and `)`
+ * `«` and `»` for verbatim strings
  * `define` → `def`, `define-macro` → `defmacro`
  * `λ` as a shorthand for `lambda`
- * `t`/`T` instead of `#t`/`#T` and `nil` instead of `#f`
+ * `T` instead of `#t`/`#T` and `NIL` instead of `#f`
+ * `c***r` of empty list returns empty list
  * docstrings - `(def (f ...) "Docs here" ...)` and `(help ...)`
  * automatic gensyms for macros (`blah#`) at read time
  * proper `(void)` and `void?`
  * better error reporting - disassembly at the current instruction, location of syntax errors
- * some of the previously available (but not merged) patches from the community and [Julia](https://github.com/JuliaLang/julia) are applied
- * `c***r` of empty list returns empty list
  * "boot" image is built into the executable
  * vm opcode definitions and tables are generated from a single file
- * fixed bootstrap (makes it work properly when opcodes change)
- * built-in symbols aren't constants and can be redefined
 
 Two ways to learn about more changes:
 
--- a/src/read.c
+++ b/src/read.c
@@ -8,7 +8,7 @@
 	TOK_BQ, TOK_COMMA, TOK_COMMAAT, TOK_COMMADOT,
 	TOK_SHARPDOT, TOK_LABEL, TOK_BACKREF, TOK_SHARPQUOTE, TOK_SHARPOPEN,
 	TOK_OPENB, TOK_CLOSEB, TOK_SHARPSYM, TOK_GENSYM, TOK_DOUBLEQUOTE,
-	TOK_OPENC, TOK_CLOSEC,
+	TOK_OPENC, TOK_CLOSEC, TOK_VERBATIM,
 };
 
 #define PAtLoc "at %"PRIu32":%"PRIu32
@@ -199,7 +199,8 @@
 static u32int
 peek(Rctx *ctx)
 {
-	char c, *end;
+	u8int c;
+	char *end;
 	sl_fx x;
 	int ch, base;
 
@@ -227,7 +228,10 @@
 		ctx->toktype = TOK_BQ;
 	else if(c == '"')
 		ctx->toktype = TOK_DOUBLEQUOTE;
-	else if(c == '#'){
+	else if(c == 0xc2 && ios_peekc(RS) == 0xab){ // «
+		ctx->toktype = TOK_VERBATIM;
+		ios_getc(RS);
+	}else if(c == '#'){
 		c = ch = ios_getc(RS);
 		if(ch == IOS_EOF)
 			parse_error(&ctx->loc, "invalid read macro");
@@ -440,12 +444,52 @@
 }
 
 static sl_v
+read_verbatim(Rctx *ctx)
+{
+	// Reads raw bytes up to the closing » (UTF-8 c2 bb); no escape processing is done.
+	char *temp, *buf = ctx->buf; // accumulate in ctx->buf until it has to grow
+	usize i = 0, sz = sizeof(ctx->buf);
+
+	while(1){
+		if(i >= sz-UTFmax){ // -UTFmax: leaves room for longest utf8 sequence
+			sz *= 2;
+			if(buf == ctx->buf){
+				if((temp = MEM_ALLOC(sz)) != nil) // first growth: move off ctx->buf
+					memcpy(temp, ctx->buf, i);
+			}else
+				temp = MEM_REALLOC(buf, sz);
+			if(temp == nil){ // assumes a failed MEM_REALLOC leaves buf valid, as realloc does
+				if(buf != ctx->buf) // only release a buffer this function allocated, never ctx->buf
+					MEM_FREE(buf);
+				parse_error(&ctx->loc, "out of memory reading verbatim string");
+			}
+			buf = temp;
+		}
+		int c = ios_getc(RS);
+		if(c == IOS_EOF){ // unterminated string: release the heap buffer before reporting
+			if(buf != ctx->buf)
+				MEM_FREE(buf);
+			parse_error(&ctx->loc, "unexpected end of input in verbatim string");
+		}
+		if(c == 0xc2 && ios_peekc(RS) == 0xbb){ // »
+			ios_getc(RS);
+			break;
+		}
+		buf[i++] = c;
+	}
+	sl_v s = cvalue_str(i); // result holds exactly the i bytes collected
+	memcpy(cvalue_data(s), buf, i);
+	if(buf != ctx->buf)
+		MEM_FREE(buf);
+	return s;
+}
+
+static sl_v
 read_str(Rctx *ctx)
 {
 	char *buf, *temp;
 	char eseq[10];
 	usize i = 0, j, sz, ndig;
-	int c;
 	sl_v s;
 	Rune r = 0;
 
@@ -466,7 +510,7 @@
 			}
 			buf = temp;
 		}
-		c = ios_getc(RS);
+		int c = ios_getc(RS);
 		if(c == IOS_EOF){
 			if(buf != ctx->buf)
 				MEM_FREE(buf);
@@ -708,6 +752,8 @@
 		return *pv;
 	case TOK_DOUBLEQUOTE:
 		return read_str(ctx);
+	case TOK_VERBATIM:
+		return read_verbatim(ctx);
 	case TOK_CLOSE:
 		parse_error(&ctx->loc, "unexpected ')'");
 	case TOK_CLOSEB: