ref: 4ba6e67e51377bf0824573f6445c4714b41d8e7a
parent: 70c36896d20df1971a65a800acbe3872157a7910
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Mar 24 23:39:50 EDT 2025
add «verbatim strings»
--- a/README.md
+++ b/README.md
@@ -24,21 +24,20 @@
Some of the changes from the original include:
* aggressive clean up, removal, renaming and refactoring
- * seamless bignums
+ * bignums
+ * previously available (but not merged) patches from the community and [Julia](https://github.com/JuliaLang/julia) are applied
* `[` and `]`, `{` and `}` are synonyms to `(` and `)`
+ * `«` and `»` for verbatim strings
* `define` → `def`, `define-macro` → `defmacro`
* `λ` as a shorthand for `lambda`
- * `t`/`T` instead of `#t`/`#T` and `nil` instead of `#f`
+ * `T` instead of `#t`/`#T` and `NIL` instead of `#f`
+ * `c***r` of empty list returns empty list
* docstrings - `(def (f ...) "Docs here" ...)` and `(help ...)`
* automatic gensyms for macros (`blah#`) at read time
* proper `(void)` and `void?`
* better error reporting - disassembly at the current instruction, location of syntax errors
- * some of the previously available (but not merged) patches from the community and [Julia](https://github.com/JuliaLang/julia) are applied
- * `c***r` of empty list returns empty list
* "boot" image is built into the executable
* vm opcode definitions and tables are generated from a single file
- * fixed bootstrap (makes it work properly when opcodes change)
- * built-in symbols aren't constants and can be redefined
Two ways to learn about more changes:
--- a/src/read.c
+++ b/src/read.c
@@ -8,7 +8,7 @@
TOK_BQ, TOK_COMMA, TOK_COMMAAT, TOK_COMMADOT,
TOK_SHARPDOT, TOK_LABEL, TOK_BACKREF, TOK_SHARPQUOTE, TOK_SHARPOPEN,
TOK_OPENB, TOK_CLOSEB, TOK_SHARPSYM, TOK_GENSYM, TOK_DOUBLEQUOTE,
- TOK_OPENC, TOK_CLOSEC,
+ TOK_OPENC, TOK_CLOSEC, TOK_VERBATIM,
};
#define PAtLoc "at %"PRIu32":%"PRIu32
@@ -199,7 +199,8 @@
static u32int
peek(Rctx *ctx)
{
- char c, *end;
+ u8int c;
+ char *end;
sl_fx x;
int ch, base;
@@ -227,7 +228,10 @@
ctx->toktype = TOK_BQ;
else if(c == '"')
ctx->toktype = TOK_DOUBLEQUOTE;
- else if(c == '#'){
+ else if(c == 0xc2 && ios_peekc(RS) == 0xab){ // «
+ ctx->toktype = TOK_VERBATIM;
+ ios_getc(RS);
+ }else if(c == '#'){
c = ch = ios_getc(RS);
if(ch == IOS_EOF)
parse_error(&ctx->loc, "invalid read macro");
@@ -440,12 +444,52 @@
}
+// Read the body of a verbatim string and return it as a string value.
+//
+// Bytes are copied from the input exactly as they appear (no escape
+// processing is done here) until the closing » is found, i.e. the UTF-8
+// byte pair 0xc2 0xbb. The closing delimiter is consumed but not stored.
+// The opening « is expected to have been consumed already by peek().
+//
+// The text is accumulated in ctx->buf first; once that fills up, it moves
+// to a heap buffer that doubles in size as needed. The heap buffer (if any)
+// is released before returning or reporting an error.
+//
+// Errors are reported through parse_error(): on end of input before the
+// closing », and on allocation failure.
+// NOTE(review): the code relies on parse_error() not returning - confirm.
static sl_v
+read_verbatim(Rctx *ctx)
+{
+	char *temp;
+	usize i = 0, sz = sizeof(ctx->buf);
+	char *buf = ctx->buf;
+
+	while(1){
+		if(i >= sz-UTFmax){ // -UTFmax: leaves room for longest utf8 sequence
+			sz *= 2;
+			if(buf == ctx->buf){
+				// first overflow: switch from ctx->buf to a heap copy
+				if((temp = MEM_ALLOC(sz)) != nil)
+					memcpy(temp, ctx->buf, i);
+			}else
+				temp = MEM_REALLOC(buf, sz);
+			if(temp == nil){
+				// Only a heap buffer may be released; ctx->buf is never
+				// freed on any other path of this function.
+				// (assumes MEM_REALLOC leaves buf valid on failure, like realloc)
+				if(buf != ctx->buf)
+					MEM_FREE(buf);
+				parse_error(&ctx->loc, "out of memory reading verbatim string");
+			}
+			buf = temp;
+		}
+		int c = ios_getc(RS);
+		if(c == IOS_EOF){
+			if(buf != ctx->buf)
+				MEM_FREE(buf);
+			parse_error(&ctx->loc, "unexpected end of input in verbatim string");
+		}
+		if(c == 0xc2 && ios_peekc(RS) == 0xbb){ // »
+			ios_getc(RS); // consume the second byte of the delimiter
+			break;
+		}
+		buf[i++] = c;
+	}
+	// copy the collected bytes into a freshly created string value
+	sl_v s = cvalue_str(i);
+	memcpy(cvalue_data(s), buf, i);
+	if(buf != ctx->buf)
+		MEM_FREE(buf);
+	return s;
+}
+
+static sl_v
read_str(Rctx *ctx)
{
char *buf, *temp;
char eseq[10];
usize i = 0, j, sz, ndig;
- int c;
sl_v s;
Rune r = 0;
@@ -466,7 +510,7 @@
}
buf = temp;
}
- c = ios_getc(RS);
+ int c = ios_getc(RS);
if(c == IOS_EOF){
if(buf != ctx->buf)
MEM_FREE(buf);
@@ -708,6 +752,8 @@
return *pv;
case TOK_DOUBLEQUOTE:
return read_str(ctx);
+ case TOK_VERBATIM:
+ return read_verbatim(ctx);
case TOK_CLOSE:
parse_error(&ctx->loc, "unexpected ')'");
case TOK_CLOSEB: