shithub: femtolisp

Download patch

ref: d9d1a7eb98c2fc31292d75c30a9dd6c6fc1bb4f9
parent: c14c252714aeff844abc0c9410cd5a081aa70320
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Nov 12 11:24:38 EST 2024

ios: ios_getc: better colno calculation when multi-byte runes are present

--- a/ios.c
+++ b/ios.c
@@ -713,7 +713,6 @@
 	s->fd = -1;
 	s->ownbuf = 1;
 	s->lineno = 1;
-	s->colno = 0;
 }
 
 /* stream object initializers. we do no allocation. */
@@ -837,8 +836,19 @@
 	if(ch == '\n'){
 		s->lineno++;
 		s->colno = 0;
+		s->colnowait = 0;
+	}else if(s->colnowait > 0){
+		s->colnowait--;
 	}else{
 		s->colno++;
+		if(ch & 0x80){
+			if((ch & 0xe0) == 0xc0)
+				s->colnowait = 1;
+			else if((ch & 0xf0) == 0xe0)
+				s->colnowait = 2;
+			else
+				s->colnowait = 3;
+		}
 	}
 	return ch;
 }
@@ -904,7 +914,7 @@
 	if(*r == '\n')
 		s->colno = 0;
 	else
-		s->colno += wcwidth(*r);
+		s->colno++;
 	return 1;
 }
 
--- a/ios.h
+++ b/ios.h
@@ -27,6 +27,7 @@
 	size_t colno;
 	off_t fpos;	   // cached file pos
 	bufmode_t bm;
+	int colnowait;
 
 	// the state only indicates where the underlying file position is relative
 	// to the buffer. reading: at the end. writing: at the beginning.
--- a/read.c
+++ b/read.c
@@ -225,10 +225,11 @@
 }
 
 // return: 1 if escaped (forced to be symbol)
-static int
-read_token(Rctx *ctx, char c, int digits)
+static bool
+read_token(Rctx *ctx, char c, bool digits)
 {
-	int i = 0, ch, escaped = 0, issym = 0, nc = 0;
+	int i = 0, ch, nc = 0;
+	bool escaped = false, issym = false;
 
 	while(1){
 		if(nc != 0){
@@ -240,10 +241,10 @@
 			c = (char)ch;
 		}
 		if(c == '|'){
-			issym = 1;
+			issym = true;
 			escaped = !escaped;
 		}else if(c == '\\'){
-			issym = 1;
+			issym = true;
 			ios_getc(RS);
 			ch = ios_peekc(RS);
 			if(ch == IOS_EOF)
@@ -258,7 +259,7 @@
 	}
 	if(nc == 0)
 		ios_skip(RS, -1);
- terminate:
+terminate:
 	ctx->buf[i++] = '\0';
 	return issym;
 }
@@ -513,7 +514,7 @@
 
 	buf = LLT_ALLOC(sz);
 	while(1){
-		if(i >= sz-4){ // -4: leaves room for longest utf8 sequence
+		if(i >= sz-UTFmax){ // -UTFmax: leaves room for longest utf8 sequence
 			sz *= 2;
 			temp = LLT_REALLOC(buf, sz);
 			if(temp == nil){