ref: d9d1a7eb98c2fc31292d75c30a9dd6c6fc1bb4f9
parent: c14c252714aeff844abc0c9410cd5a081aa70320
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Nov 12 11:24:38 EST 2024
ios: ios_getc: better colno calculation when multi-byte runes are present
--- a/ios.c
+++ b/ios.c
@@ -713,7 +713,6 @@
s->fd = -1;
s->ownbuf = 1;
s->lineno = 1;
- s->colno = 0;
}
/* stream object initializers. we do no allocation. */
@@ -837,8 +836,19 @@
if(ch == '\n'){
s->lineno++;
s->colno = 0;
+ s->colnowait = 0;
+ }else if(s->colnowait > 0){
+ s->colnowait--;
}else{
s->colno++;
+ if(ch & 0x80){
+ if((ch & 0xe0) == 0xc0)
+ s->colnowait = 1;
+ else if((ch & 0xf0) == 0xe0)
+ s->colnowait = 2;
+ else
+ s->colnowait = 3;
+ }
}
return ch;
}
@@ -904,7 +914,7 @@
if(*r == '\n')
s->colno = 0;
else
- s->colno += wcwidth(*r);
+ s->colno++;
return 1;
}
--- a/ios.h
+++ b/ios.h
@@ -27,6 +27,7 @@
size_t colno;
off_t fpos; // cached file pos
bufmode_t bm;
+ int colnowait;
// the state only indicates where the underlying file position is relative
// to the buffer. reading: at the end. writing: at the beginning.
--- a/read.c
+++ b/read.c
@@ -225,10 +225,11 @@
}
// return: 1 if escaped (forced to be symbol)
-static int
-read_token(Rctx *ctx, char c, int digits)
+static bool
+read_token(Rctx *ctx, char c, bool digits)
{
- int i = 0, ch, escaped = 0, issym = 0, nc = 0;
+ int i = 0, ch, nc = 0;
+ bool escaped = false, issym = false;
while(1){
if(nc != 0){
@@ -240,10 +241,10 @@
c = (char)ch;
}
if(c == '|'){
- issym = 1;
+ issym = true;
escaped = !escaped;
}else if(c == '\\'){
- issym = 1;
+ issym = true;
ios_getc(RS);
ch = ios_peekc(RS);
if(ch == IOS_EOF)
@@ -258,7 +259,7 @@
}
if(nc == 0)
ios_skip(RS, -1);
- terminate:
+terminate:
ctx->buf[i++] = '\0';
return issym;
}
@@ -513,7 +514,7 @@
buf = LLT_ALLOC(sz);
while(1){
- if(i >= sz-4){ // -4: leaves room for longest utf8 sequence
+ if(i >= sz-UTFmax){ // -UTFmax: leaves room for longest utf8 sequence
sz *= 2;
temp = LLT_REALLOC(buf, sz);
if(temp == nil){