ref: 4b8271cd44fd92d2580e9e2da23c5dcc85b1ffad
parent: 7e5c86493a88235c1a1f68a1d026654bcdefcfa5
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Jan 28 19:43:30 EST 2025
string-decode: fix to work on strings containing \0
--- a/src/string.c
+++ b/src/string.c
@@ -87,7 +87,7 @@
BUILTIN("string-decode", string_decode)
{
- int term = 0;
+ bool term = false;
if(nargs == 2)
term = args[1] != FL_f;
else
@@ -97,7 +97,7 @@
cvalue_t *cv = ptr(args[0]);
char *ptr = (char*)cv_data(cv);
size_t nb = cv_len(cv);
- size_t nc = utfnlen(ptr, nb);
+ size_t nc = u8_runelen(ptr, nb);
size_t newsz = nc*sizeof(Rune);
if(term)
newsz += sizeof(Rune);
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -38,6 +38,15 @@
return trailingBytesForUTF8[(uint8_t)s[0]] + 1;
}
+size_t
+u8_runelen(const char *s, size_t nb)
+{
+ size_t nr, i;
+ for(i = nr = 0; i < nb; nr++)
+ i += trailingBytesForUTF8[(uint8_t)s[i]] + 1;
+ return nr;
+}
+
/* byte offset => charnum */
size_t
u8_charnum(const char *s, size_t offset)
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -12,6 +12,9 @@
/* returns length of next utf-8 sequence */
size_t u8_seqlen(const char *s) fl_purefn;
+/* number of runes in the first nb bytes of s; does not stop at \0 */
+size_t u8_runelen(const char *s, size_t nb) fl_purefn;
+
char read_escape_control_char(char c) fl_constfn;
/* given a wide character, convert it to an ASCII escape sequence stored in
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -492,6 +492,9 @@
(io-close b))
(define s "привет\0пока")
+
+(assert (equal? s (string-encode (string-decode s))))
+
(assert (eq? 21 (sizeof s)))
(assert (eq? 21 (length s)))
(assert (eq? 11 (string-length s)))