shithub: sl

Download patch

ref: 4b8271cd44fd92d2580e9e2da23c5dcc85b1ffad
parent: 7e5c86493a88235c1a1f68a1d026654bcdefcfa5
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Jan 28 19:43:30 EST 2025

string-decode: fix to work on strings containing \0

--- a/src/string.c
+++ b/src/string.c
@@ -87,7 +87,7 @@
 
 BUILTIN("string-decode", string_decode)
 {
-	int term = 0;
+	bool term = false;
 	if(nargs == 2)
 		term = args[1] != FL_f;
 	else
@@ -97,7 +97,7 @@
 	cvalue_t *cv = ptr(args[0]);
 	char *ptr = (char*)cv_data(cv);
 	size_t nb = cv_len(cv);
-	size_t nc = utfnlen(ptr, nb);
+	size_t nc = u8_runelen(ptr, nb);
 	size_t newsz = nc*sizeof(Rune);
 	if(term)
 		newsz += sizeof(Rune);
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -38,6 +38,15 @@
 	return trailingBytesForUTF8[(uint8_t)s[0]] + 1;
 }
 
+size_t
+u8_runelen(const char *s, size_t nb)
+{
+	size_t nr, i;
+	for(i = nr = 0; i < nb; nr++)
+		i += trailingBytesForUTF8[(uint8_t)s[i]] + 1;
+	return nr;
+}
+
 /* byte offset => charnum */
 size_t
 u8_charnum(const char *s, size_t offset)
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -12,6 +12,9 @@
 /* returns length of next utf-8 sequence */
 size_t u8_seqlen(const char *s) fl_purefn;
 
+/* number of runes in the first nb bytes of a utf-8 string; \0 does not terminate the count */
+size_t u8_runelen(const char *s, size_t nb) fl_purefn;
+
 char read_escape_control_char(char c) fl_constfn;
 
 /* given a wide character, convert it to an ASCII escape sequence stored in
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -492,6 +492,9 @@
   (io-close b))
 
 (define s "привет\0пока")
+
+(assert (equal? s (string-encode (string-decode s))))
+
 (assert (eq? 21 (sizeof s)))
 (assert (eq? 21 (length s)))
 (assert (eq? 11 (string-length s)))