shithub: sl

Download patch

ref: a186c3bafb15cb6e95867848889e57537fd083ae
parent: a32850e030c8672030ab743b3dda31c90f925f18
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Mar 17 21:44:44 EDT 2025

str-find: index using runes instead of bytes

References: https://todo.sr.ht/~ft/sl/30

--- a/src/str.c
+++ b/src/str.c
@@ -289,40 +289,53 @@
 
+// (str-find str needle [start]): looks for needle, a str or a rune, in
+// str, beginning at rune index start (0 when omitted).  Returns the rune
+// index of the first match, or nil when there is none.  An empty needle
+// yields start as is; otherwise a start past the last rune is an error.
 BUILTIN("str-find", str_find)
 {
-	char cbuf[UTFmax+1];
-	usize start = 0;
+	usize startrune = 0;
 	if(nargs == 3)
-		start = tosize(args[2]);
+		startrune = tosize(args[2]);
 	else
 		argcount(nargs, 2);
 	char *s = tostr(args[0]);
-	usize len = cv_len(ptr(args[0]));
-	if(start > len)
-		bounds_error(args[0], args[2]);
-	char *needle; usize needlesz;
+	usize sbytes = cv_len(ptr(args[0]));
 
+	// needle
+	char *nd;
+	usize ndbytes;
 	sl_v v = args[1];
+	char rbuf[UTFmax+1];
 	sl_cprim *cp = ptr(v);
 	if(iscprim(v) && cp_class(cp) == sl_runetype){
 		Rune r = *(Rune*)cp_data(cp);
-		needlesz = runetochar(cbuf, &r);
-		needle = cbuf;
-		needle[needlesz] = 0;
+		ndbytes = runetochar(rbuf, &r);
+		nd = rbuf;
+		nd[ndbytes] = 0;
 	}else if(sl_isstr(v)){
 		csl_v *cv = ptr(v);
-		needlesz = cv_len(cv);
-		needle = (char*)cv_data(cv);
+		ndbytes = cv_len(cv);
+		nd = (char*)cv_data(cv);
 	}else{
-		type_error("str", args[1]);
+		type_error("str or rune", args[1]);
 	}
-	if(needlesz > len-start)
-		return sl_nil;
-	if(needlesz == 0)
-		return size_wrap(start);
-	usize i;
-	for(i = start; i < len-needlesz+1; i++){
-		if(s[i] == needle[0] && memcmp(&s[i+1], needle+1, needlesz-1) == 0)
-			return size_wrap(i);
+	if(ndbytes == 0)
+		return size_wrap(startrune);
+	usize i, n;
+	// first iterate to the starting rune
+	for(i = n = 0; n < startrune && i < sbytes; n++)
+		i += u8_seqlen(s+i);
+	if(n != startrune)
+		bounds_error(args[0], fixnum(startrune));
+	// a needle longer than what is left of the haystack can't match; bail
+	// out before the unsigned sbytes-ndbytes below gets a chance to wrap
+	if(i > sbytes || ndbytes > sbytes-i)
+		return sl_nil;
+	// now search for the needle
+	for(; i < sbytes-ndbytes+1; n++){
+		if(s[i] == nd[0] && memcmp(&s[i+1], nd+1, ndbytes-1) == 0)
+			return size_wrap(n);
+		i += u8_seqlen(s+i);
 	}
 	return sl_nil;
 }
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -596,20 +596,20 @@
 (assert (rune-title-case? #\Dž))
 
 (def s "hello й goodbye")
-(assert (= 6 (str-find s #\й)))
-(assert (= 6 (str-find s #\й 6)))
-(assert-fail (str-find s #\o -1))
-(assert (not (str-find s #\o 16)))
-(assert-fail (str-find s #\o 17))
 (assert (= 4 (str-find s #\o)))
-(assert (= 10 (str-find s #\o 5)))
-(assert (= 11 (str-find s #\o 11)))
-(assert (not (str-find s #\o 12)))
+(assert (= 9 (str-find s #\o 5)))
+(assert (= 10 (str-find s #\o 10)))
+(assert (not (str-find s #\o 11)))
+(assert (not (str-find s #\o 15)))
 (assert (= 4 (str-find s "o")))
 (assert (= 2 (str-find s "ll")))
 (assert (not (str-find s "ll" 3)))
 (assert (= 0 (str-find s "")))
 (assert (= 7 (str-find s "" 7)))
+(assert (= 6 (str-find s #\й)))
+(assert (= 6 (str-find s #\й 6)))
+(assert-fail (str-find s #\o -1))
+(assert-fail (str-find s #\o 16))
 (assert-fail (str-find s 0))
 (assert-fail (str-find s (byte #\o)))