ref: a186c3bafb15cb6e95867848889e57537fd083ae
parent: a32850e030c8672030ab743b3dda31c90f925f18
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Mar 17 21:44:44 EDT 2025
str-find: index using runes instead of bytes References: https://todo.sr.ht/~ft/sl/30
--- a/src/str.c
+++ b/src/str.c
@@ -289,40 +289,55 @@
+// (str-find s needle [start]): look for needle (a str or a rune) in s,
+// beginning at rune index start (default 0). Returns the rune index of
+// the first match or nil when there is none; a start that lies past the
+// end of s is a bounds error.
BUILTIN("str-find", str_find)
{
- char cbuf[UTFmax+1];
- usize start = 0;
+ usize startrune = 0;
if(nargs == 3)
- start = tosize(args[2]);
+ startrune = tosize(args[2]);
else
argcount(nargs, 2);
char *s = tostr(args[0]);
- usize len = cv_len(ptr(args[0]));
- if(start > len)
- bounds_error(args[0], args[2]);
- char *needle; usize needlesz;
+ usize sbytes = cv_len(ptr(args[0]));
+ // needle: either a str as is, or a single rune encoded into rbuf
+ char *nd;
+ usize ndbytes;
sl_v v = args[1];
+ char rbuf[UTFmax+1];
sl_cprim *cp = ptr(v);
if(iscprim(v) && cp_class(cp) == sl_runetype){
Rune r = *(Rune*)cp_data(cp);
- needlesz = runetochar(cbuf, &r);
- needle = cbuf;
- needle[needlesz] = 0;
+ ndbytes = runetochar(rbuf, &r);
+ nd = rbuf;
+ nd[ndbytes] = 0;
}else if(sl_isstr(v)){
csl_v *cv = ptr(v);
- needlesz = cv_len(cv);
- needle = (char*)cv_data(cv);
+ ndbytes = cv_len(cv);
+ nd = (char*)cv_data(cv);
}else{
- type_error("str", args[1]);
+ type_error("str or rune", args[1]);
}
- if(needlesz > len-start)
- return sl_nil;
- if(needlesz == 0)
- return size_wrap(start);
- usize i;
- for(i = start; i < len-needlesz+1; i++){
- if(s[i] == needle[0] && memcmp(&s[i+1], needle+1, needlesz-1) == 0)
- return size_wrap(i);
+ usize i, n;
+ // first iterate to the starting rune: i is the byte offset, n the rune index
+ for(i = n = 0; n < startrune && i < sbytes; n++)
+ i += u8_seqlen(s+i);
+ if(n != startrune)
+ bounds_error(args[0], args[2]); // startrune != 0 here, so args[2] was passed
+ // an empty needle matches right at the (now validated) starting rune
+ if(ndbytes == 0)
+ return size_wrap(startrune);
+ // the needle must fit in what is left of s (i can overshoot sbytes if
+ // u8_seqlen steps past a truncated tail); this also keeps
+ // sbytes-ndbytes below from wrapping around
+ if(i > sbytes || ndbytes > sbytes-i)
+ return sl_nil;
+ // now search for the needle, one rune at a time
+ for(; i <= sbytes-ndbytes; n++){
+ if(s[i] == nd[0] && memcmp(&s[i+1], nd+1, ndbytes-1) == 0)
+ return size_wrap(n);
+ i += u8_seqlen(s+i);
}
return sl_nil;
}
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -596,20 +596,24 @@
(assert (rune-title-case? #\Dž))
(def s "hello й goodbye")
-(assert (= 6 (str-find s #\й)))
-(assert (= 6 (str-find s #\й 6)))
-(assert-fail (str-find s #\o -1))
-(assert (not (str-find s #\o 16)))
-(assert-fail (str-find s #\o 17))
(assert (= 4 (str-find s #\o)))
-(assert (= 10 (str-find s #\o 5)))
-(assert (= 11 (str-find s #\o 11)))
-(assert (not (str-find s #\o 12)))
+(assert (= 9 (str-find s #\o 5)))
+(assert (= 10 (str-find s #\o 10)))
+(assert (not (str-find s #\o 11)))
+(assert (not (str-find s #\o 15)))
(assert (= 4 (str-find s "o")))
(assert (= 2 (str-find s "ll")))
(assert (not (str-find s "ll" 3)))
(assert (= 0 (str-find s "")))
(assert (= 7 (str-find s "" 7)))
+(assert (= 6 (str-find s #\й)))
+(assert (= 6 (str-find s #\й 6)))
+(assert (= 6 (str-find s "й g")))
+(assert (= 8 (str-find s "goodbye")))
+(assert (= 12 (str-find s "bye" 12)))
+(assert (not (str-find s "goodbye" 9)))
+(assert-fail (str-find s #\o -1))
+(assert-fail (str-find s #\o 16))
(assert-fail (str-find s 0))
(assert-fail (str-find s (byte #\o)))