ref: b919a8a8873b9b191c056e8124363b7d9e1d506f
parent: 10ce12b59ef284066aa81d630feb59d8968cd7bd
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Thu Mar 13 20:07:05 EDT 2025
string-split: add optional :trim T/NIL to filter out empty strings
--- a/src/sl.c
+++ b/src/sl.c
@@ -15,7 +15,7 @@
sl_v sl_commadot, sl_trycatch, sl_backquote;
sl_v sl_conssym, sl_symbolsym, sl_fixnumsym, sl_vectorsym, sl_builtinsym, sl_vu8sym;
sl_v sl_defsym, sl_defmacrosym, sl_forsym, sl_setqsym;
-sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym;
+sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym, sl_trimsym;
sl_v sl_nulsym, sl_alarmsym, sl_backspacesym, sl_tabsym, sl_linefeedsym, sl_newlinesym;
sl_v sl_vtabsym, sl_pagesym, sl_returnsym, sl_escsym, sl_spacesym, sl_deletesym;
sl_v sl_errio, sl_errparse, sl_errtype, sl_errarg, sl_errmem;
@@ -1272,6 +1272,7 @@
sl_evalsym = csymbol("eval");
sl_vu8sym = csymbol("vu8");
sl_fnsym = csymbol("fn");
+ sl_trimsym = csymbol(":trim");
sl_nulsym = csymbol("nul");
sl_alarmsym = csymbol("alarm");
sl_backspacesym = csymbol("backspace");
--- a/src/sl.h
+++ b/src/sl.h
@@ -415,7 +415,7 @@
extern sl_v sl_commadot, sl_trycatch, sl_backquote;
extern sl_v sl_conssym, sl_symbolsym, sl_fixnumsym, sl_vectorsym, sl_builtinsym, sl_vu8sym;
extern sl_v sl_defsym, sl_defmacrosym, sl_forsym, sl_setqsym;
-extern sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym;
+extern sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym, sl_trimsym;
extern sl_v sl_nulsym, sl_alarmsym, sl_backspacesym, sl_tabsym, sl_linefeedsym, sl_newlinesym;
extern sl_v sl_vtabsym, sl_pagesym, sl_returnsym, sl_escsym, sl_spacesym, sl_deletesym;
extern sl_v sl_errio, sl_errparse, sl_errtype, sl_errarg, sl_errmem;
--- a/src/string.c
+++ b/src/string.c
@@ -141,11 +141,30 @@
BUILTIN("string-split", string_split)
{
- argcount(nargs, 2);
+ if(nargs < 1)
+ argcount(nargs, 1);
char *s = tostring(args[0]);
- char *delim = tostring(args[1]);
usize len = cv_len(ptr(args[0]));
- usize dlen = cv_len(ptr(args[1]));
+ // split on whitespace by default
+ const char *delim0 = " \t\n\r\v", *delim = delim0;
+ usize dlen = 5;
+ int n = 1;
+ // second is either a :trim or a separator
+ if(nargs > n && args[n] != sl_trimsym){
+ delim = tostring(args[n]);
+ dlen = cv_len(ptr(args[n]));
+ n++;
+ }
+ bool trim = false;
+ // it can only be a :trim X now
+ if(nargs > n){
+ if(args[n] != sl_trimsym)
+ lerrorf(sl_errarg, "invalid argument at position %d", n);
+ n++;
+ if(nargs <= n)
+ argcount(nargs, n+1);
+ trim = args[n] != sl_nil;
+ }
usize ssz, tokend, tokstart, i = 0;
sl_v first = sl_nil, c = sl_nil, last;
usize junk;
@@ -155,15 +174,20 @@
do{
// find and allocate next token
tokstart = tokend = i;
- while(i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
+ while(i < len && !u8_memchr((char*)delim, u8_nextmemchar(s, &i), dlen, &junk))
tokend = i;
ssz = tokend - tokstart;
+ if(ssz == 0 && trim)
+ continue;
+
last = c; // save previous cons cell
c = mk_cons(cvalue_string(ssz), sl_nil);
- // we've done allocation; reload movable pointers
- s = cvalue_data(args[0]);
- delim = cvalue_data(args[1]);
+ // we've done allocation; reload movable pointers. s can move even
+ // when the static default delimiters are in use, so always reload it
+ s = cvalue_data(args[0]);
+ if(delim != delim0)
+ delim = cvalue_data(args[1]);
if(ssz)
memmove(cvalue_data(car_(c)), &s[tokstart], ssz);
@@ -177,7 +201,7 @@
// note this tricky condition: if the string ends with a
// delimiter, we need to go around one more time to add an
// empty string. this happens when (i == len && tokend < i)
- }while(i < len || (i == len && (tokend != i)));
+ }while(i < len || (i == len && tokend != i));
sl_free_gc_handles(2);
return first;
}
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -507,6 +507,7 @@
(io-close b))
(def s "привет\0пока")
+(def s2 "hello \t \n world\n ")
(assert (equal? s (string-encode (string-decode s))))
(assert (equal? (string s "\0") (string-encode (string-decode s t))))
@@ -551,6 +552,11 @@
(assert (equal? '("привет" "пока") (string-split s "\0")))
(assert (equal? '("пр" "вет" "пок" "") (string-split s "аи\0")))
(assert (equal? '("" "") (string-split "1" "1")))
+
+(assert (equal? '("hello" "world") (string-split s2 :trim T)))
+(assert (equal? '("hello" "\t" "\n" "world\n") (string-split s2 " " :trim T)))
+(assert (equal? (list s2) (string-split s2 "X" :trim T)))
+(assert (equal? (list s2) (string-split s2 "X")))
(assert (equal? #\а (string-char s 10)))
(assert (equal? #\nul (string-char s 6)))