shithub: sl

Download patch

ref: b919a8a8873b9b191c056e8124363b7d9e1d506f
parent: 10ce12b59ef284066aa81d630feb59d8968cd7bd
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Thu Mar 13 20:07:05 EDT 2025

string-split: add optional :trim T/NIL to filter out empty strings

--- a/src/sl.c
+++ b/src/sl.c
@@ -15,7 +15,7 @@
 sl_v sl_commadot, sl_trycatch, sl_backquote;
 sl_v sl_conssym, sl_symbolsym, sl_fixnumsym, sl_vectorsym, sl_builtinsym, sl_vu8sym;
 sl_v sl_defsym, sl_defmacrosym, sl_forsym, sl_setqsym;
-sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym;
+sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym, sl_trimsym;
 sl_v sl_nulsym, sl_alarmsym, sl_backspacesym, sl_tabsym, sl_linefeedsym, sl_newlinesym;
 sl_v sl_vtabsym, sl_pagesym, sl_returnsym, sl_escsym, sl_spacesym, sl_deletesym;
 sl_v sl_errio, sl_errparse, sl_errtype, sl_errarg, sl_errmem;
@@ -1272,6 +1272,7 @@
 	sl_evalsym = csymbol("eval");
 	sl_vu8sym = csymbol("vu8");
 	sl_fnsym = csymbol("fn");
+	sl_trimsym = csymbol(":trim");
 	sl_nulsym = csymbol("nul");
 	sl_alarmsym = csymbol("alarm");
 	sl_backspacesym = csymbol("backspace");
--- a/src/sl.h
+++ b/src/sl.h
@@ -415,7 +415,7 @@
 extern sl_v sl_commadot, sl_trycatch, sl_backquote;
 extern sl_v sl_conssym, sl_symbolsym, sl_fixnumsym, sl_vectorsym, sl_builtinsym, sl_vu8sym;
 extern sl_v sl_defsym, sl_defmacrosym, sl_forsym, sl_setqsym;
-extern sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym;
+extern sl_v sl_booleansym, sl_nullsym, sl_evalsym, sl_fnsym, sl_trimsym;
 extern sl_v sl_nulsym, sl_alarmsym, sl_backspacesym, sl_tabsym, sl_linefeedsym, sl_newlinesym;
 extern sl_v sl_vtabsym, sl_pagesym, sl_returnsym, sl_escsym, sl_spacesym, sl_deletesym;
 extern sl_v sl_errio, sl_errparse, sl_errtype, sl_errarg, sl_errmem;
--- a/src/string.c
+++ b/src/string.c
@@ -141,11 +141,30 @@
 
 BUILTIN("string-split", string_split)
 {
-	argcount(nargs, 2);
+	if(nargs < 1)
+		argcount(nargs, 1);
 	char *s = tostring(args[0]);
-	char *delim = tostring(args[1]);
 	usize len = cv_len(ptr(args[0]));
-	usize dlen = cv_len(ptr(args[1]));
+	// split on whitespace by default
+	const char *delim0 = " \t\n\r\v", *delim = delim0;
+	usize dlen = 5;
+	int n = 1;
+	// second is either a :trim or a separator
+	if(nargs > n && args[n] != sl_trimsym){
+		delim = tostring(args[n]);
+		dlen = cv_len(ptr(args[n]));
+		n++;
+	}
+	bool trim = false;
+	// it can only be a :trim X now
+	if(nargs > n){
+		if(args[n] != sl_trimsym)
+			lerrorf(sl_errarg, "invalid argument at position %d", n);
+		n++;
+		if(nargs <= n)
+			argcount(nargs, n+1);
+		trim = args[n] != sl_nil;
+	}
 	usize ssz, tokend, tokstart, i = 0;
 	sl_v first = sl_nil, c = sl_nil, last;
 	usize junk;
@@ -155,15 +174,20 @@
 	do{
 		// find and allocate next token
 		tokstart = tokend = i;
-		while(i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
+		while(i < len && !u8_memchr((char*)delim, u8_nextmemchar(s, &i), dlen, &junk))
 			tokend = i;
 		ssz = tokend - tokstart;
+		if(ssz == 0 && trim)
+			continue;
+
 		last = c; // save previous cons cell
 		c = mk_cons(cvalue_string(ssz), sl_nil);
 
-		// we've done allocation; reload movable pointers
-		s = cvalue_data(args[0]);
-		delim = cvalue_data(args[1]);
+		// we've done allocation; reload movable pointers.
+		// s must always be refreshed; delim only if user-supplied (args[1])
+		s = cvalue_data(args[0]);
+		if(delim != delim0)
+			delim = cvalue_data(args[1]);
 
 		if(ssz)
 			memmove(cvalue_data(car_(c)), &s[tokstart], ssz);
@@ -177,7 +201,7 @@
 		// note this tricky condition: if the string ends with a
 		// delimiter, we need to go around one more time to add an
 		// empty string. this happens when (i == len && tokend < i)
-	}while(i < len || (i == len && (tokend != i)));
+	}while(i < len || (i == len && tokend != i));
 	sl_free_gc_handles(2);
 	return first;
 }
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -507,6 +507,7 @@
   (io-close b))
 
 (def s "привет\0пока")
+(def s2 "hello       \t   \n world\n ")
 
 (assert (equal? s (string-encode (string-decode s))))
 (assert (equal? (string s "\0") (string-encode (string-decode s t))))
@@ -551,6 +552,11 @@
 (assert (equal? '("привет" "пока") (string-split s "\0")))
 (assert (equal? '("пр" "вет" "пок" "") (string-split s "аи\0")))
 (assert (equal? '("" "") (string-split "1" "1")))
+
+(assert (equal? '("hello" "world") (string-split s2 :trim T)))
+(assert (equal? '("hello" "\t" "\n" "world\n") (string-split s2 " " :trim T)))
+(assert (equal? (list s2) (string-split s2 "X" :trim T)))
+(assert (equal? (list s2) (string-split s2 "X")))
 
 (assert (equal? #\а (string-char s 10)))
 (assert (equal? #\nul (string-char s 6)))