ref: 12c9d2fc728b51aa1eb9a70d0d331eb9464912d9
dir: /src/string.c/
/*
string functions
*/
#include "sl.h"
#include "operators.h"
#include "cvalues.h"
#include "print.h"
#include "read.h"
#include "equal.h"
#include "iostream.h"
sl_purefn
BUILTIN("string?", stringp)
{
argcount(nargs, 1);
return sl_isstring(args[0]) ? sl_t : sl_nil;
}
BUILTIN("string-length", string_length)
{
usize start = 0;
if(nargs < 1 || nargs > 3)
argcount(nargs, 1);
if(!sl_isstring(args[0]))
type_error("string", args[0]);
usize len = cv_len(ptr(args[0]));
usize stop = len;
if(nargs > 1){
start = tosize(args[1]);
if(start > len)
bounds_error(args[0], args[1]);
if(nargs > 2){
stop = tosize(args[2]);
if(stop > len)
bounds_error(args[0], args[2]);
if(stop <= start)
return fixnum(0);
}
}
char *str = cvalue_data(args[0]);
return size_wrap(u8_charnum(str+start, stop-start));
}
BUILTIN("string-width", string_width)
{
argcount(nargs, 1);
if(iscprim(args[0])){
sl_cprim *cp = ptr(args[0]);
if(cp_class(cp) == sl_runetype){
int w = sl_wcwidth(*(Rune*)cp_data(cp));
return w < 0 ? sl_nil : fixnum(w);
}
}
if(!sl_isstring(args[0]))
type_error("string", args[0]);
char *str = tostring(args[0]);
usize len = cv_len(ptr(args[0]));
ssize w = u8_strwidth(str, len);
return w < 0 ? sl_nil : size_wrap(w);
}
BUILTIN("string-reverse", string_reverse)
{
argcount(nargs, 1);
if(!sl_isstring(args[0]))
type_error("string", args[0]);
usize len = cv_len(ptr(args[0]));
sl_v ns = cvalue_string(len);
u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
return ns;
}
BUILTIN("string-encode", string_encode)
{
argcount(nargs, 1);
if(iscvalue(args[0])){
csl_v *cv = ptr(args[0]);
sl_type *t = cv_class(cv);
if(t->eltype == sl_runetype){
usize nr = cv_len(cv) / sizeof(Rune);
Rune *r = (Rune*)cv_data(cv);
usize nb = runenlen(r, nr);
sl_v str = cvalue_string(nb);
char *s = cvalue_data(str);
for(usize i = 0; i < nr; i++)
s += runetochar(s, r+i);
return str;
}
}
type_error("rune array", args[0]);
}
BUILTIN("string-decode", string_decode)
{
bool term = false;
if(nargs == 2)
term = args[1] != sl_nil;
else
argcount(nargs, 1);
if(!sl_isstring(args[0]))
type_error("string", args[0]);
csl_v *cv = ptr(args[0]);
char *ptr = (char*)cv_data(cv);
usize nb = cv_len(cv);
usize nc = u8_runelen(ptr, nb);
usize newsz = nc*sizeof(Rune);
if(term)
newsz += sizeof(Rune);
sl_v runestr = cvalue(sl_runestringtype, newsz);
ptr = cvalue_data(args[0]); // relocatable pointer
Rune *r = cvalue_data(runestr);
for(usize i = 0; i < nb; i++)
ptr += chartorune(r+i, ptr);
if(term)
r[nb] = 0;
return runestr;
}
BUILTIN("string", string)
{
if(nargs == 1 && sl_isstring(args[0]))
return args[0];
sl_v arg, buf = fn_builtin_buffer(nil, 0);
sl_gc_handle(&buf);
sl_ios *s = value2c(sl_ios*, buf);
sl_v oldpr = symbol_value(sl_printreadablysym);
sl_v oldpp = symbol_value(sl_printprettysym);
set(sl_printreadablysym, sl_nil);
set(sl_printprettysym, sl_nil);
int i;
FOR_ARGS(i, 0, arg, args){
USED(arg);
sl_print(s, args[i]);
}
set(sl_printreadablysym, oldpr);
set(sl_printprettysym, oldpp);
sl_v outp = stream_to_string(&buf);
sl_free_gc_handles(1);
return outp;
}
BUILTIN("string-split", string_split)
{
argcount(nargs, 2);
char *s = tostring(args[0]);
char *delim = tostring(args[1]);
usize len = cv_len(ptr(args[0]));
usize dlen = cv_len(ptr(args[1]));
usize ssz, tokend, tokstart, i = 0;
sl_v first = sl_nil, c = sl_nil, last;
usize junk;
sl_gc_handle(&first);
sl_gc_handle(&last);
do{
// find and allocate next token
tokstart = tokend = i;
while(i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
tokend = i;
ssz = tokend - tokstart;
last = c; // save previous cons cell
c = mk_cons(cvalue_string(ssz), sl_nil);
// we've done allocation; reload movable pointers
s = cvalue_data(args[0]);
delim = cvalue_data(args[1]);
if(ssz)
memmove(cvalue_data(car_(c)), &s[tokstart], ssz);
// link new cell
if(last == sl_nil)
first = c; // first time, save first cons
else
((sl_cons*)ptr(last))->cdr = c;
// note this tricky condition: if the string ends with a
// delimiter, we need to go around one more time to add an
// empty string. this happens when (i == len && tokend < i)
}while(i < len || (i == len && (tokend != i)));
sl_free_gc_handles(2);
return first;
}
BUILTIN("string-sub", string_sub)
{
if(nargs != 2)
argcount(nargs, 3);
char *s = tostring(args[0]);
usize lenbytes = cv_len(ptr(args[0]));
usize startbytes, n, startchar = tosize(args[1]);
for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++)
startbytes += u8_seqlen(s+startbytes);
if(n != startchar)
bounds_error(args[0], args[1]);
usize endbytes = lenbytes;
if(nargs == 3){
usize endchar = tosize(args[2]);
for(endbytes = startbytes; n < endchar && endbytes < lenbytes; n++)
endbytes += u8_seqlen(s+endbytes);
if(n != endchar)
bounds_error(args[0], args[2]);
}
sl_v ns = cvalue_string(endbytes-startbytes);
s = cvalue_data(args[0]); // reload after alloc
memmove(cvalue_data(ns), s+startbytes, endbytes-startbytes);
return ns;
}
BUILTIN("string-char", string_char)
{
argcount(nargs, 2);
char *s = tostring(args[0]);
usize lenbytes = cv_len(ptr(args[0]));
usize startbytes, n, startchar = tosize(args[1]);
for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++)
startbytes += u8_seqlen(s+startbytes);
if(n != startchar || startbytes >= lenbytes)
bounds_error(args[0], args[1]);
Rune r;
chartorune(&r, s+startbytes);
return mk_rune(r);
}
BUILTIN("char-upcase", char_upcase)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
}
BUILTIN("char-downcase", char_downcase)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
}
BUILTIN("char-titlecase", char_titlecase)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return mk_rune(totitlerune(*(Rune*)cp_data(cp)));
}
sl_purefn
BUILTIN("char-alphabetic?", char_alphabeticp)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return isalpharune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
sl_purefn
BUILTIN("char-lower-case?", char_lower_casep)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return islowerrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
sl_purefn
BUILTIN("char-upper-case?", char_upper_casep)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return isupperrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
sl_purefn
BUILTIN("char-title-case?", char_title_casep)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return istitlerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
sl_purefn
BUILTIN("char-numeric?", char_numericp)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return isdigitrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
sl_purefn
BUILTIN("char-whitespace?", char_whitespacep)
{
argcount(nargs, 1);
sl_cprim *cp = ptr(args[0]);
if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
type_error("rune", args[0]);
return isspacerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}
BUILTIN("string-find", string_find)
{
char cbuf[UTFmax+1];
usize start = 0;
if(nargs == 3)
start = tosize(args[2]);
else
argcount(nargs, 2);
char *s = tostring(args[0]);
usize len = cv_len(ptr(args[0]));
if(start > len)
bounds_error(args[0], args[2]);
char *needle; usize needlesz;
sl_v v = args[1];
sl_cprim *cp = ptr(v);
if(iscprim(v) && cp_class(cp) == sl_runetype){
Rune r = *(Rune*)cp_data(cp);
needlesz = runetochar(cbuf, &r);
needle = cbuf;
needle[needlesz] = 0;
}else if(iscprim(v) && cp_class(cp) == sl_bytetype){
needlesz = 1;
needle = cbuf;
needle[0] = *(char*)cp_data(cp);
needle[needlesz] = 0;
}else if(sl_isstring(v)){
csl_v *cv = ptr(v);
needlesz = cv_len(cv);
needle = (char*)cv_data(cv);
}else{
type_error("string", args[1]);
}
if(needlesz > len-start)
return sl_nil;
if(needlesz == 0)
return size_wrap(start);
usize i;
for(i = start; i < len-needlesz+1; i++){
if(s[i] == needle[0] && memcmp(&s[i+1], needle+1, needlesz-1) == 0)
return size_wrap(i);
}
return sl_nil;
}
static unsigned long
get_radix_arg(sl_v arg)
{
unsigned long radix = tosize(arg);
if(radix < 2 || radix > 36)
lerrorf(sl_errarg, "invalid radix");
return radix;
}
BUILTIN("number->string", number_2_string)
{
if(nargs < 1 || nargs > 2)
argcount(nargs, 2);
sl_v n = args[0];
bool neg = false;
u64int num;
int radix = 10;
if(nargs == 2)
radix = get_radix_arg(args[1]);
if(isfixnum(n))
num = numval(n);
else if(iscprim(n)){
void *data = ptr(n);
if(cp_numtype(data) < T_FLOAT)
num = conv_to_u64(cp_data(data), cp_numtype(data));
else if(radix != 10)
lerrorf(sl_errarg, "invalid radix with floating point");
else
return fn_builtin_string(args, nargs);
}else if(ismp(n)){
if(radix != 16 && radix != 10 && radix != 8 && radix != 4 && radix != 2)
lerrorf(sl_errarg, "invalid radix with bignum");
mpint *i = tomp(n);
char *s = mptoa(i, radix, nil, 0);
assert(s != nil);
if(radix == 16){ /* FFFF → ffff */
for(int k = 0; s[k]; k++)
s[k] = tolower(s[k]);
}
n = string_from_cstr(s);
MEM_FREE(s);
return n;
}else{
type_error("integer", n);
}
if(numval(sl_compare(args[0], fixnum(0), false)) < 0){
num = -num;
neg = true;
}
char buf[128], *str = uint2str(buf, sizeof(buf), num, radix);
if(neg && str > buf)
*(--str) = '-';
return string_from_cstr(str);
}
BUILTIN("string->number", string_2_number)
{
if(nargs < 1 || nargs > 2)
argcount(nargs, 2);
char *str = tostring(args[0]);
sl_v n;
unsigned long radix = 0;
if(nargs == 2)
radix = get_radix_arg(args[1]);
if(!sl_read_numtok(str, &n, (int)radix))
return sl_nil;
return n;
}
sl_purefn
BUILTIN("string-utf8?", string_utf8p)
{
argcount(nargs, 1);
char *s = tostring(args[0]);
usize len = cv_len(ptr(args[0]));
return u8_isvalid(s, len) ? sl_t : sl_nil;
}