ref: a70379d7e4b822f532fb0a8ccdd1624a90b64a68
dir: /src/str.c/
/* string functions */ #include "sl.h" #include "operators.h" #include "cvalues.h" #include "print.h" #include "read.h" #include "equal.h" #include "io.h" sl_purefn BUILTIN("str?", strp) { argcount(nargs, 1); return sl_isstr(args[0]) ? sl_t : sl_nil; } BUILTIN("str-length", str_length) { usize start = 0; if(nargs < 1 || nargs > 3) argcount(nargs, 1); if(!sl_isstr(args[0])) type_error("str", args[0]); usize len = cv_len(ptr(args[0])); usize stop = len; if(nargs > 1){ start = tosize(args[1]); if(start > len) bounds_error(args[0], args[1]); if(nargs > 2){ stop = tosize(args[2]); if(stop > len) bounds_error(args[0], args[2]); if(stop <= start) return fixnum(0); } } char *str = cvalue_data(args[0]); return size_wrap(u8_charnum(str+start, stop-start)); } BUILTIN("str-width", str_width) { argcount(nargs, 1); if(iscprim(args[0])){ sl_cprim *cp = ptr(args[0]); if(cp_class(cp) == sl_runetype){ int w = sl_wcwidth(*(Rune*)cp_data(cp)); return w < 0 ? sl_nil : fixnum(w); } } if(!sl_isstr(args[0])) type_error("str", args[0]); char *str = tostr(args[0]); usize len = cv_len(ptr(args[0])); ssize w = u8_strwidth(str, len); return w < 0 ? sl_nil : size_wrap(w); } BUILTIN("str-reverse", str_reverse) { argcount(nargs, 1); if(!sl_isstr(args[0])) type_error("str", args[0]); usize len = cv_len(ptr(args[0])); sl_v ns = cvalue_str(len); u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len); return ns; } BUILTIN("str", str) { if(nargs == 1 && sl_isstr(args[0])) return args[0]; sl_v arg, buf = fn_builtin_buffer(nil, 0); sl_gc_handle(&buf); sl_ios *s = value2c(sl_ios*, buf); sl_v oldpr = sym_value(sl_printreadablysym); sl_v oldpp = sym_value(sl_printprettysym); set(sl_printreadablysym, sl_nil); set(sl_printprettysym, sl_nil); int i; FOR_ARGS(i, 0, arg, args){ USED(arg); sl_print(s, args[i]); } set(sl_printreadablysym, oldpr); set(sl_printprettysym, oldpp); sl_v outp = io_to_str(&buf); sl_free_gc_handles(1); return outp; } BUILTIN("str-split", str_split) { if(nargs < 2) argcount(nargs, 1); char *s = tostr(args[0]); usize len = cv_len(ptr(args[0])); // split on whitespace by default const char *delim0 = " \t\n\r\v", *delim = delim0; usize dlen = 5; int n = 1; // second is either a :trim or a separator if(nargs > n && args[n] != sl_trimsym){ delim = tostr(args[n]); dlen = cv_len(ptr(args[n])); n++; } bool trim = false; // it can only be a :trim X now if(nargs > n){ if(args[n] != sl_trimsym) lerrorf(sl_errarg, "invalid argument at position %d", n); n++; if(nargs <= n) argcount(nargs, n+1); trim = args[n] != sl_nil; } usize ssz, tokend, tokstart, i = 0; sl_v first = sl_nil, c = sl_nil, last; usize junk; sl_gc_handle(&first); sl_gc_handle(&last); do{ // find and allocate next token tokstart = tokend = i; while(i < len && !u8_memchr((char*)delim, u8_nextmemchar(s, &i), dlen, &junk)) tokend = i; ssz = tokend - tokstart; if(ssz == 0 && trim) continue; last = c; // save previous cons cell c = mk_cons(cvalue_str(ssz), sl_nil); if(delim != delim0){ // we've done allocation; reload movable pointers s = cvalue_data(args[0]); delim = cvalue_data(args[1]); } if(ssz) memmove(cvalue_data(car_(c)), &s[tokstart], ssz); // link new cell if(last == sl_nil) first = c; // first time, save first cons else ((sl_cons*)ptr(last))->cdr = c; // note this tricky condition: if the string ends with a // delimiter, we need to go around one more time to add an // empty string. this happens when (i == len && tokend < i) }while(i < len || (i == len && tokend != i)); sl_free_gc_handles(2); return first; } BUILTIN("str-sub", str_sub) { if(nargs != 2) argcount(nargs, 3); char *s = tostr(args[0]); usize lenbytes = cv_len(ptr(args[0])); usize startbytes, n, startrune = tosize(args[1]); for(startbytes = n = 0; n < startrune && startbytes < lenbytes; n++) startbytes += u8_seqlen(s+startbytes); if(n != startrune) bounds_error(args[0], args[1]); usize endbytes = lenbytes; if(nargs == 3){ usize endrune = tosize(args[2]); for(endbytes = startbytes; n < endrune && endbytes < lenbytes; n++) endbytes += u8_seqlen(s+endbytes); if(n != endrune) bounds_error(args[0], args[2]); } sl_v ns = cvalue_str(endbytes-startbytes); s = cvalue_data(args[0]); // reload after alloc memmove(cvalue_data(ns), s+startbytes, endbytes-startbytes); return ns; } BUILTIN("str-rune", str_rune) { argcount(nargs, 2); char *s = tostr(args[0]); usize lenbytes = cv_len(ptr(args[0])); usize startbytes, n, startrune = tosize(args[1]); for(startbytes = n = 0; n < startrune && startbytes < lenbytes; n++) startbytes += u8_seqlen(s+startbytes); if(n != startrune || startbytes >= lenbytes) bounds_error(args[0], args[1]); Rune r; chartorune(&r, s+startbytes); return mk_rune(r); } BUILTIN("rune-upcase", rune_upcase) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return mk_rune(toupperrune(*(Rune*)cp_data(cp))); } BUILTIN("rune-downcase", rune_downcase) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return mk_rune(tolowerrune(*(Rune*)cp_data(cp))); } BUILTIN("rune-titlecase", rune_titlecase) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return mk_rune(totitlerune(*(Rune*)cp_data(cp))); } sl_purefn BUILTIN("rune-alphabetic?", rune_alphabeticp) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return isalpharune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } sl_purefn BUILTIN("rune-lower-case?", rune_lower_casep) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return islowerrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } sl_purefn BUILTIN("rune-upper-case?", rune_upper_casep) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return isupperrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } sl_purefn BUILTIN("rune-title-case?", rune_title_casep) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return istitlerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } sl_purefn BUILTIN("rune-numeric?", rune_numericp) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return isdigitrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } sl_purefn BUILTIN("rune-whitespace?", rune_whitespacep) { argcount(nargs, 1); sl_cprim *cp = ptr(args[0]); if(!iscprim(args[0]) || cp_class(cp) != sl_runetype) type_error("rune", args[0]); return isspacerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil; } BUILTIN("str-find", str_find) { usize startrune = 0; if(nargs == 3) startrune = tosize(args[2]); else argcount(nargs, 2); char *s = tostr(args[0]); usize sbytes = cv_len(ptr(args[0])); if(sbytes == 0) return sl_nil; // needle char *nd; usize ndbytes; sl_v v = args[1]; char rbuf[UTFmax+1]; sl_cprim *cp = ptr(v); if(iscprim(v) && cp_class(cp) == sl_runetype){ Rune r = *(Rune*)cp_data(cp); ndbytes = runetochar(rbuf, &r); nd = rbuf; nd[ndbytes] = 0; }else if(sl_isstr(v)){ sl_cv *cv = ptr(v); ndbytes = cv_len(cv); nd = (char*)cv_data(cv); }else{ type_error("str or rune", args[1]); } if(ndbytes == 0) return size_wrap(startrune); if(ndbytes > sbytes) return sl_nil; usize i, n; // first iterate to the starting rune for(i = n = 0; n < startrune && i < sbytes; n++) i += u8_seqlen(s+i); if(n != startrune) bounds_error(args[0], fixnum(startrune)); // now search for the needle for(; i < sbytes-ndbytes+1; n++){ if(s[i] == nd[0] && memcmp(&s[i+1], nd+1, ndbytes-1) == 0) return size_wrap(n); i += u8_seqlen(s+i); } return sl_nil; } static unsigned long get_radix_arg(sl_v arg) { unsigned long radix = tosize(arg); if(radix < 2 || radix > 36) lerrorf(sl_errarg, "invalid radix"); return radix; } BUILTIN("num->str", numb_2_str) { if(nargs < 1 || nargs > 2) argcount(nargs, 2); sl_v n = args[0]; bool neg = false; u64int num; int radix = 10; if(nargs == 2) radix = get_radix_arg(args[1]); if(isfixnum(n)) num = numval(n); else if(iscprim(n)){ void *data = ptr(n); if(cp_numtype(data) < T_FLOAT) num = conv_to_u64(cp_data(data), cp_numtype(data)); else if(radix != 10) lerrorf(sl_errarg, "invalid radix with floating point"); else return fn_builtin_str(args, nargs); }else if(ismp(n)){ if(radix != 16 && radix != 10 && radix != 8 && radix != 4 && radix != 2) lerrorf(sl_errarg, "invalid radix with bignum"); mpint *i = tomp(n); char *s = mptoa(i, radix, nil, 0); assert(s != nil); if(radix == 16){ /* FFFF → ffff */ for(int k = 0; s[k]; k++) s[k] = tolower(s[k]); } n = str_from_cstr(s); MEM_FREE(s); return n; }else{ type_error("int", n); } if(numval(sl_compare(args[0], fixnum(0), false)) < 0){ num = -num; neg = true; } char buf[128], *str = uint2str(buf, sizeof(buf), num, radix); if(neg && str > buf) *(--str) = '-'; return str_from_cstr(str); } BUILTIN("str->num", str_2_num) { if(nargs < 1 || nargs > 2) argcount(nargs, 2); char *str = tostr(args[0]); sl_v n; unsigned long radix = 0; if(nargs == 2) radix = get_radix_arg(args[1]); if(!sl_read_numtok(str, &n, (int)radix)) return sl_nil; return n; } sl_purefn BUILTIN("str-utf8?", str_utf8p) { argcount(nargs, 1); char *s = tostr(args[0]); usize len = cv_len(ptr(args[0])); return u8_isvalid(s, len) ? sl_t : sl_nil; }