ref: 336696822979453b4977b9e07d2f6d308e3edde7
dir: /string.c/
/*
string functions
*/
#include "llt.h"
#include "flisp.h"
BUILTIN("string?", stringp)
{
argcount(nargs, 1);
return fl_isstring(args[0]) ? FL_T : FL_F;
}
BUILTIN("string.count", string_count)
{
size_t start = 0;
if (nargs < 1 || nargs > 3)
argcount(nargs, 1);
if (!fl_isstring(args[0]))
type_error("string", args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t stop = len;
if (nargs > 1) {
start = toulong(args[1]);
if (start > len)
bounds_error(args[0], args[1]);
if (nargs > 2) {
stop = toulong(args[2]);
if (stop > len)
bounds_error(args[0], args[2]);
if (stop <= start)
return fixnum(0);
}
}
char *str = cvalue_data(args[0]);
return size_wrap(u8_charnum(str+start, stop-start));
}
BUILTIN("string.width", string_width)
{
argcount(nargs, 1);
if (iscprim(args[0])) {
cprim_t *cp = (cprim_t*)ptr(args[0]);
if (cp_class(cp) == wchartype) {
int w = wcwidth(*(wchar_t*)cp_data(cp));
if (w < 0)
return FL_F;
return fixnum(w);
}
}
char *s = tostring(args[0]);
return size_wrap(u8_strwidth(s));
}
BUILTIN("string.reverse", string_reverse)
{
argcount(nargs, 1);
if (!fl_isstring(args[0]))
type_error("string", args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
value_t ns = cvalue_string(len);
u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
return ns;
}
BUILTIN("string.encode", string_encode)
{
argcount(nargs, 1);
if (iscvalue(args[0])) {
cvalue_t *cv = (cvalue_t*)ptr(args[0]);
fltype_t *t = cv_class(cv);
if (t->eltype == wchartype) {
size_t nc = cv_len(cv) / sizeof(uint32_t);
uint32_t *ptr = (uint32_t*)cv_data(cv);
size_t nbytes = u8_codingsize(ptr, nc);
value_t str = cvalue_string(nbytes);
ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
u8_toutf8(cvalue_data(str), nbytes, ptr, nc);
return str;
}
}
type_error("wchar array", args[0]);
}
BUILTIN("string.decode", string_decode)
{
int term=0;
if (nargs == 2) {
term = (args[1] != FL_F);
}
else {
argcount(nargs, 1);
}
if (!fl_isstring(args[0]))
type_error("string", args[0]);
cvalue_t *cv = (cvalue_t*)ptr(args[0]);
char *ptr = (char*)cv_data(cv);
size_t nb = cv_len(cv);
size_t nc = u8_charnum(ptr, nb);
size_t newsz = nc*sizeof(uint32_t);
if (term) newsz += sizeof(uint32_t);
value_t wcstr = cvalue(wcstringtype, newsz);
ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer
uint32_t *pwc = cvalue_data(wcstr);
u8_toucs(pwc, nc, ptr, nb);
if (term) pwc[nc] = 0;
return wcstr;
}
extern BUILTIN("buffer", buffer);
extern value_t stream_to_string(value_t *ps);
BUILTIN("string", string)
{
if (nargs == 1 && fl_isstring(args[0]))
return args[0];
value_t arg, buf = fn_builtin_buffer(nil, 0);
fl_gc_handle(&buf);
ios_t *s = value2c(ios_t*,buf);
int i;
value_t oldpr = symbol_value(printreadablysym);
value_t oldpp = symbol_value(printprettysym);
set(printreadablysym, FL_F);
set(printprettysym, FL_F);
FOR_ARGS(i,0,arg,args) {
USED(arg);
fl_print(s, args[i]);
}
set(printreadablysym, oldpr);
set(printprettysym, oldpp);
value_t outp = stream_to_string(&buf);
fl_free_gc_handles(1);
return outp;
}
BUILTIN("string.split", string_split)
{
argcount(nargs, 2);
char *s = tostring(args[0]);
char *delim = tostring(args[1]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
size_t ssz, tokend, tokstart, i=0;
value_t first=FL_NIL, c=FL_NIL, last;
size_t junk;
fl_gc_handle(&first);
fl_gc_handle(&last);
do {
// find and allocate next token
tokstart = tokend = i;
while (i < len &&
!u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
tokend = i;
ssz = tokend - tokstart;
last = c; // save previous cons cell
c = fl_cons(cvalue_string(ssz), FL_NIL);
// we've done allocation; reload movable pointers
s = cv_data((cvalue_t*)ptr(args[0]));
delim = cv_data((cvalue_t*)ptr(args[1]));
if (ssz) memmove(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);
// link new cell
if (last == FL_NIL)
first = c; // first time, save first cons
else
((cons_t*)ptr(last))->cdr = c;
// note this tricky condition: if the string ends with a
// delimiter, we need to go around one more time to add an
// empty string. this happens when (i==len && tokend<i)
} while (i < len || (i==len && (tokend!=i)));
fl_free_gc_handles(2);
return first;
}
BUILTIN("string.sub", string_sub)
{
if (nargs != 2)
argcount(nargs, 3);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t i1, i2;
i1 = toulong(args[1]);
if (i1 > len)
bounds_error(args[0], args[1]);
if (nargs == 3) {
i2 = toulong(args[2]);
if (i2 > len)
bounds_error(args[0], args[2]);
}
else {
i2 = len;
}
if (i2 <= i1)
return cvalue_string(0);
value_t ns = cvalue_string(i2-i1);
memmove(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1);
return ns;
}
BUILTIN("string.char", string_char)
{
argcount(nargs, 2);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t i = toulong(args[1]);
if (i >= len)
bounds_error(args[0], args[1]);
size_t sl = u8_seqlen(&s[i]);
if (sl > len || i > len-sl)
bounds_error(args[0], args[1]);
return mk_wchar(u8_nextchar(s, &i));
}
BUILTIN("char.upcase", char_upcase)
{
argcount(nargs, 1);
cprim_t *cp = (cprim_t*)ptr(args[0]);
if (!iscprim(args[0]) || cp_class(cp) != wchartype)
type_error("wchar", args[0]);
return mk_wchar(towupper(*(int32_t*)cp_data(cp)));
}
BUILTIN("char.downcase", char_downcase)
{
argcount(nargs, 1);
cprim_t *cp = (cprim_t*)ptr(args[0]);
if (!iscprim(args[0]) || cp_class(cp) != wchartype)
type_error("wchar", args[0]);
return mk_wchar(towlower(*(int32_t*)cp_data(cp)));
}
BUILTIN("char-alphabetic?", char_alphabeticp)
{
argcount(nargs, 1);
cprim_t *cp = (cprim_t*)ptr(args[0]);
if (!iscprim(args[0]) || cp_class(cp) != wchartype)
type_error("wchar", args[0]);
return iswalpha(*(int32_t*)cp_data(cp)) ? FL_T : FL_F;
}
static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
{
char *p = memchr(s+start, c, len-start);
if (p == nil)
return FL_F;
return size_wrap((size_t)(p - s));
}
BUILTIN("string.find", string_find)
{
char cbuf[8];
size_t start = 0;
if (nargs == 3)
start = toulong(args[2]);
else
argcount(nargs, 2);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
if (start > len)
bounds_error(args[0], args[2]);
char *needle; size_t needlesz;
value_t v = args[1];
cprim_t *cp = (cprim_t*)ptr(v);
if (iscprim(v) && cp_class(cp) == wchartype) {
uint32_t c = *(uint32_t*)cp_data(cp);
if (c <= 0x7f)
return mem_find_byte(s, (char)c, start, len);
needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
needle = cbuf;
}
else if (iscprim(v) && cp_class(cp) == bytetype) {
return mem_find_byte(s, *(char*)cp_data(cp), start, len);
}
else if (fl_isstring(v)) {
cvalue_t *cv = (cvalue_t*)ptr(v);
needlesz = cv_len(cv);
needle = (char*)cv_data(cv);
}
else {
type_error("string", args[1]);
}
if (needlesz > len-start)
return FL_F;
else if (needlesz == 1)
return mem_find_byte(s, needle[0], start, len);
else if (needlesz == 0)
return size_wrap(start);
size_t i;
for(i=start; i < len-needlesz+1; i++) {
if (s[i] == needle[0]) {
if (!memcmp(&s[i+1], needle+1, needlesz-1))
return size_wrap(i);
}
}
return FL_F;
}
BUILTIN("string.inc", string_inc)
{
if (nargs < 2 || nargs > 3)
argcount(nargs, 2);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t i = toulong(args[1]);
size_t cnt = 1;
if (nargs == 3)
cnt = toulong(args[2]);
while (cnt--) {
if (i >= len)
bounds_error(args[0], args[1]);
(void)(isutf(s[++i]) || isutf(s[++i]) || isutf(s[++i]) || ++i);
}
return size_wrap(i);
}
BUILTIN("string.dec", string_dec)
{
if (nargs < 2 || nargs > 3)
argcount(nargs, 2);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t i = toulong(args[1]);
size_t cnt = 1;
if (nargs == 3)
cnt = toulong(args[2]);
// note: i is allowed to start at index len
if (i > len)
bounds_error(args[0], args[1]);
while (cnt--) {
if (i == 0)
bounds_error(args[0], args[1]);
(void)(isutf(s[--i]) || isutf(s[--i]) || isutf(s[--i]) || --i);
}
return size_wrap(i);
}
static unsigned long get_radix_arg(value_t arg)
{
unsigned long radix = toulong(arg);
if (radix < 2 || radix > 36)
lerrorf(ArgError, "invalid radix");
return radix;
}
BUILTIN("number->string", number_2_string)
{
if (nargs < 1 || nargs > 2)
argcount(nargs, 2);
value_t n = args[0];
int neg = 0;
uint64_t num;
if (isfixnum(n)) num = numval(n);
else if (!iscprim(n)) type_error("integer", n);
else num = conv_to_uint64(cp_data((cprim_t*)ptr(n)),
cp_numtype((cprim_t*)ptr(n)));
if (numval(fl_compare(args[0],fixnum(0))) < 0) {
num = -num;
neg = 1;
}
unsigned long radix = 10;
if (nargs == 2)
radix = get_radix_arg(args[1]);
char buf[128];
char *str = uint2str(buf, sizeof(buf), num, radix);
if (neg && str > &buf[0])
*(--str) = '-';
return string_from_cstr(str);
}
BUILTIN("string->number", string_2_number)
{
if (nargs < 1 || nargs > 2)
argcount(nargs, 2);
char *str = tostring(args[0]);
value_t n;
unsigned long radix = 0;
if (nargs == 2)
radix = get_radix_arg(args[1]);
if (!isnumtok_base(str, &n, (int)radix))
return FL_F;
return n;
}
BUILTIN("string.isutf8", string_isutf8)
{
argcount(nargs, 1);
char *s = tostring(args[0]);
size_t len = cv_len((cvalue_t*)ptr(args[0]));
return u8_isvalid(s, len) ? FL_T : FL_F;
}