shithub: sl

ref: a70379d7e4b822f532fb0a8ccdd1624a90b64a68
dir: /src/str.c/

View raw version
/*
  string functions
*/
#include "sl.h"
#include "operators.h"
#include "cvalues.h"
#include "print.h"
#include "read.h"
#include "equal.h"
#include "io.h"

sl_purefn
BUILTIN("str?", strp)
{
	argcount(nargs, 1);
	return sl_isstr(args[0]) ? sl_t : sl_nil;
}

BUILTIN("str-length", str_length)
{
	usize start = 0;
	if(nargs < 1 || nargs > 3)
		argcount(nargs, 1);
	if(!sl_isstr(args[0]))
		type_error("str", args[0]);
	usize len = cv_len(ptr(args[0]));
	usize stop = len;
	if(nargs > 1){
		start = tosize(args[1]);
		if(start > len)
			bounds_error(args[0], args[1]);
		if(nargs > 2){
			stop = tosize(args[2]);
			if(stop > len)
				bounds_error(args[0], args[2]);
			if(stop <= start)
				return fixnum(0);
		}
	}
	char *str = cvalue_data(args[0]);
	return size_wrap(u8_charnum(str+start, stop-start));
}

BUILTIN("str-width", str_width)
{
	argcount(nargs, 1);
	if(iscprim(args[0])){
		sl_cprim *cp = ptr(args[0]);
		if(cp_class(cp) == sl_runetype){
			int w = sl_wcwidth(*(Rune*)cp_data(cp));
			return w < 0 ? sl_nil : fixnum(w);
		}
	}
	if(!sl_isstr(args[0]))
		type_error("str", args[0]);
	char *str = tostr(args[0]);
	usize len = cv_len(ptr(args[0]));
	ssize w = u8_strwidth(str, len);
	return w < 0 ? sl_nil : size_wrap(w);
}

BUILTIN("str-reverse", str_reverse)
{
	argcount(nargs, 1);
	if(!sl_isstr(args[0]))
		type_error("str", args[0]);
	usize len = cv_len(ptr(args[0]));
	sl_v ns = cvalue_str(len);
	u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
	return ns;
}

BUILTIN("str", str)
{
	if(nargs == 1 && sl_isstr(args[0]))
		return args[0];
	sl_v arg, buf = fn_builtin_buffer(nil, 0);
	sl_gc_handle(&buf);
	sl_ios *s = value2c(sl_ios*, buf);
	sl_v oldpr = sym_value(sl_printreadablysym);
	sl_v oldpp = sym_value(sl_printprettysym);
	set(sl_printreadablysym, sl_nil);
	set(sl_printprettysym, sl_nil);
	int i;
	FOR_ARGS(i, 0, arg, args){
		USED(arg);
		sl_print(s, args[i]);
	}
	set(sl_printreadablysym, oldpr);
	set(sl_printprettysym, oldpp);
	sl_v outp = io_to_str(&buf);
	sl_free_gc_handles(1);
	return outp;
}

BUILTIN("str-split", str_split)
{
	if(nargs < 2)
		argcount(nargs, 1);
	char *s = tostr(args[0]);
	usize len = cv_len(ptr(args[0]));
	// split on whitespace by default
	const char *delim0 = " \t\n\r\v", *delim = delim0;
	usize dlen = 5;
	int n = 1;
	// second is either a :trim or a separator
	if(nargs > n && args[n] != sl_trimsym){
		delim = tostr(args[n]);
		dlen = cv_len(ptr(args[n]));
		n++;
	}
	bool trim = false;
	// it can only be a :trim X now
	if(nargs > n){
		if(args[n] != sl_trimsym)
			lerrorf(sl_errarg, "invalid argument at position %d", n);
		n++;
		if(nargs <= n)
			argcount(nargs, n+1);
		trim = args[n] != sl_nil;
	}
	usize ssz, tokend, tokstart, i = 0;
	sl_v first = sl_nil, c = sl_nil, last;
	usize junk;
	sl_gc_handle(&first);
	sl_gc_handle(&last);

	do{
		// find and allocate next token
		tokstart = tokend = i;
		while(i < len && !u8_memchr((char*)delim, u8_nextmemchar(s, &i), dlen, &junk))
			tokend = i;
		ssz = tokend - tokstart;
		if(ssz == 0 && trim)
			continue;

		last = c; // save previous cons cell
		c = mk_cons(cvalue_str(ssz), sl_nil);

		if(delim != delim0){
			// we've done allocation; reload movable pointers
			s = cvalue_data(args[0]);
			delim = cvalue_data(args[1]);
		}

		if(ssz)
			memmove(cvalue_data(car_(c)), &s[tokstart], ssz);

		// link new cell
		if(last == sl_nil)
			first = c;   // first time, save first cons
		else
			((sl_cons*)ptr(last))->cdr = c;

		// note this tricky condition: if the string ends with a
		// delimiter, we need to go around one more time to add an
		// empty string. this happens when (i == len && tokend < i)
	}while(i < len || (i == len && tokend != i));
	sl_free_gc_handles(2);
	return first;
}

BUILTIN("str-sub", str_sub)
{
	if(nargs != 2)
		argcount(nargs, 3);
	char *s = tostr(args[0]);
	usize lenbytes = cv_len(ptr(args[0]));
	usize startbytes, n, startrune = tosize(args[1]);
	for(startbytes = n = 0; n < startrune && startbytes < lenbytes; n++)
		startbytes += u8_seqlen(s+startbytes);
	if(n != startrune)
		bounds_error(args[0], args[1]);
	usize endbytes = lenbytes;
	if(nargs == 3){
		usize endrune = tosize(args[2]);
		for(endbytes = startbytes; n < endrune && endbytes < lenbytes; n++)
			endbytes += u8_seqlen(s+endbytes);
		if(n != endrune)
			bounds_error(args[0], args[2]);
	}
	sl_v ns = cvalue_str(endbytes-startbytes);
	s = cvalue_data(args[0]); // reload after alloc
	memmove(cvalue_data(ns), s+startbytes, endbytes-startbytes);
	return ns;
}

BUILTIN("str-rune", str_rune)
{
	argcount(nargs, 2);
	char *s = tostr(args[0]);
	usize lenbytes = cv_len(ptr(args[0]));
	usize startbytes, n, startrune = tosize(args[1]);
	for(startbytes = n = 0; n < startrune && startbytes < lenbytes; n++)
		startbytes += u8_seqlen(s+startbytes);
	if(n != startrune || startbytes >= lenbytes)
		bounds_error(args[0], args[1]);
	Rune r;
	chartorune(&r, s+startbytes);
	return mk_rune(r);
}

BUILTIN("rune-upcase", rune_upcase)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
}

BUILTIN("rune-downcase", rune_downcase)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
}

BUILTIN("rune-titlecase", rune_titlecase)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return mk_rune(totitlerune(*(Rune*)cp_data(cp)));
}

sl_purefn
BUILTIN("rune-alphabetic?", rune_alphabeticp)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return isalpharune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

sl_purefn
BUILTIN("rune-lower-case?", rune_lower_casep)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return islowerrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

sl_purefn
BUILTIN("rune-upper-case?", rune_upper_casep)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return isupperrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

sl_purefn
BUILTIN("rune-title-case?", rune_title_casep)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return istitlerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

sl_purefn
BUILTIN("rune-numeric?", rune_numericp)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return isdigitrune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

sl_purefn
BUILTIN("rune-whitespace?", rune_whitespacep)
{
	argcount(nargs, 1);
	sl_cprim *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != sl_runetype)
		type_error("rune", args[0]);
	return isspacerune(*(Rune*)cp_data(cp)) ? sl_t : sl_nil;
}

BUILTIN("str-find", str_find)
{
	usize startrune = 0;
	if(nargs == 3)
		startrune = tosize(args[2]);
	else
		argcount(nargs, 2);
	char *s = tostr(args[0]);
	usize sbytes = cv_len(ptr(args[0]));
	if(sbytes == 0)
		return sl_nil;

	// needle
	char *nd;
	usize ndbytes;
	sl_v v = args[1];
	char rbuf[UTFmax+1];
	sl_cprim *cp = ptr(v);
	if(iscprim(v) && cp_class(cp) == sl_runetype){
		Rune r = *(Rune*)cp_data(cp);
		ndbytes = runetochar(rbuf, &r);
		nd = rbuf;
		nd[ndbytes] = 0;
	}else if(sl_isstr(v)){
		sl_cv *cv = ptr(v);
		ndbytes = cv_len(cv);
		nd = (char*)cv_data(cv);
	}else{
		type_error("str or rune", args[1]);
	}
	if(ndbytes == 0)
		return size_wrap(startrune);
	if(ndbytes > sbytes)
		return sl_nil;

	usize i, n;
	// first iterate to the starting rune
	for(i = n = 0; n < startrune && i < sbytes; n++)
		i += u8_seqlen(s+i);
	if(n != startrune)
		bounds_error(args[0], fixnum(startrune));
	// now search for the needle
	for(; i < sbytes-ndbytes+1; n++){
		if(s[i] == nd[0] && memcmp(&s[i+1], nd+1, ndbytes-1) == 0)
			return size_wrap(n);
		i += u8_seqlen(s+i);
	}
	return sl_nil;
}

static unsigned long
get_radix_arg(sl_v arg)
{
	unsigned long radix = tosize(arg);
	if(radix < 2 || radix > 36)
		lerrorf(sl_errarg, "invalid radix");
	return radix;
}

BUILTIN("num->str", numb_2_str)
{
	if(nargs < 1 || nargs > 2)
		argcount(nargs, 2);
	sl_v n = args[0];
	bool neg = false;
	u64int num;
	int radix = 10;
	if(nargs == 2)
		radix = get_radix_arg(args[1]);
	if(isfixnum(n))
		num = numval(n);
	else if(iscprim(n)){
		void *data = ptr(n);
		if(cp_numtype(data) < T_FLOAT)
			num = conv_to_u64(cp_data(data), cp_numtype(data));
		else if(radix != 10)
			lerrorf(sl_errarg, "invalid radix with floating point");
		else
			return fn_builtin_str(args, nargs);
	}else if(ismp(n)){
		if(radix != 16 && radix != 10 && radix != 8 && radix != 4 && radix != 2)
			lerrorf(sl_errarg, "invalid radix with bignum");
		mpint *i = tomp(n);
		char *s = mptoa(i, radix, nil, 0);
		assert(s != nil);
		if(radix == 16){ /* FFFF → ffff */
			for(int k = 0; s[k]; k++)
				s[k] = tolower(s[k]);
		}
		n = str_from_cstr(s);
		MEM_FREE(s);
		return n;
	}else{
		type_error("int", n);
	}
	if(numval(sl_compare(args[0], fixnum(0), false)) < 0){
		num = -num;
		neg = true;
	}
	char buf[128], *str = uint2str(buf, sizeof(buf), num, radix);
	if(neg && str > buf)
		*(--str) = '-';
	return str_from_cstr(str);
}

BUILTIN("str->num", str_2_num)
{
	if(nargs < 1 || nargs > 2)
		argcount(nargs, 2);
	char *str = tostr(args[0]);
	sl_v n;
	unsigned long radix = 0;
	if(nargs == 2)
		radix = get_radix_arg(args[1]);
	if(!sl_read_numtok(str, &n, (int)radix))
		return sl_nil;
	return n;
}

sl_purefn
BUILTIN("str-utf8?", str_utf8p)
{
	argcount(nargs, 1);
	char *s = tostr(args[0]);
	usize len = cv_len(ptr(args[0]));
	return u8_isvalid(s, len) ? sl_t : sl_nil;
}