shithub: femtolisp

ref: fdccc6ded272018cf4c5ca040041d3a938b5debd
dir: /string.c/

View raw version
/*
  string functions
*/
#include "llt.h"
#include "flisp.h"
#include "operators.h"
#include "cvalues.h"
#include "print.h"
#include "read.h"
#include "equal.h"
#include "iostream.h"

BUILTIN("string?", stringp)
{
	argcount(nargs, 1);
	return fl_isstring(args[0]) ? FL_T : FL_F;
}

BUILTIN("string-length", string_length)
{
	size_t start = 0;
	if(nargs < 1 || nargs > 3)
		argcount(nargs, 1);
	if(!fl_isstring(args[0]))
		type_error("string", args[0]);
	size_t len = cv_len((cvalue_t*)ptr(args[0]));
	size_t stop = len;
	if(nargs > 1){
		start = toulong(args[1]);
		if(start > len)
			bounds_error(args[0], args[1]);
		if(nargs > 2){
			stop = toulong(args[2]);
			if(stop > len)
				bounds_error(args[0], args[2]);
			if(stop <= start)
				return fixnum(0);
		}
	}
	char *str = cvalue_data(args[0]);
	return size_wrap(u8_charnum(str+start, stop-start));
}

BUILTIN("string-width", string_width)
{
	argcount(nargs, 1);
	if(iscprim(args[0])){
		cprim_t *cp = ptr(args[0]);
		if(cp_class(cp) == runetype){
			int w = wcwidth(*(Rune*)cp_data(cp));
			return w < 0 ? FL_F : fixnum(w);
		}
	}
	return size_wrap(u8_strwidth(tostring(args[0])));
}

BUILTIN("string-reverse", string_reverse)
{
	argcount(nargs, 1);
	if(!fl_isstring(args[0]))
		type_error("string", args[0]);
	size_t len = cv_len(ptr(args[0]));
	value_t ns = cvalue_string(len);
	u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len);
	return ns;
}

BUILTIN("string-encode", string_encode)
{
	argcount(nargs, 1);
	if(iscvalue(args[0])){
		cvalue_t *cv = ptr(args[0]);
		fltype_t *t = cv_class(cv);
		if(t->eltype == runetype){
			size_t nr = cv_len(cv) / sizeof(Rune);
			Rune *r = (Rune*)cv_data(cv);
			size_t nb = runenlen(r, nr);
			value_t str = cvalue_string(nb);
			char *s = cvalue_data(str);
			for(size_t i = 0; i < nr; i++)
				s += runetochar(s, r+i);
			return str;
		}
	}
	type_error("rune array", args[0]);
}

BUILTIN("string-decode", string_decode)
{
	int term = 0;
	if(nargs == 2)
		term = args[1] != FL_F;
	else
		argcount(nargs, 1);
	if(!fl_isstring(args[0]))
		type_error("string", args[0]);
	cvalue_t *cv = ptr(args[0]);
	char *ptr = (char*)cv_data(cv);
	size_t nb = cv_len(cv);
	size_t nc = utfnlen(ptr, nb);
	size_t newsz = nc*sizeof(Rune);
	if(term)
		newsz += sizeof(Rune);
	value_t runestr = cvalue(runestringtype, newsz);
	ptr = cv_data(ptr(args[0]));  // relocatable pointer
	Rune *r = cvalue_data(runestr);
	for(size_t i = 0; i < nb; i++)
		ptr += chartorune(r+i, ptr);
	if(term)
		r[nb] = 0;
	return runestr;
}

extern BUILTIN("buffer", buffer);

BUILTIN("string", string)
{
	if(nargs == 1 && fl_isstring(args[0]))
		return args[0];
	value_t arg, buf = fn_builtin_buffer(nil, 0);
	fl_gc_handle(&buf);
	ios_t *s = value2c(ios_t*, buf);
	int i;
	value_t oldpr = symbol_value(printreadablysym);
	value_t oldpp = symbol_value(printprettysym);
	set(printreadablysym, FL_F);
	set(printprettysym, FL_F);
	FOR_ARGS(i, 0, arg, args){
		USED(arg);
		fl_print(s, args[i]);
	}
	set(printreadablysym, oldpr);
	set(printprettysym, oldpp);
	value_t outp = stream_to_string(&buf);
	fl_free_gc_handles(1);
	return outp;
}

BUILTIN("string-split", string_split)
{
	argcount(nargs, 2);
	char *s = tostring(args[0]);
	char *delim = tostring(args[1]);
	size_t len = cv_len(ptr(args[0]));
	size_t dlen = cv_len(ptr(args[1]));
	size_t ssz, tokend, tokstart, i = 0;
	value_t first = FL_NIL, c = FL_NIL, last;
	size_t junk;
	fl_gc_handle(&first);
	fl_gc_handle(&last);

	do{
		// find and allocate next token
		tokstart = tokend = i;
		while(i < len && !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
			tokend = i;
		ssz = tokend - tokstart;
		last = c; // save previous cons cell
		c = fl_cons(cvalue_string(ssz), FL_NIL);

		// we've done allocation; reload movable pointers
		s = cv_data(ptr(args[0]));
		delim = cv_data(ptr(args[1]));

		if(ssz)
			memmove(cv_data(ptr(car_(c))), &s[tokstart], ssz);

		// link new cell
		if(last == FL_NIL)
			first = c;   // first time, save first cons
		else
			((cons_t*)ptr(last))->cdr = c;

		// note this tricky condition: if the string ends with a
		// delimiter, we need to go around one more time to add an
		// empty string. this happens when (i == len && tokend < i)
	}while(i < len || (i == len && (tokend != i)));
	fl_free_gc_handles(2);
	return first;
}

BUILTIN("string-sub", string_sub)
{
	if(nargs != 2)
		argcount(nargs, 3);
	char *s = tostring(args[0]);
	size_t lenbytes = cv_len((cvalue_t*)ptr(args[0]));
	size_t startbytes, n, startchar = toulong(args[1]);
	for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++)
		startbytes += u8_seqlen(s+startbytes);
	if(n != startchar)
		bounds_error(args[0], args[1]);
	size_t endbytes = lenbytes;
	if(nargs == 3){
		size_t endchar = toulong(args[2]);
		for(endbytes = startbytes; n < endchar && endbytes < lenbytes; n++)
			endbytes += u8_seqlen(s+endbytes);
		if(n != endchar)
			bounds_error(args[0], args[2]);
	}
	if(endbytes == startbytes)
		return symbol_value(emptystringsym);
	value_t ns = cvalue_string(endbytes-startbytes);
	memmove(cv_data(ptr(ns)), s+startbytes, endbytes-startbytes);
	return ns;
}

BUILTIN("string-char", string_char)
{
	argcount(nargs, 2);
	char *s = tostring(args[0]);
	size_t lenbytes = cv_len(ptr(args[0]));
	size_t startbytes, n, startchar = toulong(args[1]);
	for(startbytes = n = 0; n < startchar && startbytes < lenbytes; n++)
		startbytes += u8_seqlen(s+startbytes);
	if(n != startchar)
		bounds_error(args[0], args[1]);
	Rune r;
	chartorune(&r, s+startbytes);
	return mk_rune(r);
}

BUILTIN("char-upcase", char_upcase)
{
	argcount(nargs, 1);
	cprim_t *cp = (cprim_t*)ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
}

BUILTIN("char-downcase", char_downcase)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
}

BUILTIN("char-titlecase", char_titlecase)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return mk_rune(totitlerune(*(Rune*)cp_data(cp)));
}

BUILTIN("char-alphabetic?", char_alphabeticp)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return isalpharune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("char-lower-case?", char_lower_casep)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return islowerrune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("char-upper-case?", char_upper_casep)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return isupperrune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("char-title-case?", char_title_casep)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return istitlerune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("char-numeric?", char_numericp)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return isdigitrune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("char-whitespace?", char_whitespacep)
{
	argcount(nargs, 1);
	cprim_t *cp = ptr(args[0]);
	if(!iscprim(args[0]) || cp_class(cp) != runetype)
		type_error("rune", args[0]);
	return isspacerune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}

BUILTIN("string-find", string_find)
{
	char cbuf[UTFmax+1];
	size_t start = 0;
	if(nargs == 3)
		start = toulong(args[2]);
	else
		argcount(nargs, 2);
	char *s = tostring(args[0]);
	size_t len = cv_len(ptr(args[0]));
	if(start > len)
		bounds_error(args[0], args[2]);
	char *needle; size_t needlesz;

	value_t v = args[1];
	cprim_t *cp = ptr(v);
	if(iscprim(v) && cp_class(cp) == runetype){
		Rune r = *(Rune*)cp_data(cp);
		needlesz = runetochar(cbuf, &r);
		needle = cbuf;
		needle[needlesz] = 0;
	}else if(iscprim(v) && cp_class(cp) == bytetype){
		needlesz = 1;
		needle = cbuf;
		needle[0] = *(char*)cp_data(cp);
		needle[needlesz] = 0;
	}else if(fl_isstring(v)){
		cvalue_t *cv = (cvalue_t*)ptr(v);
		needlesz = cv_len(cv);
		needle = (char*)cv_data(cv);
	}else{
		type_error("string", args[1]);
	}
	if(needlesz > len-start)
		return FL_F;
	if(needlesz == 0)
		return size_wrap(start);
	size_t i;
	for(i = start; i < len-needlesz+1; i++){
		if(s[i] == needle[0] && memcmp(&s[i+1], needle+1, needlesz-1) == 0)
			return size_wrap(i);
	}
	return FL_F;
}

static unsigned long
get_radix_arg(value_t arg)
{
	unsigned long radix = toulong(arg);
	if(radix < 2 || radix > 36)
		lerrorf(ArgError, "invalid radix");
	return radix;
}

BUILTIN("number->string", number_2_string)
{
	if(nargs < 1 || nargs > 2)
		argcount(nargs, 2);
	value_t n = args[0];
	int neg = 0;
	uint64_t num;
	if(isfixnum(n))
		num = numval(n);
	else if(!iscprim(n))
		type_error("integer", n);
	else
		num = conv_to_uint64(cp_data(ptr(n)), cp_numtype(ptr(n)));
	if(numval(fl_compare(args[0], fixnum(0))) < 0){
		num = -num;
		neg = 1;
	}
	unsigned long radix = 10;
	if(nargs == 2)
		radix = get_radix_arg(args[1]);
	char buf[128];
	char *str = uint2str(buf, sizeof(buf), num, radix);
	if(neg && str > &buf[0])
		*(--str) = '-';
	return string_from_cstr(str);
}

BUILTIN("string->number", string_2_number)
{
	if(nargs < 1 || nargs > 2)
		argcount(nargs, 2);
	char *str = tostring(args[0]);
	value_t n;
	unsigned long radix = 0;
	if(nargs == 2)
		radix = get_radix_arg(args[1]);
	if(!isnumtok_base(str, &n, (int)radix))
		return FL_F;
	return n;
}

BUILTIN("string-utf8?", string_utf8p)
{
	argcount(nargs, 1);
	char *s = tostring(args[0]);
	size_t len = cv_len((cvalue_t*)ptr(args[0]));
	return u8_isvalid(s, len) ? FL_T : FL_F;
}