shithub: femtolisp

Download patch

ref: b4c113c4fc16ea6c3b2adacc60f096d2b7c8decc
parent: b47f06a31e7449c8968b54db9d921fd6cce6bc7c
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Oct 22 14:42:23 EDT 2024

utf8: remove unused functions

--- a/utf8.c
+++ b/utf8.c
@@ -191,20 +191,6 @@
 	return 0;
 }
 
-/* charnum => byte offset */
-size_t
-u8_offset(const char *s, size_t charnum)
-{
-	size_t i = 0;
-
-	while(charnum > 0){
-		if(s[i++] & 0x80)
-			(void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
-		charnum--;
-	}
-	return i;
-}
-
 /* byte offset => charnum */
 size_t
 u8_charnum(const char *s, size_t offset)
@@ -219,27 +205,7 @@
 	return charnum;
 }
 
-/* number of characters in NUL-terminated string */
 size_t
-u8_strlen(const char *s)
-{
-	size_t count = 0;
-	size_t i = 0, lasti;
-
-	while(1) {
-		lasti = i;
-		while(s[i] > 0)
-			i++;
-		count += (i-lasti);
-		if(s[i++] == 0)
-			break;
-		(void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
-		count++;
-	}
-	return count;
-}
-
-size_t
 u8_strwidth(const char *s)
 {
 	uint32_t ch;
@@ -307,18 +273,6 @@
 	return ch - offsetsFromUTF8[sz-1];
 }
 
-void
-u8_inc(const char *s, size_t *i)
-{
-	(void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || isutf(s[++(*i)]) || ++(*i));
-}
-
-void
-u8_dec(const char *s, size_t *i)
-{
-	(void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
-}
-
 int
 octal_digit(char c)
 {
@@ -347,70 +301,6 @@
 	return c;
 }
 
-/* assumes that src points to the character after a backslash
-   returns number of input characters processed, 0 if error */
-size_t
-u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
-{
-	assert(ssz > 0);
-	uint32_t ch;
-	char digs[10];
-	int dno = 0, ndig;
-	size_t i = 1;
-	char c0 = str[0];
-
-	if(octal_digit(c0)){
-		i = 0;
-		do{
-			digs[dno++] = str[i++];
-		}while(i < ssz && octal_digit(str[i]) && dno < 3);
-		digs[dno] = '\0';
-		ch = strtol(digs, nil, 8);
-	}else if((c0 == 'x' && (ndig = 2)) || (c0 == 'u' && (ndig = 4)) || (c0 == 'U' && (ndig = 8))){
-		while(i<ssz && hex_digit(str[i]) && dno < ndig)
-			digs[dno++] = str[i++];
-		if(dno == 0)
-			return 0;
-		digs[dno] = '\0';
-		ch = strtol(digs, nil, 16);
-	}else{
-		ch = (uint32_t)read_escape_control_char(c0);
-	}
-	*dest = ch;
-
-	return i;
-}
-
-/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
-   example: u8_unescape(mybuf, 256, "hello\\u220e")
-   note the double backslash is needed if called on a C string literal */
-size_t
-u8_unescape(char *buf, size_t sz, const char *src)
-{
-	size_t c = 0, amt;
-	uint32_t ch;
-	char temp[4];
-
-	while(*src && c < sz){
-		if(*src == '\\'){
-			src++;
-			amt = u8_read_escape_sequence(src, 1000, &ch);
-		}else{
-			ch = (uint32_t)*src;
-			amt = 1;
-		}
-		src += amt;
-		amt = u8_wc_toutf8(temp, ch);
-		if(amt > sz-c)
-			break;
-		memmove(&buf[c], temp, amt);
-		c += amt;
-	}
-	if(c < sz)
-		buf[c] = '\0';
-	return c;
-}
-
 static inline int
 buf_put2c(char *buf, const char *src)
 {
@@ -483,25 +373,6 @@
 }
 
 char *
-u8_strchr(const char *s, uint32_t ch, size_t *charn)
-{
-	size_t i = 0, lasti = 0;
-	uint32_t c;
-
-	*charn = 0;
-	while(s[i]){
-		c = u8_nextchar(s, &i);
-		if(c == ch){
-			/* it's const for us, but not necessarily the caller */
-			return (char*)&s[lasti];
-		}
-		lasti = i;
-		(*charn)++;
-	}
-	return nil;
-}
-
-char *
 u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn)
 {
 	size_t i = 0, lasti = 0;
@@ -524,77 +395,6 @@
 		(*charn)++;
 	}
 	return nil;
-}
-
-char *
-u8_memrchr(const char *s, uint32_t ch, size_t sz)
-{
-	size_t i = sz-1, tempi = 0;
-	uint32_t c;
-
-	if(sz == 0)
-		return nil;
-
-	while(i && !isutf(s[i]))
-		i--;
-
-	while(1){
-		tempi = i;
-		c = u8_nextmemchar(s, &tempi);
-		if(c == ch)
-			return (char*)&s[i];
-		if(i == 0)
-			break;
-		tempi = i;
-		u8_dec(s, &i);
-		if(i > tempi)
-			break;
-	}
-	return nil;
-}
-
-size_t
-u8_vprintf(const char *fmt, va_list ap)
-{
-	size_t cnt, sz, nc, needfree = 0;
-	char *buf, tmp[512];
-	uint32_t *wcs;
-
-	sz = 512;
-	buf = tmp;
-	cnt = vsnprintf(buf, sz, fmt, ap);
-	if((ssize_t)cnt < 0)
-		return 0;
-	if(cnt >= sz){
-		buf = (char*)malloc(cnt + 1);
-		needfree = 1;
-		vsnprintf(buf, cnt+1, fmt, ap);
-	}
-	wcs = (uint32_t*)malloc((cnt+1) * sizeof(uint32_t));
-	nc = u8_toucs(wcs, cnt+1, buf, cnt);
-	wcs[nc] = 0;
-#if defined(__plan9__)
-	print("%S", (Rune*)wcs);
-#else
-	printf("%ls", (wchar_t*)wcs);
-#endif
-	free(wcs);
-	if(needfree)
-		free(buf);
-	return nc;
-}
-
-size_t
-u8_printf(const char *fmt, ...)
-{
-	size_t cnt;
-	va_list args;
-
-	va_start(args, fmt);
-	cnt = u8_vprintf(fmt, args);
-
-	va_end(args);
-	return cnt;
 }
 
 /* based on the valid_utf8 routine from the PCRE library by Philip Hazel
--- a/utf8.h
+++ b/utf8.h
@@ -15,9 +15,6 @@
 /* single character to UTF-8, returns # bytes written */
 size_t u8_wc_toutf8(char *dest, uint32_t ch);
 
-/* character number to byte offset */
-size_t u8_offset(const char *str, size_t charnum);
-
 /* byte offset to character number */
 size_t u8_charnum(const char *s, size_t offset);
 
@@ -27,12 +24,6 @@
 /* next character without NUL character terminator */
 uint32_t u8_nextmemchar(const char *s, size_t *i);
 
-/* move to next character */
-void u8_inc(const char *s, size_t *i);
-
-/* move to previous character */
-void u8_dec(const char *s, size_t *i);
-
 /* returns length of next utf-8 sequence */
 size_t u8_seqlen(const char *s);
 
@@ -44,19 +35,11 @@
 
 char read_escape_control_char(char c);
 
-/* assuming src points to the character after a backslash, read an
-   escape sequence, storing the result in dest and returning the number of
-   input characters processed */
-size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
-
 /* given a wide character, convert it to an ASCII escape sequence stored in
    buf, where buf is "sz" bytes. returns the number of characters output.
    sz must be at least 3. */
 int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
 
-/* convert a string "src" containing escape sequences to UTF-8 */
-size_t u8_unescape(char *buf, size_t sz, const char *src);
-
 /* convert UTF-8 "src" to escape sequences.
 
    sz is buf size in bytes. must be at least 12.
@@ -79,27 +62,12 @@
 int octal_digit(char c);
 int hex_digit(char c);
 
-/* return a pointer to the first occurrence of ch in s, or nil if not
-   found. character index of found character returned in *charn. */
-char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
-
 /* same as the above, but searches a buffer of a given size instead of
    a NUL-terminated string. */
 char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
 
-char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
-
-/* count the number of characters in a UTF-8 string */
-size_t u8_strlen(const char *s);
-
 /* number of columns occupied by a string */
 size_t u8_strwidth(const char *s);
-
-/* printf where the format string and arguments may be in UTF-8.
-   you can avoid this function and just use ordinary printf() if the current
-   locale is UTF-8. */
-size_t u8_vprintf(const char *fmt, va_list ap);
-size_t u8_printf(const char *fmt, ...);
 
 /* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
 int u8_isvalid(const char *str, int length);