ref: b4c113c4fc16ea6c3b2adacc60f096d2b7c8decc
parent: b47f06a31e7449c8968b54db9d921fd6cce6bc7c
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Oct 22 14:42:23 EDT 2024
utf8: remove unused functions
--- a/utf8.c
+++ b/utf8.c
@@ -191,20 +191,6 @@
return 0;
}
-/* charnum => byte offset */
-size_t
-u8_offset(const char *s, size_t charnum)
-{
- size_t i = 0;
-
- while(charnum > 0){
- if(s[i++] & 0x80)
- (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
- charnum--;
- }
- return i;
-}
-
/* byte offset => charnum */
size_t
u8_charnum(const char *s, size_t offset)
@@ -219,27 +205,7 @@
return charnum;
}
-/* number of characters in NUL-terminated string */
size_t
-u8_strlen(const char *s)
-{
- size_t count = 0;
- size_t i = 0, lasti;
-
- while(1) {
- lasti = i;
- while(s[i] > 0)
- i++;
- count += (i-lasti);
- if(s[i++] == 0)
- break;
- (void)(isutf(s[++i]) || isutf(s[++i]) || ++i);
- count++;
- }
- return count;
-}
-
-size_t
u8_strwidth(const char *s)
{
uint32_t ch;
@@ -307,18 +273,6 @@
return ch - offsetsFromUTF8[sz-1];
}
-void
-u8_inc(const char *s, size_t *i)
-{
- (void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || isutf(s[++(*i)]) || ++(*i));
-}
-
-void
-u8_dec(const char *s, size_t *i)
-{
- (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
-}
-
int
octal_digit(char c)
{
@@ -347,70 +301,6 @@
return c;
}
-/* assumes that src points to the character after a backslash
- returns number of input characters processed, 0 if error */
-size_t
-u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
-{
- assert(ssz > 0);
- uint32_t ch;
- char digs[10];
- int dno = 0, ndig;
- size_t i = 1;
- char c0 = str[0];
-
- if(octal_digit(c0)){
- i = 0;
- do{
- digs[dno++] = str[i++];
- }while(i < ssz && octal_digit(str[i]) && dno < 3);
- digs[dno] = '\0';
- ch = strtol(digs, nil, 8);
- }else if((c0 == 'x' && (ndig = 2)) || (c0 == 'u' && (ndig = 4)) || (c0 == 'U' && (ndig = 8))){
- while(i<ssz && hex_digit(str[i]) && dno < ndig)
- digs[dno++] = str[i++];
- if(dno == 0)
- return 0;
- digs[dno] = '\0';
- ch = strtol(digs, nil, 16);
- }else{
- ch = (uint32_t)read_escape_control_char(c0);
- }
- *dest = ch;
-
- return i;
-}
-
-/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
- example: u8_unescape(mybuf, 256, "hello\\u220e")
- note the double backslash is needed if called on a C string literal */
-size_t
-u8_unescape(char *buf, size_t sz, const char *src)
-{
- size_t c = 0, amt;
- uint32_t ch;
- char temp[4];
-
- while(*src && c < sz){
- if(*src == '\\'){
- src++;
- amt = u8_read_escape_sequence(src, 1000, &ch);
- }else{
- ch = (uint32_t)*src;
- amt = 1;
- }
- src += amt;
- amt = u8_wc_toutf8(temp, ch);
- if(amt > sz-c)
- break;
- memmove(&buf[c], temp, amt);
- c += amt;
- }
- if(c < sz)
- buf[c] = '\0';
- return c;
-}
-
static inline int
buf_put2c(char *buf, const char *src)
{
@@ -483,25 +373,6 @@
}
char *
-u8_strchr(const char *s, uint32_t ch, size_t *charn)
-{
- size_t i = 0, lasti = 0;
- uint32_t c;
-
- *charn = 0;
- while(s[i]){
- c = u8_nextchar(s, &i);
- if(c == ch){
- /* it's const for us, but not necessarily the caller */
- return (char*)&s[lasti];
- }
- lasti = i;
- (*charn)++;
- }
- return nil;
-}
-
-char *
u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn)
{
size_t i = 0, lasti = 0;
@@ -524,77 +395,6 @@
(*charn)++;
}
return nil;
-}
-
-char *
-u8_memrchr(const char *s, uint32_t ch, size_t sz)
-{
- size_t i = sz-1, tempi = 0;
- uint32_t c;
-
- if(sz == 0)
- return nil;
-
- while(i && !isutf(s[i]))
- i--;
-
- while(1){
- tempi = i;
- c = u8_nextmemchar(s, &tempi);
- if(c == ch)
- return (char*)&s[i];
- if(i == 0)
- break;
- tempi = i;
- u8_dec(s, &i);
- if(i > tempi)
- break;
- }
- return nil;
-}
-
-size_t
-u8_vprintf(const char *fmt, va_list ap)
-{
- size_t cnt, sz, nc, needfree = 0;
- char *buf, tmp[512];
- uint32_t *wcs;
-
- sz = 512;
- buf = tmp;
- cnt = vsnprintf(buf, sz, fmt, ap);
- if((ssize_t)cnt < 0)
- return 0;
- if(cnt >= sz){
- buf = (char*)malloc(cnt + 1);
- needfree = 1;
- vsnprintf(buf, cnt+1, fmt, ap);
- }
- wcs = (uint32_t*)malloc((cnt+1) * sizeof(uint32_t));
- nc = u8_toucs(wcs, cnt+1, buf, cnt);
- wcs[nc] = 0;
-#if defined(__plan9__)
- print("%S", (Rune*)wcs);
-#else
- printf("%ls", (wchar_t*)wcs);
-#endif
- free(wcs);
- if(needfree)
- free(buf);
- return nc;
-}
-
-size_t
-u8_printf(const char *fmt, ...)
-{
- size_t cnt;
- va_list args;
-
- va_start(args, fmt);
- cnt = u8_vprintf(fmt, args);
-
- va_end(args);
- return cnt;
}
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
--- a/utf8.h
+++ b/utf8.h
@@ -15,9 +15,6 @@
/* single character to UTF-8, returns # bytes written */
size_t u8_wc_toutf8(char *dest, uint32_t ch);
-/* character number to byte offset */
-size_t u8_offset(const char *str, size_t charnum);
-
/* byte offset to character number */
size_t u8_charnum(const char *s, size_t offset);
@@ -27,12 +24,6 @@
/* next character without NUL character terminator */
uint32_t u8_nextmemchar(const char *s, size_t *i);
-/* move to next character */
-void u8_inc(const char *s, size_t *i);
-
-/* move to previous character */
-void u8_dec(const char *s, size_t *i);
-
/* returns length of next utf-8 sequence */
size_t u8_seqlen(const char *s);
@@ -44,19 +35,11 @@
char read_escape_control_char(char c);
-/* assuming src points to the character after a backslash, read an
- escape sequence, storing the result in dest and returning the number of
- input characters processed */
-size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
-
/* given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output.
sz must be at least 3. */
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
-/* convert a string "src" containing escape sequences to UTF-8 */
-size_t u8_unescape(char *buf, size_t sz, const char *src);
-
/* convert UTF-8 "src" to escape sequences.
sz is buf size in bytes. must be at least 12.
@@ -79,27 +62,12 @@
int octal_digit(char c);
int hex_digit(char c);
-/* return a pointer to the first occurrence of ch in s, or nil if not
- found. character index of found character returned in *charn. */
-char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
-
/* same as the above, but searches a buffer of a given size instead of
a NUL-terminated string. */
char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
-char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
-
-/* count the number of characters in a UTF-8 string */
-size_t u8_strlen(const char *s);
-
/* number of columns occupied by a string */
size_t u8_strwidth(const char *s);
-
-/* printf where the format string and arguments may be in UTF-8.
- you can avoid this function and just use ordinary printf() if the current
- locale is UTF-8. */
-size_t u8_vprintf(const char *fmt, va_list ap);
-size_t u8_printf(const char *fmt, ...);
/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
int u8_isvalid(const char *str, int length);