shithub: neatroff

ref: b66f7af07003a601e2e125e2c2926586c8664571
dir: /char.c/

View raw version
/* reading characters and escapes */
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <stdio.h>
#include "roff.h"

/* return the length of a utf-8 character based on its first byte */
int utf8len(int c)
{
	if (~c & 0xc0)		/* ASCII or invalid */
		return c > 0;
	if (~c & 0x20)
		return 2;
	if (~c & 0x10)
		return 3;
	if (~c & 0x08)
		return 4;
	return 1;
}

/* return nonzero if s is a single utf-8 character */
int utf8one(char *s)
{
	return !s[utf8len((unsigned char) *s)];
}

/* read a utf-8 character from s and copy it to d */
int utf8read(char **s, char *d)
{
	int l = utf8len((unsigned char) **s);
	int i;
	for (i = 0; i < l; i++)
		d[i] = (*s)[i];
	d[l] = '\0';
	*s += l;
	return l;
}

/* read a utf-8 character with next() and copy it to s */
int utf8next(char *s, int (*next)(void))
{
	int c = next();
	int l = utf8len(c);
	int i;
	if (c < 0)
		return 0;
	s[0] = c;
	for (i = 1; i < l; i++)
		s[i] = next();
	s[l] = '\0';
	return l;
}

/* read quoted arguments of escape sequences (ESC_Q) */
char *quotednext(int (*next)(void), void (*back)(int))
{
	char delim[GNLEN], cs[GNLEN];
	struct sbuf sb;
	char d[GNLEN];
	charnext(delim, next, back);
	sbuf_init(&sb);
	while (charnext_delim(cs, next, back, delim) >= 0) {
		charnext_str(d, cs);
		sbuf_append(&sb, d);
	}
	return sbuf_out(&sb);
}

/* read unquoted arguments of escape sequences (ESC_P) */
char *unquotednext(int cmd, int (*next)(void), void (*back)(int))
{
	int c = next();
	struct sbuf sb;
	sbuf_init(&sb);
	if (cmd == 's' && (c == '-' || c == '+')) {
		cmd = c;
		sbuf_add(&sb, c);
		c = next();
	}
	if (c == '(') {
		sbuf_add(&sb, next());
		sbuf_add(&sb, next());
	} else if (!n_cp && c == '[') {
		c = next();
		while (c > 0 && c != '\n' && c != ']') {
			sbuf_add(&sb, c);
			c = next();
		}
	} else {
		sbuf_add(&sb, c);
		if (n_cp && cmd == 's' && c >= '1' && c <= '3') {
			c = next();
			if (isdigit(c))
				sbuf_add(&sb, c);
			else
				back(c);
		}
	}
	return sbuf_out(&sb);
}

/*
 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
 *
 * character	returned	contents of c
 * x		'\0'		x
 * \4x		c_ni		\4x
 * \\x		'\\'		\\x
 * \\(xy	'('		xy
 * \\[xyz]	'['		xyz
 * \\C'xyz'	'C'		xyz
 */
int charnext(char *c, int (*next)(void), void (*back)(int))
{
	int l, n;
	if (!utf8next(c, next))
		return -1;
	if (c[0] == c_ni) {
		utf8next(c + 1, next);
		return c_ni;
	}
	if (c[0] == c_ec) {
		utf8next(c + 1, next);
		if (c[1] == '(') {
			l = utf8next(c, next);
			l += utf8next(c + l, next);
			return '(';
		} else if (!n_cp && c[1] == '[') {
			l = 0;
			n = next();
			while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
				c[l++] = n;
				n = next();
			}
			c[l] = '\0';
			return '[';
		} else if (c[1] == 'C') {
			char *chr = quotednext(next, back);
			snprintf(c, GNLEN, "%s", chr);
			free(chr);
			return 'C';
		}
		return '\\';
	}
	return '\0';
}

/* like nextchar(), but return -1 if delim was read */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);
	return strcmp(c, delim) ? t : -1;
}

/* convert back the character read from nextchar() (e.g. xy -> \\(xy) */
void charnext_str(char *d, char *c)
{
	int c0 = (unsigned char) c[0];
	if (c0 == c_ec || c0 == c_ni || !c[1] || utf8one(c)) {
		strcpy(d, c);
		return;
	}
	if (!c[2] && utf8len(c0) == 1)
		sprintf(d, "%c(%s", c_ec, c);
	else
		sprintf(d, "%cC'%s'", c_ec, c);
}

/* like charnext() for string buffers */
int charread(char **s, char *c)
{
	int ret;
	sstr_push(*s);
	ret = charnext(c, sstr_next, sstr_back);
	*s = sstr_pop();
	return ret;
}

/* like charnext_delim() for string buffers */
int charread_delim(char **s, char *c, char *delim)
{
	int ret;
	sstr_push(*s);
	ret = charnext_delim(c, sstr_next, sstr_back, delim);
	*s = sstr_pop();
	return ret;
}

/* read quoted arguments; this is called only for internal neatroff strings */
static void quotedread(char **sp, char *d)
{
	char *s = *sp;
	int q = *s++;
	while (*s && *s != q)
		*d++ = *s++;
	if (*s == q)
		s++;
	*d = '\0';
	*sp = s;
}

/* read unquoted arguments; this is called only for internal neatroff strings */
static void unquotedread(char **sp, char *d)
{
	char *s = *sp;
	if (*s == '(') {
		s++;
		*d++ = *s++;
		*d++ = *s++;
	} else if (!n_cp && *s == '[') {
		s++;
		while (*s && *s != ']')
			*d++ = *s++;
		if (*s == ']')
			s++;
	} else {
		*d++ = *s++;
	}
	*d = '\0';
	*sp = s;
}

/*
 * read a glyph or an escape sequence
 *
 * This function reads from s either an output troff request
 * (only the ones emitted by wb.c) or a glyph name and updates
 * s.  The return value is the name of the troff request (the
 * argument is copied into d) or zero for glyph names (it is
 * copied into d).  Returns -1 when the end of s is reached.
 * Note that to d, a pointer to a static array is assigned.
 */
int escread(char **s, char **d)
{
	static char buf[1 << 12];
	char *r;
	if (!**s)
		return -1;
	r = buf;
	*d = buf;
	utf8read(s, r);
	if (r[0] == c_ec) {
		utf8read(s, r + 1);
		if (r[1] == '(') {
			utf8read(s, r);
			utf8read(s, r + strlen(r));
		} else if (!n_cp && r[1] == '[') {
			while (**s && **s != ']')
				*r++ = *(*s)++;
			*r = '\0';
			if (**s == ']')
				(*s)++;
		} else if (strchr("CDfhmsvXx<>", r[1])) {
			int c = r[1];
			r[0] = '\0';
			if (strchr(ESC_P, c))
				unquotedread(s, r);
			if (strchr(ESC_Q, c))
				quotedread(s, r);
			return c == 'C' ? 0 : c;
		}
	} else if (r[0] == c_ni) {
		utf8read(s, r + 1);
	}
	return 0;
}

/*
 * string streams: provide next()/back() interface for string buffers
 *
 * Functions like charnext() require a next()/back() interface
 * for reading input streams.  In order to provide this interface
 * for string buffers, the following functions can be used:
 *
 *   sstr_push(s);
 *   charnext(c, sstr_next, sstr_back);
 *   sstr_pop();
 *
 * The calls to sstr_push()/sstr_pop() may be nested.
 */
static char *sstr_bufs[NSSTR];	/* buffer stack */
static int sstr_n;		/* numbers of items in sstr_bufs[] */
static char *sstr_s;		/* current buffer */

void sstr_push(char *s)
{
	sstr_bufs[sstr_n++] = sstr_s;
	sstr_s = s;
}

char *sstr_pop(void)
{
	char *ret = sstr_s;
	sstr_s = sstr_bufs[--sstr_n];
	return ret;
}

int sstr_next(void)
{
	return *sstr_s ? (unsigned char) *sstr_s++ : -1;
}

void sstr_back(int c)
{
	sstr_s--;
}