ref: 9a38d7e0bd004edf84d475df485ce81a00aaa442
dir: /char.c/
/* reading characters and escapes */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "roff.h"
/*
 * return the length of a utf-8 character based on its first byte
 *
 * c is the lead byte as a non-negative value (callers cast through
 * unsigned char).  The result is 0 only for c == 0; bytes that cannot
 * start a multi-byte sequence (ASCII, 0x80-0xbf) and negative values
 * count as a single byte.
 */
int utf8len(int c)
{
	if (c == 0)
		return 0;
	if (c < 0xc0)		/* ASCII, stray continuation bytes, negatives */
		return 1;
	if (c < 0xe0)
		return 2;
	if (c < 0xf0)
		return 3;
	if (c < 0xf8)
		return 4;
	if (c < 0xfc)
		return 5;
	return 6;
}
/*
 * return nonzero if s is a single utf-8 character
 *
 * The length is taken from the first byte via utf8len().  An empty
 * string also yields nonzero (its length is 0 and s[0] is the NUL).
 * A string that ends before the announced length is reached is not a
 * single character; it is rejected without looking past its NUL.
 */
int utf8one(char *s)
{
	int l = utf8len((unsigned char) *s);
	int i;
	/* s[0] is known to be nonzero whenever l > 0 */
	for (i = 1; i < l; i++)
		if (!s[i])
			return 0;
	return !s[l];
}
/*
 * read a utf-8 character from s and copy it to d
 *
 * *s is advanced past the bytes that were copied and d is always
 * NUL-terminated.  Returns the number of bytes copied, which is 0 at
 * the end of the string.  If the string ends in the middle of a
 * multi-byte sequence, only the bytes before the NUL are consumed so
 * that *s never moves beyond the terminator.
 */
int utf8read(char **s, char *d)
{
	int l = utf8len((unsigned char) **s);
	int i;
	for (i = 0; i < l && (*s)[i]; i++)
		d[i] = (*s)[i];
	d[i] = '\0';
	*s += i;
	return i;
}
/*
 * read a utf-8 character with next() and copy it to s
 *
 * next() returns the next input byte, or a negative value at the end
 * of the input.  s is always NUL-terminated on return; since utf8len()
 * reports at most 6 bytes, s needs room for 7.  Returns the number of
 * bytes stored, which is 0 at the end of the input (and for a NUL
 * byte).  If the input ends in the middle of a multi-byte sequence,
 * only the bytes actually read are stored.
 */
int utf8next(char *s, int (*next)(void))
{
	int c = next();
	int l;
	int n;
	if (c < 0) {
		s[0] = '\0';
		return 0;
	}
	l = utf8len(c);
	s[0] = c;
	n = l > 0 ? 1 : 0;	/* a NUL lead byte has length zero */
	while (n < l) {
		c = next();
		if (c < 0)	/* input ended inside the character */
			break;
		s[n++] = c;
	}
	s[n] = '\0';
	return n;
}
/*
 * read quoted arguments of escape sequences (ESC_Q)
 *
 * The first character (or escape sequence) read with charnext() is the
 * delimiter; everything up to its next occurrence, or the end of the
 * input, is appended to d in the textual form produced by
 * charnext_str().  d is always NUL-terminated, even when the argument
 * is empty or the input ends before a delimiter is seen.  The size of
 * d is not known here; the caller must supply a large enough buffer.
 */
void quotednext(char *d, int (*next)(void), void (*back)(int))
{
	char delim[GNLEN] = "";
	char cs[GNLEN] = "";
	*d = '\0';
	/* without a delimiter there is nothing to read */
	if (charnext(delim, next, back) < 0)
		return;
	while (charnext_delim(cs, next, back, delim) >= 0) {
		charnext_str(d, cs);
		d = strchr(d, '\0');
	}
}
/*
 * read unquoted arguments of escape sequences (ESC_P)
 *
 * The argument is copied to d without its brackets and NUL-terminated:
 *   (xy     the two bytes following the parenthesis
 *   [xyz]   everything up to ']' or a newline (only when n_cp is zero)
 *   x       a single byte
 * For cmd == 's' a leading '+' or '-' is kept in front of the argument.
 * An unsigned 's' argument starting with 1, 2 or 3 also takes a
 * following digit; any other byte read at that point is handed back
 * with back().
 */
void unquotednext(char *d, int cmd, int (*next)(void), void (*back)(int))
{
	int ch = next();
	int twodigit = cmd == 's';	/* two-digit sizes apply to unsigned \s only */
	if (twodigit && (ch == '-' || ch == '+')) {
		twodigit = 0;
		*d++ = ch;
		ch = next();
	}
	if (ch == '(') {
		d[0] = next();
		d[1] = next();
		d += 2;
	} else if (!n_cp && ch == '[') {
		for (ch = next(); ch > 0 && ch != '\n' && ch != ']'; ch = next())
			*d++ = ch;
	} else {
		*d++ = ch;
		if (twodigit && ch >= '1' && ch <= '3') {
			int digit = next();
			if (isdigit(digit))
				*d++ = digit;
			else
				back(digit);
		}
	}
	*d = '\0';
}
/*
 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
 *
 *   input       returned   contents of c
 *   x           '\0'       x
 *   \4x         c_ni       \4x
 *   \\x         '\\'       \\x
 *   \\(xy       '('        xy
 *   \\[xyz]     '['        xyz
 *   \\C'xyz'    'C'        xyz
 *
 * Returns -1 when utf8next() cannot read a character.  The \[xyz] form
 * is recognised only when n_cp is zero; its name ends at ']' or a
 * newline and at most GNLEN - 1 bytes of it are stored.
 */
int charnext(char *c, int (*next)(void), void (*back)(int))
{
	int len, ch;
	if (!utf8next(c, next))
		return -1;
	if (c[0] == c_ni) {
		utf8next(c + 1, next);
		return c_ni;
	}
	/* anything that does not start with the escape character is plain */
	if (c[0] != c_ec)
		return '\0';
	utf8next(c + 1, next);
	if (c[1] == '(') {
		/* two-character name: overwrite the escape with the name itself */
		len = utf8next(c, next);
		utf8next(c + len, next);
		return '(';
	}
	if (!n_cp && c[1] == '[') {
		len = 0;
		for (ch = next();
		     ch >= 0 && ch != '\n' && ch != ']' && len < GNLEN - 1;
		     ch = next())
			c[len++] = ch;
		c[len] = '\0';
		return '[';
	}
	if (c[1] == 'C') {
		quotednext(c, next, back);
		return 'C';
	}
	return '\\';
}
/*
 * like charnext(), but return -1 if delim was read
 *
 * Also returns -1 when charnext() itself fails.  In that case c may
 * not have been written, so it is not compared against delim.
 */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);
	if (t < 0)
		return t;
	return strcmp(c, delim) ? t : -1;
}
/*
 * convert back the character read from charnext() (e.g. xy -> \\(xy)
 *
 * c is copied to d unchanged when it starts with c_ec or c_ni, is at
 * most one byte long, or is a single utf-8 character.  Otherwise it is
 * a glyph name: a two-byte name starting with a single-byte character
 * becomes \(xy and anything else becomes \C'name'.
 */
void charnext_str(char *d, char *c)
{
	int first = (unsigned char) c[0];
	int verbatim = first == c_ec || first == c_ni || !c[1] || utf8one(c);
	if (verbatim)
		strcpy(d, c);
	else if (!c[2] && utf8len(first) == 1)
		sprintf(d, "%c(%s", c_ec, c);
	else
		sprintf(d, "%cC'%s'", c_ec, c);
}
/*
 * like charnext() for string buffers
 *
 * Reads one character or escape sequence from *s into c through the
 * string stream interface and stores the position reached by the
 * stream back into *s.  Returns what charnext() returned.
 */
int charread(char **s, char *c)
{
	int kind;
	sstr_push(*s);
	kind = charnext(c, sstr_next, sstr_back);
	*s = sstr_pop();
	return kind;
}
/*
 * like charnext_delim() for string buffers
 *
 * Reads one character or escape sequence from *s into c through the
 * string stream interface and stores the position reached by the
 * stream back into *s.  Returns what charnext_delim() returned.
 */
int charread_delim(char **s, char *c, char *delim)
{
	int kind;
	sstr_push(*s);
	kind = charnext_delim(c, sstr_next, sstr_back, delim);
	*s = sstr_pop();
	return kind;
}
/*
 * read quoted arguments; this is called only for internal neatroff strings
 *
 * The first byte of *sp is the quote.  The bytes up to the matching
 * quote (or the end of the string) are copied to d, which is always
 * NUL-terminated, and *sp is moved past the closing quote if there is
 * one.  For an empty string nothing is consumed: the terminating NUL
 * is not taken as a quote, so *sp never moves beyond it.
 */
static void quotedread(char **sp, char *d)
{
	char *s = *sp;
	int q = *s;
	if (q) {
		s++;
		while (*s && *s != q)
			*d++ = *s++;
		if (*s == q)
			s++;
	}
	*d = '\0';
	*sp = s;
}
/*
 * read unquoted arguments; this is called only for internal neatroff strings
 *
 * The argument at *sp is copied to d without its brackets and
 * NUL-terminated; *sp is moved past it:
 *   (xy     the two bytes following the parenthesis
 *   [xyz]   everything up to ']' (only when n_cp is zero)
 *   x       a single byte
 * Copying stops at the end of the string, so *sp never moves beyond
 * the terminating NUL even if the argument is cut short.
 */
static void unquotedread(char **sp, char *d)
{
	char *s = *sp;
	int i;
	if (*s == '(') {
		s++;
		for (i = 0; i < 2 && *s; i++)
			*d++ = *s++;
	} else if (!n_cp && *s == '[') {
		s++;
		while (*s && *s != ']')
			*d++ = *s++;
		if (*s == ']')
			s++;
	} else if (*s) {
		*d++ = *s++;
	}
	*d = '\0';
	*sp = s;
}
/*
 * read a glyph or an escape sequence
 *
 * This function reads from s either an output troff request
 * (only the ones emitted by wb.c) or a glyph name and updates
 * s.  The return value is the name of the troff request (the
 * argument is copied into d) or zero for glyph names (it is
 * copied into d).  Returns -1 when the end of s is reached.
 *
 * d is always NUL-terminated.  An escape character that is the
 * last byte of s is returned as a plain one-byte glyph.
 */
int escread(char **s, char *d)
{
	char *r = d;
	if (!**s)
		return -1;
	utf8read(s, d);
	if (d[0] == c_ec) {
		utf8read(s, d + 1);
		if (d[1] == '(') {
			/* \(xy: replace the escape with the two-character name */
			utf8read(s, d);
			utf8read(s, d + strlen(d));
		} else if (!n_cp && d[1] == '[') {
			/* \[xyz]: the name overwrites the escape stored in d */
			while (**s && **s != ']')
				*r++ = *(*s)++;
			*r = '\0';
			if (**s == ']')
				(*s)++;
		} else if (d[1] && strchr("CDfhmsvXx", d[1])) {
			/* d[1] is tested first: strchr() also matches the NUL */
			int c = d[1];
			d[0] = '\0';
			if (strchr(ESC_P, c))
				unquotedread(s, d);
			if (strchr(ESC_Q, c))
				quotedread(s, d);
			/* \C'name' is a glyph name, not a request */
			return c == 'C' ? 0 : c;
		}
	}
	if (d[0] == c_ni)
		utf8read(s, d + 1);
	return 0;
}
/*
 * string streams: provide next()/back() interface for string buffers
 *
 * Functions like charnext() require a next()/back() interface
 * for reading input streams.  In order to provide this interface
 * for string buffers, the following functions can be used:
 *
 *   sstr_push(s);
 *   charnext(c, sstr_next, sstr_back);
 *   sstr_pop();
 *
 * The calls to sstr_push()/sstr_pop() may be nested; sstr_pop()
 * returns the position reached in the buffer being popped.
 */
static char *sstr_bufs[NSSTR]; /* stack of suspended buffers (saved read positions) */
static int sstr_n; /* number of items in sstr_bufs[] */
static char *sstr_s; /* read position in the current buffer */
/*
 * start reading from s; the position in the current buffer is saved
 * on the stack so that sstr_pop() can resume it
 */
void sstr_push(char *s)
{
	char **slot = &sstr_bufs[sstr_n];
	*slot = sstr_s;
	sstr_n++;
	sstr_s = s;
}
/*
 * stop reading from the current buffer and resume the one saved by the
 * matching sstr_push(); returns the position reached in the buffer
 * that is being popped
 */
char *sstr_pop(void)
{
	char *reached = sstr_s;
	sstr_n--;
	sstr_s = sstr_bufs[sstr_n];
	return reached;
}
/*
 * return the next byte of the current buffer as a non-negative value,
 * or -1 at the terminating NUL (the position is not advanced then)
 */
int sstr_next(void)
{
	unsigned char ch = (unsigned char) *sstr_s;
	if (!ch)
		return -1;
	sstr_s++;
	return ch;
}
/*
 * push back the byte c that was last returned by sstr_next()
 *
 * sstr_next() does not advance when it reports the end of the buffer
 * with a negative value, so pushing such a value back must not move
 * the position either; otherwise the last byte would be read twice.
 */
void sstr_back(int c)
{
	if (c < 0)
		return;
	sstr_s--;
}