ref: bb2279e43d905ca8383d5dec5eee57feec5462b1
dir: /js-lex.c/
#include "js.h"
/* Number of elements in array `a`. Only meaningful for true arrays, not pointers. */
#define nelem(a) (sizeof (a) / sizeof (a)[0])
/*
 * Reserved-word table mapping each spelling to its token.
 *
 * INVARIANT: entries must stay sorted in strcmp() order, because
 * findkeyword() locates entries with a binary search.
 *
 * The table is read-only and private to this file, hence `static const`.
 */
static const struct {
	const char *string;	/* keyword spelling */
	js_Token token;		/* token returned when the spelling matches */
} keywords[] = {
	{"abstract", JS_ABSTRACT},
	{"boolean", JS_BOOLEAN},
	{"break", JS_BREAK},
	{"byte", JS_BYTE},
	{"case", JS_CASE},
	{"catch", JS_CATCH},
	{"char", JS_CHAR},
	{"class", JS_CLASS},
	{"const", JS_CONST},
	{"continue", JS_CONTINUE},
	{"debugger", JS_DEBUGGER},
	{"default", JS_DEFAULT},
	{"delete", JS_DELETE},
	{"do", JS_DO},
	{"double", JS_DOUBLE},
	{"else", JS_ELSE},
	{"enum", JS_ENUM},
	{"export", JS_EXPORT},
	{"extends", JS_EXTENDS},
	{"false", JS_FALSE},
	{"final", JS_FINAL},
	{"finally", JS_FINALLY},
	{"float", JS_FLOAT},
	{"for", JS_FOR},
	{"function", JS_FUNCTION},
	{"goto", JS_GOTO},
	{"if", JS_IF},
	{"implements", JS_IMPLEMENTS},
	{"import", JS_IMPORT},
	{"in", JS_IN},
	{"instanceof", JS_INSTANCEOF},
	{"int", JS_INT},
	{"interface", JS_INTERFACE},
	{"long", JS_LONG},
	{"native", JS_NATIVE},
	{"new", JS_NEW},
	{"null", JS_NULL},
	{"package", JS_PACKAGE},
	{"private", JS_PRIVATE},
	{"protected", JS_PROTECTED},
	{"public", JS_PUBLIC},
	{"return", JS_RETURN},
	{"short", JS_SHORT},
	{"static", JS_STATIC},
	{"super", JS_SUPER},
	{"switch", JS_SWITCH},
	{"synchronized", JS_SYNCHRONIZED},
	{"this", JS_THIS},
	{"throw", JS_THROW},
	{"throws", JS_THROWS},
	{"transient", JS_TRANSIENT},
	{"true", JS_TRUE},
	{"try", JS_TRY},
	{"typeof", JS_TYPEOF},
	{"var", JS_VAR},
	{"void", JS_VOID},
	{"volatile", JS_VOLATILE},
	{"while", JS_WHILE},
	{"with", JS_WITH},
};
/*
 * Printable spelling of every token, indexed by token value
 * (js_tokentostring() uses a js_Token directly as the index).
 * NOTE(review): the order presumably mirrors the js_Token enumeration
 * declared in js.h -- confirm before adding or moving entries.
 */
const char *tokenstrings[] = {
	/* special tokens and literals */
	"ERROR", "EOF", "(identifier)",
	"null", "true", "false",
	"(number)", "(string)", "(regexp)", "\\n",

	/* brackets and separators */
	"{", "}", "(", ")", "[", "]", ".", ";", ",",

	/* comparison */
	"<", ">", "<=", ">=", "==", "!=", "===", "!==",

	/* arithmetic, shift, bitwise and logical operators */
	"+", "-", "*", "%", "++", "--",
	"<<", ">>", ">>>",
	"&", "|", "^", "!", "~", "&&", "||",
	"?", ":",

	/* assignment */
	"=", "+=", "-=", "*=", "%=",
	"<<=", ">>=", ">>>=",
	"&=", "|=", "^=",

	/* division */
	"/", "/=",

	/* keywords */
	"break", "case", "catch", "continue", "default", "delete", "do",
	"else", "finally", "for", "function", "if", "in", "instanceof",
	"new", "return", "switch", "this", "throw", "try", "typeof",
	"var", "void", "while", "with",

	/* remaining reserved words */
	"abstract", "boolean", "byte", "char", "class", "const",
	"debugger", "double", "enum", "export", "extends", "final",
	"float", "goto", "implements", "import", "int", "interface",
	"long", "native", "package", "private", "protected", "public",
	"short", "static", "super", "synchronized", "throws",
	"transient", "volatile",
};
/*
 * Return the printable spelling of token `t`.
 *
 * Token values outside the tokenstrings table (including negative ones,
 * which become huge after the unsigned cast) yield a placeholder string
 * instead of reading past the end of the array.
 */
const char *js_tokentostring(js_Token t)
{
	if ((size_t)t >= nelem(tokenstrings))
		return "(unknown)";
	return tokenstrings[t];
}
/*
 * Look up `s` in the sorted keywords table with a binary search.
 * Returns the keyword's token, or JS_IDENTIFIER when `s` is not a keyword.
 */
static inline js_Token findkeyword(const char *s)
{
	int lo = 0;
	int hi = nelem(keywords);	/* search window is [lo, hi) */

	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;
		int cmp = strcmp(s, keywords[mid].string);

		if (cmp == 0)
			return keywords[mid].token;
		if (cmp < 0)
			hi = mid;
		else
			lo = mid + 1;
	}
	return JS_IDENTIFIER;
}
/*
 * Return 1 when `c` is one of the white space code points recognised by
 * the lexer (0x09, 0x0B, 0x0C, 0x20, 0xA0), otherwise 0.
 * Line terminators are deliberately not included; see isnewline().
 */
static inline int iswhite(int c)
{
	switch (c) {
	case 0x9:
	case 0xb:
	case 0xc:
	case 0x20:
	case 0xa0:
		return 1;
	default:
		return 0;
	}
}
/*
 * Return non-zero when `c` is a line terminator code point:
 * 0x0A, 0x0D, 0x2028 or 0x2029.
 *
 * The parameter is declared `int` explicitly: implicit int was removed
 * from the language in C99 and is rejected by current compilers.
 */
static inline int isnewline(int c)
{
	return c == 0xa || c == 0xd || c == 0x2028 || c == 0x2029;
}
/*
 * Cursor helpers. All three operate on a variable named `sp` of type
 * `const char **` that must be in scope at the point of use.
 *
 *   GETC()    read the character at the cursor and advance past it
 *   UNGETC()  step the cursor back by one character
 *   LOOK(x)   if the character at the cursor equals x, consume and return
 *             it; otherwise leave the cursor alone and evaluate to 0
 *
 * The expansions and the macro argument are fully parenthesized so they
 * behave as single expressions regardless of the surrounding operators or
 * of the expression passed as `x`.
 */
#define GETC() (*(*sp)++)
#define UNGETC() ((*sp)--)
#define LOOK(x) (**sp == (x) ? *(*sp)++ : 0)
/*
 * Skip the remainder of a single-line comment.
 *
 * Consumes characters up to, but not including, the next line terminator
 * or the terminating NUL of the input, whichever comes first. Stopping at
 * NUL matters: a comment on the last line of the input need not be
 * followed by a newline, and scanning must not run off the end of the
 * buffer in that case.
 */
static inline void lexlinecomment(const char **sp)
{
	int c = GETC();
	while (c != 0 && !isnewline(c))
		c = GETC();
	/* Leave the newline / NUL for the caller to see. */
	UNGETC();
}
/*
 * Skip the body of a block comment; the opening delimiter has already
 * been consumed by the caller.
 *
 * Returns 0 once the closing star-slash has been consumed, or -1 when the
 * input ends first. In the failure case the cursor is left on the
 * terminating NUL so that no later read can run past the buffer.
 *
 * The end of input is checked after every read, including the reads that
 * skip a run of stars: a comment ending in "*" followed directly by NUL
 * must be reported as unterminated rather than scanned past.
 */
static inline int lexcomment(const char **sp)
{
	int c = GETC();
	while (c != 0) {
		if (c != '*') {
			c = GETC();
			continue;
		}
		/* Collapse a run of stars, then see what follows it. */
		while (c == '*')
			c = GETC();
		if (c == '/')
			return 0;
	}
	UNGETC();
	return -1;
}
/*
 * Return 1 when `c` may begin an identifier: an ASCII letter, '$' or '_'.
 * Returns 0 otherwise.
 */
static inline int isidentifierstart(int c)
{
	if (c == '$' || c == '_')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	return c >= 'a' && c <= 'z';
}
/*
 * Return 1 when `c` may appear after the first character of an
 * identifier: anything isidentifierstart() accepts, plus ASCII digits.
 */
static inline int isidentifierpart(int c)
{
	if (c >= '0' && c <= '9')
		return 1;
	return isidentifierstart(c);
}
/* Return 1 when `c` is an ASCII decimal digit ('0'..'9'), otherwise 0. */
static inline int isdec(int c)
{
	if (c < '0')
		return 0;
	return c <= '9';
}
/*
 * Return 1 when `c` is an ASCII hexadecimal digit
 * ('0'..'9', 'a'..'f' or 'A'..'F'), otherwise 0.
 */
static inline int ishex(int c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'a' && c <= 'f')
		return 1;
	return c >= 'A' && c <= 'F';
}
/*
 * Convert a hexadecimal digit character to its numeric value (0..15).
 * Any character that is not a hexadecimal digit converts to 0.
 */
static inline int tohex(int c)
{
	int digit = 0;

	if (c >= '0' && c <= '9')
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = 0xa + (c - 'a');
	else if (c >= 'A' && c <= 'F')
		digit = 0xa + (c - 'A');

	return digit;
}
/*
 * Scan the digits of a hexadecimal literal (lexnumber() calls this after
 * consuming the "0x" / "0X" prefix).
 *
 * On success the value is stored in *yynumber, the cursor is left on the
 * first character that is not a hex digit, and JS_NUMBER is returned.
 * If the very first character is not a hex digit, JS_ERROR is returned;
 * that character has already been consumed and *yynumber is untouched.
 */
static inline js_Token lexhex(const char **sp, double *yynumber)
{
	double value = 0;
	int ch = GETC();

	if (!ishex(ch))
		return JS_ERROR;

	for (; ishex(ch); ch = GETC())
		value = value * 16 + tohex(ch);

	UNGETC();	/* give back the character that ended the literal */
	*yynumber = value;
	return JS_NUMBER;
}
/*
 * Scan a (possibly empty) run of decimal digits and return its value.
 * The cursor is left on the first character that is not a digit; an empty
 * run yields 0.
 */
static inline double lexinteger(const char **sp)
{
	double value = 0;
	int ch;

	for (ch = GETC(); isdec(ch); ch = GETC())
		value = value * 10 + (ch - '0');

	UNGETC();	/* give back the character that ended the run */
	return value;
}
/*
 * Scan the digits that follow a decimal point and return them as a
 * fraction: the digits read as an integer, divided by 10 to the power of
 * the digit count. An empty run yields 0. The cursor is left on the first
 * character that is not a digit.
 */
static inline double lexfraction(const char **sp)
{
	double digits = 0;
	double scale = 1;
	int ch;

	for (ch = GETC(); isdec(ch); ch = GETC()) {
		digits = digits * 10 + (ch - '0');
		scale = scale * 10;
	}

	UNGETC();	/* give back the character that ended the run */
	return digits / scale;
}
/*
 * Scan a numeric literal whose first character `c` has already been
 * consumed by the caller.
 *
 * A leading "0x" / "0X" hands over to lexhex(). Otherwise `c` is pushed
 * back and the literal is read as integer part, optional ".fraction" and
 * optional exponent ("e" or "E", optional sign, digits). The result,
 * (integer + fraction) * 10^exponent, is stored in *yynumber and
 * JS_NUMBER is returned.
 */
static inline js_Token lexnumber(int c, const char **sp, double *yynumber)
{
	double whole;
	double frac = 0;
	double expo = 0;

	if (c == '0' && (LOOK('x') || LOOK('X')))
		return lexhex(sp, yynumber);

	/* Re-read the first character as part of the integer run. */
	UNGETC();
	whole = lexinteger(sp);

	if (LOOK('.'))
		frac = lexfraction(sp);

	if (LOOK('e') || LOOK('E')) {
		if (LOOK('-')) {
			expo = -lexinteger(sp);
		} else {
			/* An explicit '+' is optional and changes nothing. */
			(void)LOOK('+');
			expo = lexinteger(sp);
		}
	}

	*yynumber = (whole + frac) * pow(10, expo);
	return JS_NUMBER;
}
/*
 * Decode the escape sequence that follows a backslash (the backslash
 * itself has already been consumed) and return the resulting code.
 *
 *   \0            -> 0
 *   \uXXXX        -> 16-bit value built from four hex digits
 *   \xXX          -> 8-bit value built from two hex digits
 *   \b \f \n \r \t \v and the quote / backslash escapes -> the usual codes
 *   anything else -> the character itself
 *
 * Characters in a \u or \x sequence that are not hex digits contribute 0,
 * as tohex() defines. Reading never crosses the terminating NUL of the
 * input: a \u or \x sequence cut short by the end of input treats the
 * missing digits as 0, and a backslash at the very end of the input
 * returns 0 with the cursor left on the NUL.
 */
static inline int lexescape(const char **sp)
{
	int c = GETC();
	int value, ndigits, i;

	switch (c) {
	case 0:
		/* Backslash was the last character: stay on the terminator. */
		UNGETC();
		return 0;
	case '0': return 0;
	case 'u':
	case 'x':
		ndigits = (c == 'u') ? 4 : 2;
		value = 0;
		for (i = 0; i < ndigits; i++) {
			value <<= 4;
			/* Do not consume (or read past) the end of input. */
			if (**sp != 0)
				value |= tohex(GETC());
		}
		return value;
	case '\'': return '\'';
	case '"': return '"';
	case '\\': return '\\';
	case 'b': return '\b';
	case 'f': return '\f';
	case 'n': return '\n';
	case 'r': return '\r';
	case 't': return '\t';
	case 'v': return '\v';
	default: return c;
	}
}
/*
 * Scan the body of a string literal whose opening quote `q` has already
 * been consumed, decoding escapes into yytext.
 *
 * Returns JS_STRING with a NUL-terminated result in yytext, or JS_ERROR
 * when the literal is unterminated (end of input or a raw line
 * terminator before the closing quote) or does not fit in the buffer.
 *
 * yylen is treated as the total capacity of yytext in bytes, so one byte
 * is always kept in reserve for the terminating NUL.
 * NOTE(review): confirm that callers pass the full buffer size.
 *
 * NOTE(review): each decoded character is stored in a single char, so
 * values above the range of char (possible via \uXXXX) are truncated.
 */
static inline js_Token lexstring(int q, const char **sp, char *yytext, size_t yylen)
{
	char *p = yytext;
	int c = GETC();
	while (c != q) {
		if (c == 0) {
			/* Stay on the terminator so later reads remain in bounds. */
			UNGETC();
			return JS_ERROR;
		}
		if (isnewline(c))
			return JS_ERROR;
		if (c == '\\') {
			/* A backslash at end of input is an unterminated string. */
			if (**sp == 0)
				return JS_ERROR;
			c = lexescape(sp);
		}
		/* Need room for this character and the final NUL. */
		if ((size_t)(p - yytext) + 1 >= yylen)
			return JS_ERROR;
		*p++ = c;
		c = GETC();
	}
	/* Covers the empty literal with a zero-sized buffer. */
	if ((size_t)(p - yytext) >= yylen)
		return JS_ERROR;
	*p = 0;
	return JS_STRING;
}
/*
 * Scan and return the next token from the NUL-terminated input at *sp,
 * advancing *sp past the token.
 *
 *   J         not used by this function
 *   sp        in/out cursor into the source text
 *   yytext    receives the NUL-terminated spelling of identifiers and
 *             keywords, and the decoded contents of string literals
 *   yylen     capacity of yytext in bytes, including the terminator
 *             NOTE(review): confirm that callers pass the full buffer size
 *   yynumber  receives the value of numeric literals
 *
 * White space and comments are skipped. Line terminators are reported as
 * JS_NEWLINE. JS_ERROR is returned for an unterminated block comment, for
 * an identifier that does not fit in yytext, and for whatever the number
 * and string scanners reject. Characters that start no token are skipped.
 *
 * At end of input JS_EOF is returned and *sp is left pointing at the
 * terminating NUL, so calling again keeps returning JS_EOF instead of
 * reading past the buffer.
 */
js_Token js_lex(js_State *J, const char **sp, char *yytext, size_t yylen, double *yynumber)
{
	int c = GETC();
	while (c) {
		while (iswhite(c))
			c = GETC();

		/* Trailing white space may run straight into the terminator. */
		if (c == 0)
			break;

		if (isnewline(c))
			return JS_NEWLINE;

		if (c == '/') {
			c = GETC();
			if (c == '/') {
				lexlinecomment(sp);
				c = GETC();
				continue;
			}
			if (c == '*') {
				if (lexcomment(sp))
					return JS_ERROR;
				/*
				 * Restart with a fresh character: `c` still holds the
				 * star of the comment opener and must not reach the
				 * operator switch below.
				 */
				c = GETC();
				continue;
			}
			if (c == '=')
				return JS_SLASH_EQ;
			UNGETC();
			return JS_SLASH;
		}

		if (isidentifierstart(c)) {
			char *p = yytext;
			do {
				/* Need room for this character and the final NUL. */
				if ((size_t)(p - yytext) + 1 >= yylen)
					return JS_ERROR;
				*p++ = c;
				c = GETC();
			} while (isidentifierpart(c));
			UNGETC();
			*p = 0;
			return findkeyword(yytext);
		}

		/* A '.' starts a number only when a digit follows it. */
		if (isdec(c) || (c == '.' && isdec(**sp)))
			return lexnumber(c, sp, yynumber);

		if (c == '\'' || c == '"')
			return lexstring(c, sp, yytext, yylen);

		switch (c) {
		case '{': return JS_LCURLY;
		case '}': return JS_RCURLY;
		case '(': return JS_LPAREN;
		case ')': return JS_RPAREN;
		case '[': return JS_LSQUARE;
		case ']': return JS_RSQUARE;
		case '.': return JS_PERIOD;
		case ';': return JS_SEMICOLON;
		case ',': return JS_COMMA;

		case '<':
			if (LOOK('<')) {
				if (LOOK('='))
					return JS_LT_LT_EQ;
				return JS_LT_LT;
			}
			if (LOOK('='))
				return JS_LT_EQ;
			return JS_LT;

		case '>':
			if (LOOK('>')) {
				if (LOOK('>')) {
					if (LOOK('='))
						return JS_GT_GT_GT_EQ;
					return JS_GT_GT_GT;
				}
				if (LOOK('='))
					return JS_GT_GT_EQ;
				return JS_GT_GT;
			}
			if (LOOK('='))
				return JS_GT_EQ;
			return JS_GT;

		case '=':
			if (LOOK('=')) {
				if (LOOK('='))
					return JS_EQ_EQ_EQ;
				return JS_EQ_EQ;
			}
			return JS_EQ;

		case '!':
			if (LOOK('=')) {
				if (LOOK('='))
					return JS_EXCL_EQ_EQ;
				return JS_EXCL_EQ;
			}
			return JS_EXCL;

		case '+':
			if (LOOK('+'))
				return JS_PLUS_PLUS;
			if (LOOK('='))
				return JS_PLUS_EQ;
			return JS_PLUS;

		case '-':
			if (LOOK('-'))
				return JS_MINUS_MINUS;
			if (LOOK('='))
				return JS_MINUS_EQ;
			return JS_MINUS;

		case '*':
			if (LOOK('='))
				return JS_STAR_EQ;
			return JS_STAR;

		case '%':
			if (LOOK('='))
				return JS_PERCENT_EQ;
			return JS_PERCENT;

		case '&':
			if (LOOK('&'))
				return JS_AND_AND;
			if (LOOK('='))
				return JS_AND_EQ;
			return JS_AND;

		case '|':
			if (LOOK('|'))
				return JS_BAR_BAR;
			if (LOOK('='))
				return JS_BAR_EQ;
			return JS_BAR;

		case '^':
			if (LOOK('='))
				return JS_HAT_EQ;
			return JS_HAT;

		case '~': return JS_TILDE;
		case '?': return JS_QUESTION;
		case ':': return JS_COLON;

		default:
			/* Not the start of any token: skip it. */
			break;
		}

		c = GETC();
	}

	/* The NUL was consumed above; step back so the cursor rests on it. */
	UNGETC();
	return JS_EOF;
}