ref: 71f88717024cd5434ff7ee541a112ece86082121
parent: 6dc4ce65999d9f26703c4728ed983fbec90635c1
author: ISSOtm <eldredhabert0@gmail.com>
date: Thu Jul 23 09:49:45 EDT 2020
Implement more functionality: macro arg detection, first emitted tokens, primitive (bad) column counting
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -43,10 +43,11 @@
void lexer_ToggleStringExpansion(bool enable);
char const *lexer_GetFileName(void);
-unsigned int lexer_GetLineNo(void);
+uint32_t lexer_GetLineNo(void);
+uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void);
int yylex(void);
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
- char **capture, size_t *size, char const *name);
+ char const **capture, size_t *size, char const *name);
#endif /* RGBDS_ASM_LEXER_H */
--- a/src/asm/asmy.y
+++ b/src/asm/asmy.y
@@ -604,7 +604,7 @@
rept : T_POP_REPT uconst {uint32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
+ char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_REPT, T_POP_ENDR, T_POP_ENDR,
&body, &size, "REPT block");
@@ -614,7 +614,7 @@
macrodef : T_LABEL ':' T_POP_MACRO {int32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
+ char const *body;
size_t size;
lexer_SkipToBlockEnd(T_POP_MACRO, T_POP_ENDM, T_POP_ENDM,
&body, &size, "macro definition");
--- a/src/asm/fstack.c
+++ b/src/asm/fstack.c
@@ -250,7 +250,8 @@
pLastFile = pLastFile->next;
}
- fprintf(stderr, "%s(%" PRId32 ")", lexer_GetFileName(), lexer_GetLineNo());
+ fprintf(stderr, "%s(%" PRIu32 ",%" PRIu32 ")", /* Line and column are uint32_t, hence the unsigned conversion */
+ lexer_GetFileName(), lexer_GetLineNo(), lexer_GetColNo());
}
void fstk_DumpToStr(char *buf, size_t buflen)
--- a/src/asm/globlex.c
+++ /dev/null
@@ -1,698 +1,0 @@
-/*
- * This file is part of RGBDS.
- *
- * Copyright (c) 1997-2018, Carsten Sorensen and RGBDS contributors.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#include <math.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "asm/asm.h"
-#include "asm/lexer.h"
-#include "asm/macro.h"
-#include "asm/main.h"
-#include "asm/rpn.h"
-#include "asm/section.h"
-#include "asm/warning.h"
-
-#include "helpers.h"
-
-#include "asmy.h"
-
-bool oDontExpandStrings;
-int32_t nGBGfxID = -1;
-int32_t nBinaryID = -1;
-
-static int32_t gbgfx2bin(char ch)
-{- int32_t i;
-
- for (i = 0; i <= 3; i++) {- if (CurrentOptions.gbgfx[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t binary2bin(char ch)
-{- int32_t i;
-
- for (i = 0; i <= 1; i++) {- if (CurrentOptions.binary[i] == ch)
- return i;
- }
-
- return 0;
-}
-
-static int32_t char2bin(char ch)
-{- if (ch >= 'a' && ch <= 'f')
- return (ch - 'a' + 10);
-
- if (ch >= 'A' && ch <= 'F')
- return (ch - 'A' + 10);
-
- if (ch >= '0' && ch <= '9')
- return (ch - '0');
-
- return 0;
-}
-
-typedef int32_t(*x2bin) (char ch);
-
-static int32_t ascii2bin(char *s)
-{- char *start = s;
- uint32_t radix = 10;
- uint32_t result = 0;
- x2bin convertfunc = char2bin;
-
- switch (*s) {- case '$':
- radix = 16;
- s++;
- convertfunc = char2bin;
- break;
- case '&':
- radix = 8;
- s++;
- convertfunc = char2bin;
- break;
- case '`':
- radix = 4;
- s++;
- convertfunc = gbgfx2bin;
- break;
- case '%':
- radix = 2;
- s++;
- convertfunc = binary2bin;
- break;
- default:
- /* Handle below */
- break;
- }
-
- const uint32_t max_q = UINT32_MAX / radix;
- const uint32_t max_r = UINT32_MAX % radix;
-
- if (*s == '\0') {- /*
- * There are no digits after the radix prefix
- * (or the string is empty, which shouldn't happen).
- */
- error("Invalid integer constant\n");- } else if (radix == 4) {- int32_t size = 0;
- int32_t c;
-
- while (*s != '\0') {- c = convertfunc(*s++);
- result = result * 2 + ((c & 2) << 7) + (c & 1);
- size++;
- }
-
- /*
- * Extending a graphics constant longer than 8 pixels,
- * the Game Boy tile width, produces a nonsensical result.
- */
- if (size > 8) {- warning(WARNING_LARGE_CONSTANT, "Graphics constant '%s' is too long\n",
- start);
- }
- } else {- bool overflow = false;
-
- while (*s != '\0') {- int32_t digit = convertfunc(*s++);
-
- if (result > max_q
- || (result == max_q && digit > max_r)) {- overflow = true;
- }
- result = result * radix + digit;
- }
-
- if (overflow)
- warning(WARNING_LARGE_CONSTANT, "Integer constant '%s' is too large\n",
- start);
- }
-
- return result;
-}
-
-uint32_t ParseFixedPoint(char *s, uint32_t size)
-{- uint32_t i;
- uint32_t dot = 0;
-
- for (i = 0; i < size; i++) {- if (s[i] == '.') {- dot++;
-
- if (dot == 2)
- break;
- }
- }
-
- yyskipbytes(i);
-
- yylval.nConstValue = (int32_t)(atof(s) * 65536);
-
- return 1;
-}
-
-uint32_t ParseNumber(char *s, uint32_t size)
-{- char dest[256];
-
- if (size > 255)
- fatalerror("Number token too long\n");-
- strncpy(dest, s, size);
- dest[size] = 0;
- yylval.nConstValue = ascii2bin(dest);
-
- yyskipbytes(size);
-
- return 1;
-}
-
-/*
- * If the symbol name ends before the end of the macro arg,
- * return a pointer to the rest of the macro arg.
- * Otherwise, return NULL.
- */
-char const *AppendMacroArg(char whichArg, char *dest, size_t *destIndex)
-{- char const *marg;
-
- if (whichArg == '@')
- marg = macro_GetUniqueIDStr();
- else if (whichArg >= '1' && whichArg <= '9')
- marg = macro_GetArg(whichArg - '0');
- else
- fatalerror("Invalid macro argument '\\%c' in symbol\n", whichArg);-
- if (!marg)
- fatalerror("Macro argument '\\%c' not defined\n", whichArg);-
- char ch;
-
- while ((ch = *marg) != 0) {- if ((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= '0' && ch <= '9')
- || ch == '_'
- || ch == '@'
- || ch == '#'
- || ch == '.') {- if (*destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");-
- dest[*destIndex] = ch;
- (*destIndex)++;
- } else {- return marg;
- }
-
- marg++;
- }
-
- return NULL;
-}
-
-uint32_t ParseSymbol(char *src, uint32_t size)
-{- char dest[MAXSYMLEN + 1];
- size_t srcIndex = 0;
- size_t destIndex = 0;
- char const *rest = NULL;
-
- while (srcIndex < size) {- char ch = src[srcIndex++];
-
- if (ch == '\\') {- /*
- * We don't check if srcIndex is still less than size,
- * but that can only fail to be true when the
- * following char is neither '@' nor a digit.
- * In that case, AppendMacroArg() will catch the error.
- */
- ch = src[srcIndex++];
-
- rest = AppendMacroArg(ch, dest, &destIndex);
- /* If the symbol's end was in the middle of the token */
- if (rest)
- break;
- } else {- if (destIndex >= MAXSYMLEN)
- fatalerror("Symbol too long\n");- dest[destIndex++] = ch;
- }
- }
-
- dest[destIndex] = 0;
-
- /* Tell the lexer we read all bytes that we did */
- yyskipbytes(srcIndex);
-
- /*
- * If an escape's expansion left some chars after the symbol's end,
- * such as the `::` in a `Backup\1` expanded to `BackupCamX::`,
- * put those into the buffer.
- * Note that this NEEDS to be done after the `yyskipbytes` above.
- */
- if (rest)
- yyunputstr(rest);
-
- /* If the symbol is an EQUS, expand it */
- if (!oDontExpandStrings) {- struct Symbol const *sym = sym_FindSymbol(dest);
-
- if (sym && sym->type == SYM_EQUS) {- char const *s;
-
- lex_BeginStringExpansion(dest);
-
- /* Feed the symbol's contents into the buffer */
- yyunputstr(s = sym_GetStringValue(sym));
-
- /* Lines inserted this way shall not increase lexer_GetLineNo() */
- while (*s) {- if (*s++ == '\n')
- lexer_GetLineNo()--;
- }
- return 0;
- }
- }
-
- strcpy(yylval.tzSym, dest);
- return 1;
-}
-
-uint32_t PutMacroArg(char *src, uint32_t size)
-{- char const *s;
-
- yyskipbytes(size);
- if ((size == 2 && src[1] >= '1' && src[1] <= '9')) {- s = macro_GetArg(src[1] - '0');
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro argument '\\%c' not defined\n", src[1]);- } else {- error("Invalid macro argument '\\%c'\n", src[1]);- }
- return 0;
-}
-
-uint32_t PutUniqueID(char *src, uint32_t size)
-{- (void)src;
- char const *s;
-
- yyskipbytes(size);
-
- s = macro_GetUniqueIDStr();
-
- if (s != NULL)
- yyunputstr(s);
- else
- error("Macro unique label string not defined\n");-
- return 0;
-}
-
-enum {- T_LEX_MACROARG = 3000,
- T_LEX_MACROUNIQUE
-};
-
-const struct sLexInitString lexer_strings[] = {- {"adc", T_Z80_ADC},- {"add", T_Z80_ADD},- {"and", T_Z80_AND},- {"bit", T_Z80_BIT},- {"call", T_Z80_CALL},- {"ccf", T_Z80_CCF},- {"cpl", T_Z80_CPL},- {"cp", T_Z80_CP},- {"daa", T_Z80_DAA},- {"dec", T_Z80_DEC},- {"di", T_Z80_DI},- {"ei", T_Z80_EI},- {"halt", T_Z80_HALT},- {"inc", T_Z80_INC},- {"jp", T_Z80_JP},- {"jr", T_Z80_JR},- {"ld", T_Z80_LD},- {"ldi", T_Z80_LDI},- {"ldd", T_Z80_LDD},- {"ldio", T_Z80_LDIO},- {"ldh", T_Z80_LDIO},- {"nop", T_Z80_NOP},- {"or", T_Z80_OR},- {"pop", T_Z80_POP},- {"push", T_Z80_PUSH},- {"res", T_Z80_RES},- {"reti", T_Z80_RETI},- {"ret", T_Z80_RET},- {"rlca", T_Z80_RLCA},- {"rlc", T_Z80_RLC},- {"rla", T_Z80_RLA},- {"rl", T_Z80_RL},- {"rrc", T_Z80_RRC},- {"rrca", T_Z80_RRCA},- {"rra", T_Z80_RRA},- {"rr", T_Z80_RR},- {"rst", T_Z80_RST},- {"sbc", T_Z80_SBC},- {"scf", T_Z80_SCF},- {"set", T_POP_SET},- {"sla", T_Z80_SLA},- {"sra", T_Z80_SRA},- {"srl", T_Z80_SRL},- {"stop", T_Z80_STOP},- {"sub", T_Z80_SUB},- {"swap", T_Z80_SWAP},- {"xor", T_Z80_XOR},-
- {"nz", T_CC_NZ},- {"z", T_CC_Z},- {"nc", T_CC_NC},- /* Handled in list of registers */
- /* { "c", T_TOKEN_C }, */-
- {"hli", T_MODE_HL_INC},- {"hld", T_MODE_HL_DEC},- {"$ff00+c", T_MODE_HW_C},- {"$ff00 + c", T_MODE_HW_C},- {"af", T_MODE_AF},- {"bc", T_MODE_BC},- {"de", T_MODE_DE},- {"hl", T_MODE_HL},- {"sp", T_MODE_SP},-
- {"a", T_TOKEN_A},- {"b", T_TOKEN_B},- {"c", T_TOKEN_C},- {"d", T_TOKEN_D},- {"e", T_TOKEN_E},- {"h", T_TOKEN_H},- {"l", T_TOKEN_L},-
- {"||", T_OP_LOGICOR},- {"&&", T_OP_LOGICAND},- {"==", T_OP_LOGICEQU},- {">", T_OP_LOGICGT},- {"<", T_OP_LOGICLT},- {">=", T_OP_LOGICGE},- {"<=", T_OP_LOGICLE},- {"!=", T_OP_LOGICNE},- {"!", T_OP_LOGICNOT},- {"|", T_OP_OR},- {"^", T_OP_XOR},- {"&", T_OP_AND},- {"<<", T_OP_SHL},- {">>", T_OP_SHR},- {"+", T_OP_ADD},- {"-", T_OP_SUB},- {"*", T_OP_MUL},- {"/", T_OP_DIV},- {"%", T_OP_MOD},- {"~", T_OP_NOT},-
- {"def", T_OP_DEF},-
- {"fragment", T_POP_FRAGMENT},- {"bank", T_OP_BANK},- {"align", T_OP_ALIGN},-
- {"round", T_OP_ROUND},- {"ceil", T_OP_CEIL},- {"floor", T_OP_FLOOR},- {"div", T_OP_FDIV},- {"mul", T_OP_FMUL},- {"sin", T_OP_SIN},- {"cos", T_OP_COS},- {"tan", T_OP_TAN},- {"asin", T_OP_ASIN},- {"acos", T_OP_ACOS},- {"atan", T_OP_ATAN},- {"atan2", T_OP_ATAN2},-
- {"high", T_OP_HIGH},- {"low", T_OP_LOW},- {"isconst", T_OP_ISCONST},-
- {"strcmp", T_OP_STRCMP},- {"strin", T_OP_STRIN},- {"strsub", T_OP_STRSUB},- {"strlen", T_OP_STRLEN},- {"strcat", T_OP_STRCAT},- {"strupr", T_OP_STRUPR},- {"strlwr", T_OP_STRLWR},-
- {"include", T_POP_INCLUDE},- {"printt", T_POP_PRINTT},- {"printi", T_POP_PRINTI},- {"printv", T_POP_PRINTV},- {"printf", T_POP_PRINTF},- {"export", T_POP_EXPORT},- {"xdef", T_POP_XDEF},- {"global", T_POP_GLOBAL},- {"ds", T_POP_DS},- {"db", T_POP_DB},- {"dw", T_POP_DW},- {"dl", T_POP_DL},- {"section", T_POP_SECTION},- {"purge", T_POP_PURGE},-
- {"rsreset", T_POP_RSRESET},- {"rsset", T_POP_RSSET},-
- {"incbin", T_POP_INCBIN},- {"charmap", T_POP_CHARMAP},- {"newcharmap", T_POP_NEWCHARMAP},- {"setcharmap", T_POP_SETCHARMAP},- {"pushc", T_POP_PUSHC},- {"popc", T_POP_POPC},-
- {"fail", T_POP_FAIL},- {"warn", T_POP_WARN},- {"fatal", T_POP_FATAL},- {"assert", T_POP_ASSERT},- {"static_assert", T_POP_STATIC_ASSERT},-
- {"macro", T_POP_MACRO},- /* Not needed but we have it here just to protect the name */
- {"endm", T_POP_ENDM},- {"shift", T_POP_SHIFT},-
- {"rept", T_POP_REPT},- /* Not needed but we have it here just to protect the name */
- {"endr", T_POP_ENDR},-
- {"load", T_POP_LOAD},- {"endl", T_POP_ENDL},-
- {"if", T_POP_IF},- {"else", T_POP_ELSE},- {"elif", T_POP_ELIF},- {"endc", T_POP_ENDC},-
- {"union", T_POP_UNION},- {"nextu", T_POP_NEXTU},- {"endu", T_POP_ENDU},-
- {"wram0", T_SECT_WRAM0},- {"vram", T_SECT_VRAM},- {"romx", T_SECT_ROMX},- {"rom0", T_SECT_ROM0},- {"hram", T_SECT_HRAM},- {"wramx", T_SECT_WRAMX},- {"sram", T_SECT_SRAM},- {"oam", T_SECT_OAM},-
- {"rb", T_POP_RB},- {"rw", T_POP_RW},- {"equ", T_POP_EQU},- {"equs", T_POP_EQUS},-
- /* Handled before in list of CPU instructions */
- /* {"set", T_POP_SET}, */- {"=", T_POP_EQUAL},-
- {"pushs", T_POP_PUSHS},- {"pops", T_POP_POPS},- {"pusho", T_POP_PUSHO},- {"popo", T_POP_POPO},-
- {"opt", T_POP_OPT},-
- {NULL, 0}-};
-
-const struct sLexFloat tNumberToken = {- ParseNumber,
- T_NUMBER
-};
-
-const struct sLexFloat tFixedPointToken = {- ParseFixedPoint,
- T_NUMBER
-};
-
-const struct sLexFloat tIDToken = {- ParseSymbol,
- T_ID
-};
-
-const struct sLexFloat tMacroArgToken = {- PutMacroArg,
- T_LEX_MACROARG
-};
-
-const struct sLexFloat tMacroUniqueToken = {- PutUniqueID,
- T_LEX_MACROUNIQUE
-};
-
-void setup_lexer(void)
-{- uint32_t id;
-
- lex_Init();
- lex_AddStrings(lexer_strings);
-
- //Macro arguments
-
- id = lex_FloatAlloc(&tMacroArgToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '1', '9');
- id = lex_FloatAlloc(&tMacroUniqueToken);
- lex_FloatAddFirstRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
-
- //Decimal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '0', '9');
-
- //Binary constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nBinaryID = id;
- lex_FloatAddFirstRange(id, '%', '%');
- lex_FloatAddSecondRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
- lex_FloatAddRange(id, CurrentOptions.binary[0],
- CurrentOptions.binary[0]);
- lex_FloatAddRange(id, CurrentOptions.binary[1],
- CurrentOptions.binary[1]);
-
- //Octal constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '&', '&');
- lex_FloatAddSecondRange(id, '0', '7');
- lex_FloatAddRange(id, '0', '7');
-
- //Gameboy gfx constants
-
- id = lex_FloatAlloc(&tNumberToken);
- nGBGfxID = id;
- lex_FloatAddFirstRange(id, '`', '`');
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[0],
- CurrentOptions.gbgfx[0]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[1],
- CurrentOptions.gbgfx[1]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[2],
- CurrentOptions.gbgfx[2]);
- lex_FloatAddSecondRange(id, CurrentOptions.gbgfx[3],
- CurrentOptions.gbgfx[3]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[0], CurrentOptions.gbgfx[0]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[1], CurrentOptions.gbgfx[1]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[2], CurrentOptions.gbgfx[2]);
- lex_FloatAddRange(id, CurrentOptions.gbgfx[3], CurrentOptions.gbgfx[3]);
-
- //Hex constants
-
- id = lex_FloatAlloc(&tNumberToken);
- lex_FloatAddFirstRange(id, '$', '$');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, 'A', 'F');
- lex_FloatAddSecondRange(id, 'a', 'f');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, 'A', 'F');
- lex_FloatAddRange(id, 'a', 'f');
-
- //ID 's
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, 'a', 'z');
- lex_FloatAddFirstRange(id, 'A', 'Z');
- lex_FloatAddFirstRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddSecondRange(id, '\\', '\\');
- lex_FloatAddSecondRange(id, '@', '@');
- lex_FloatAddSecondRange(id, '#', '#');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- //Local ID
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddSecondRange(id, 'a', 'z');
- lex_FloatAddSecondRange(id, 'A', 'Z');
- lex_FloatAddSecondRange(id, '_', '_');
- lex_FloatAddRange(id, 'a', 'z');
- lex_FloatAddRange(id, 'A', 'Z');
- lex_FloatAddRange(id, '0', '9');
- lex_FloatAddRange(id, '_', '_');
- lex_FloatAddRange(id, '\\', '\\');
- lex_FloatAddRange(id, '@', '@');
- lex_FloatAddRange(id, '#', '#');
-
- // "@"
-
- id = lex_FloatAlloc(&tIDToken);
- lex_FloatAddFirstRange(id, '@', '@');
-
- //Fixed point constants
-
- id = lex_FloatAlloc(&tFixedPointToken);
- lex_FloatAddFirstRange(id, '.', '.');
- lex_FloatAddFirstRange(id, '0', '9');
- lex_FloatAddSecondRange(id, '.', '.');
- lex_FloatAddSecondRange(id, '0', '9');
- lex_FloatAddRange(id, '.', '.');
- lex_FloatAddRange(id, '0', '9');
-}
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -30,6 +30,13 @@
/* This caps the size of buffer reads, and according to POSIX, passing more than SSIZE_MAX is UB */
static_assert(LEXER_BUF_SIZE <= SSIZE_MAX);
struct Expansion {+ uint8_t distance; /* How far the expansion's beginning is from the current position */
+ char const *contents; /* Presumably the text being expanded; nothing in this change reads it yet (TODO confirm) */
+ size_t len; /* Presumably the length of `contents` (TODO confirm) */
+ struct Expansion *parent; /* Presumably the enclosing expansion, if any (TODO confirm) */
+}; /* The lexer state holds a pointer to one of these, which starts out NULL */
+
struct LexerState {char const *path;
@@ -37,7 +44,7 @@
bool isMmapped;
union { struct { /* If mmap()ed */- char *ptr;
+ char *ptr; /* Technically `const` during the lexer's execution */
off_t size;
off_t offset;
};
@@ -44,7 +51,6 @@
struct { /* Otherwise */int fd;
size_t index; /* Read index into the buffer */
- size_t nbChars; /* Number of chars in front of the buffer */
char buf[LEXER_BUF_SIZE]; /* Circular buffer */
};
};
@@ -52,12 +58,17 @@
/* Common state */
enum LexerMode mode;
bool atLineStart;
- unsigned int lineNo;
+ uint32_t lineNo;
+ uint32_t colNo;
+
bool capturing; /* Whether the text being lexed should be captured */
size_t captureSize; /* Amount of text captured */
char *captureBuf; /* Buffer to send the captured text to if non-NULL */
size_t captureCapacity; /* Size of the buffer above */
+
+ size_t nbChars; /* Number of chars of lookahead, for processing expansions */
bool expandStrings;
+ struct Expansion *expansion;
};
struct LexerState *lexerState = NULL;
@@ -116,14 +127,18 @@
/* Sometimes mmap() fails or isn't available, so have a fallback */
lseek(state->fd, 0, SEEK_SET);
state->index = 0;
- state->nbChars = 0;
}
state->mode = LEXER_NORMAL;
- state->atLineStart = true;
+ state->atLineStart = true; /* yylex() will init colNo due to this */
state->lineNo = 0;
+
state->capturing = false;
state->captureBuf = NULL;
+
+ state->nbChars = 0;
+ state->expandStrings = true;
+ state->expansion = NULL;
return state;
}
@@ -164,28 +179,50 @@
/* If at any point we need more than 255 characters of lookahead, something went VERY wrong. */
static int peek(uint8_t distance)
{+ if (distance >= LEXER_BUF_SIZE)
+ fatalerror("Internal lexer error: buffer has insufficient size for peeking (%u >= %u)\n",+ distance, LEXER_BUF_SIZE);
+
if (lexerState->isMmapped) {if (lexerState->offset + distance >= lexerState->size)
return EOF;
+
if (!lexerState->capturing) {+ bool escaped = false; /* Scan chars not yet counted in nbChars for macro args; skipped while capturing */
+
while (lexerState->nbChars < distance && !escaped) {+ char c = lexerState->ptr[lexerState->offset
+ + lexerState->nbChars++];
+
if (escaped) {+ escaped = false; /* NOTE(review): looks unreachable; the loop condition requires !escaped and nothing sets it before this test, so the macro arg check below never fires on this path (confirm intent) */
+ if ((c >= '1' && c <= '9') || c == '@')
+ fatalerror("Macro arg expansion is not implemented yet\n");+ } else if (c == '\\') {+ escaped = true;
+ }
+ }
+ }
+
return lexerState->ptr[lexerState->offset + distance];
}
if (lexerState->nbChars <= distance) {/* Buffer isn't full enough, read some chars in */
+ size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
/* Compute the index we'll start writing to */
size_t writeIndex = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
- size_t target = LEXER_BUF_SIZE - lexerState->nbChars; /* Aim: making the buf full */
- ssize_t nbCharsRead = 0;
+ ssize_t nbCharsRead = 0, totalCharsRead = 0;
#define readChars(size) do { \nbCharsRead = read(lexerState->fd, &lexerState->buf[writeIndex], (size)); \
if (nbCharsRead == -1) \
fatalerror("Error while reading \"%s\": %s\n", lexerState->path, errno); \+ totalCharsRead += nbCharsRead; \
writeIndex += nbCharsRead; \
if (writeIndex == LEXER_BUF_SIZE) \
writeIndex = 0; \
- lexerState->nbChars += nbCharsRead; /* Count all those chars in */ \
target -= nbCharsRead; \
} while (0)
@@ -201,6 +238,40 @@
#undef readChars
+ /* Do not perform expansions when capturing */
+ if (!lexerState->capturing) {+ /* Scan the newly-inserted chars for any expansions */
+ bool escaped = false;
+ size_t index = (lexerState->index + lexerState->nbChars) % LEXER_BUF_SIZE;
+
+ for (ssize_t i = 0; i < totalCharsRead; i++) {+ char c = lexerState->buf[index++];
+
+ if (escaped) {+ escaped = false;
+ if ((c >= '1' && c <= '9') || c == '@')
+ fatalerror("Macro arg expansion is not implemented yet\n");+ } else if (c == '\\') {+ escaped = true;
+ }
+ if (index == LEXER_BUF_SIZE) /* Wrap around buffer */
+ index = 0;
+ }
+
+ /*
+ * If last char read was a backslash, pretend we didn't read it; this is
+ * important, otherwise we may miss an expansion that straddles refills
+ */
+ if (escaped) {+ totalCharsRead--;
+ /* However, if that prevents having enough characters, error out */
+ if (lexerState->nbChars + totalCharsRead <= distance)
+ fatalerror("Internal lexer error: cannot read far enough due to backslash\n");+ }
+ }
+
+ lexerState->nbChars += totalCharsRead;
+
/* If there aren't enough chars even after refilling, give up */
if (lexerState->nbChars <= distance)
return EOF;
@@ -231,6 +302,8 @@
if (lexerState->index >= LEXER_BUF_SIZE)
lexerState->index %= LEXER_BUF_SIZE;
}
+
+ lexerState->colNo += distance;
}
static int nextChar(void)
@@ -250,11 +323,16 @@
return lexerState->path;
}
-unsigned int lexer_GetLineNo(void)
+uint32_t lexer_GetLineNo(void)
{return lexerState->lineNo;
}
+uint32_t lexer_GetColNo(void)
+{+ return lexerState->colNo;
+}
+
void lexer_DumpStringExpansions(void)
{/* TODO */
@@ -278,6 +356,20 @@
case '\t':
break;
+ /* Handle single-char tokens */
+ case '+':
+ return T_OP_ADD;
+ case '-':
+ return T_OP_SUB;
+
+ /* Handle accepted single chars */
+ case '[':
+ case ']':
+ case '(':+ case ')':
+ case ',':
+ return c;
+
case EOF:
/* Captures end at their buffer's boundary no matter what */
if (!lexerState->capturing) {@@ -288,6 +380,7 @@
default:
error("Unknown character '%c'\n");}
+ lexerState->atLineStart = false;
}
}
@@ -298,8 +391,10 @@
int yylex(void)
{- if (lexerState->atLineStart)
+ if (lexerState->atLineStart) {lexerState->lineNo++;
+ lexerState->colNo = 0;
+ }
static int (* const lexerModeFuncs[])(void) = {[LEXER_NORMAL] = yylex_NORMAL,
@@ -316,7 +411,7 @@
}
void lexer_SkipToBlockEnd(int blockStartToken, int blockEndToken, int endToken,
- char **capture, size_t *size, char const *name)
+ char const **capture, size_t *size, char const *name)
{lexerState->capturing = true;
lexerState->captureSize = 0;
--- a/src/asm/main.c
+++ b/src/asm/main.c
@@ -483,6 +483,13 @@
fprintf(dependfile, "%s: %s\n", tzTargetFileName, tzMainfile);
}
+ /* Init lexer; important to do first, since that's what provides the file name, line, etc */
+ struct LexerState *state = lexer_OpenFile(tzMainfile);
+
+ if (!state)
+ fatalerror("Failed to open main file!\n");+ lexer_SetState(state);
+
nStartClock = clock();
nTotalLines = 0;
@@ -490,11 +497,6 @@
sym_Init();
sym_SetExportAll(exportall);
fstk_Init(tzMainfile);
- struct LexerState *state = lexer_OpenFile(tzMainfile);
-
- if (!state)
- fatalerror("Failed to open main file!");- lexer_SetState(state);
opt_ParseDefines();
charmap_New("main", NULL);--
⑨