ref: 7ac8bd6e240618fbfacfe9f0987bf089b7721b9c
parent: be2572edca1eb97f5ca801d4e4380e757b04f314
author: Rangi <remy.oukaour+rangi42@gmail.com>
date: Sun Apr 18 16:25:09 EDT 2021
Return a marker token at the end of any buffer. Removes the lexer hack mentioned in #778.
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -81,6 +81,7 @@
uint32_t lineNo;
char *body;
size_t size;
+ bool unterminated;
};
char const *lexer_GetFileName(void);
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -354,6 +354,7 @@
uint32_t colNo;
int lastToken;
int nextToken;
+ bool isAtEOF;
struct IfStack *ifStack;
@@ -378,6 +379,7 @@
state->atLineStart = true; /* yylex() will init colNo due to this */
state->lastToken = T_EOF;
state->nextToken = 0;
+ state->isAtEOF = false;
state->ifStack = NULL;
@@ -2278,11 +2280,13 @@
int yylex(void)
{
-restart:
- if (lexerState->atLineStart && lexerStateEOL) {
+ if (lexerStateEOL) {
lexer_SetState(lexerStateEOL);
lexerStateEOL = NULL;
}
+ /* `lexer_SetState` updates `lexerState`, so check for EOF after it */
+ if (lexerState->isAtEOF)
+ return T_EOF;
if (lexerState->atLineStart) {
/* Newlines read within an expansion should not increase the line count */
if (!lexerState->expansions)
@@ -2299,23 +2303,19 @@
int token = lexerModeFuncs[lexerState->mode]();
if (token == T_EOF) {
- if (lexerState->lastToken != T_NEWLINE) {
- dbgPrint("Forcing EOL at EOF\n");
- token = T_NEWLINE;
- } else {
- /* Try to switch to new buffer; if it succeeds, scan again */
- dbgPrint("Reached EOF!\n");
- /* Captures end at their buffer's boundary no matter what */
- if (!lexerState->capturing) {
- if (!yywrap())
- goto restart;
+ /* Try to switch to new buffer; if it succeeds, scan again */
+ dbgPrint("Reached EOB!\n");
+ /* Captures end at their buffer's boundary no matter what */
+ if (!lexerState->capturing) {
+ if (yywrap()) {
dbgPrint("Reached end of input.\n");
- return T_EOF;
+ lexerState->isAtEOF = true;
}
+ token = T_EOB;
}
}
lexerState->lastToken = token;
- lexerState->atLineStart = token == T_NEWLINE;
+ lexerState->atLineStart = token == T_NEWLINE || token == T_EOB;
return token;
}
@@ -2338,6 +2338,7 @@
void lexer_CaptureRept(struct CaptureBody *capture)
{
+ capture->unterminated = false;
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
@@ -2372,7 +2373,6 @@
* We know we have read exactly "ENDR", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDR");
- lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish;
}
level--;
@@ -2383,6 +2383,7 @@
for (;;) {
if (c == EOF) {
error("Unterminated REPT/FOR block\n");
+ capture->unterminated = true;
goto finish;
} else if (c == '\n' || c == '\r') {
handleCRLF(c);
@@ -2404,6 +2405,7 @@
void lexer_CaptureMacroBody(struct CaptureBody *capture)
{
+ capture->unterminated = false;
capture->lineNo = lexer_GetLineNo();
char *captureStart = startCapture();
@@ -2434,7 +2436,6 @@
* We know we have read exactly "ENDM", not e.g. an EQUS
*/
lexerState->captureSize -= strlen("ENDM");
- lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
goto finish;
}
}
@@ -2443,6 +2444,7 @@
for (;;) {
if (c == EOF) {
error("Unterminated macro definition\n");
+ capture->unterminated = true;
goto finish;
} else if (c == '\n' || c == '\r') {
handleCRLF(c);
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -646,6 +646,7 @@
%type <expr> op_mem_ind
%type <assertType> assert_type
+%token T_EOB "end of buffer"
%token T_EOF 0 "end of file"
%start asmfile
@@ -654,14 +655,13 @@
asmfile : lines
;
-/*
- * The lexer adds T_NEWLINE at the end of the file if one was not
- * already present, so we can rely on it to end a line.
- */
lines : %empty
| lines line
;
+endofline : T_NEWLINE | T_EOB
+;
+
plain_directive : label
| label cpu_command
| label macro
@@ -669,9 +669,9 @@
| assignment_directive
;
-line : plain_directive T_NEWLINE
+line : plain_directive endofline
| line_directive /* Directives that manage newlines themselves */
- | error T_NEWLINE { /* Continue parsing the next line on a syntax error */
+ | error endofline { /* Continue parsing the next line on a syntax error */
fstk_StopRept();
}
;
@@ -686,6 +686,7 @@
| rept
| for
| break
+ | include
| if
/* It's important that all of these require being at line start for `skipIfBlock` */
| elif
@@ -807,8 +808,7 @@
| equs
;
-directive : include
- | endc
+directive : endc
| println
| printf
@@ -988,8 +988,10 @@
rept : T_POP_REPT uconst T_NEWLINE {
lexer_CaptureRept(&captureBody);
- } T_NEWLINE {
- fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
+ } endofline {
+ if (!captureBody.unterminated)
+ fstk_RunRept($2, captureBody.lineNo, captureBody.body,
+ captureBody.size);
}
;
@@ -999,9 +1001,10 @@
lexer_ToggleStringExpansion(true);
} T_COMMA for_args T_NEWLINE {
lexer_CaptureRept(&captureBody);
- } T_NEWLINE {
- fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo,
- captureBody.body, captureBody.size);
+ } endofline {
+ if (!captureBody.unterminated)
+ fstk_RunFor($3, $6.start, $6.stop, $6.step, captureBody.lineNo,
+ captureBody.body, captureBody.size);
}
for_args : const {
@@ -1021,7 +1024,7 @@
}
;
-break : T_POP_BREAK T_NEWLINE {
+break : label T_POP_BREAK endofline {
if (fstk_Break())
lexer_SetMode(LEXER_SKIP_TO_ENDR);
}
@@ -1033,13 +1036,17 @@
lexer_ToggleStringExpansion(true);
} T_NEWLINE {
lexer_CaptureMacroBody(&captureBody);
- } T_NEWLINE {
- sym_AddMacro($3, captureBody.lineNo, captureBody.body, captureBody.size);
+ } endofline {
+ if (!captureBody.unterminated)
+ sym_AddMacro($3, captureBody.lineNo, captureBody.body,
+ captureBody.size);
}
| T_LABEL T_COLON T_POP_MACRO T_NEWLINE {
lexer_CaptureMacroBody(&captureBody);
- } T_NEWLINE {
- sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
+ } endofline {
+ if (!captureBody.unterminated)
+ sym_AddMacro($1, captureBody.lineNo, captureBody.body,
+ captureBody.size);
}
;
@@ -1162,8 +1169,8 @@
export_list_entry : scoped_id { sym_Export($1); }
;
-include : T_POP_INCLUDE string {
- fstk_RunInclude($2);
+include : label T_POP_INCLUDE string endofline {
+ fstk_RunInclude($3);
if (failedOnMissingInclude)
YYACCEPT;
}
--- a/test/asm/block-comment-termination-error.err
+++ b/test/asm/block-comment-termination-error.err
@@ -1,5 +1,5 @@
ERROR: block-comment-termination-error.asm(1):
Unterminated block comment
ERROR: block-comment-termination-error.asm(1):
- syntax error, unexpected newline
+ syntax error, unexpected end of buffer
error: Assembly aborted (2 errors)!
--- a/test/asm/code-after-endm-endr-endc.err
+++ b/test/asm/code-after-endm-endr-endc.err
@@ -1,15 +1,15 @@
ERROR: code-after-endm-endr-endc.asm(6):
- syntax error, unexpected PRINTLN, expecting newline
+ syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(7):
Macro "mac" not defined
ERROR: code-after-endm-endr-endc.asm(12):
- syntax error, unexpected PRINTLN, expecting newline
+ syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(17):
syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(19):
- syntax error, unexpected PRINTLN, expecting newline
+ syntax error, unexpected PRINTLN, expecting newline or end of buffer
ERROR: code-after-endm-endr-endc.asm(23):
syntax error, unexpected PRINTLN, expecting newline
ERROR: code-after-endm-endr-endc.asm(25):
- syntax error, unexpected PRINTLN, expecting newline
+ syntax error, unexpected PRINTLN, expecting newline or end of buffer
error: Assembly aborted (7 errors)!
--- a/test/asm/nested-macrodef.err
+++ b/test/asm/nested-macrodef.err
@@ -3,5 +3,5 @@
ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
Unterminated macro definition
ERROR: nested-macrodef.asm(27):
- syntax error, unexpected identifier, expecting newline
+ Macro "inner" not defined
error: Assembly aborted (2 errors)!