ref: df16e64fc63d7975009d90acea75bd2fa2b9eef4
parent: a4ebb878585504294b4e9d67d4f85355815a0e02
	author: Rangi <remy.oukaour+rangi42@gmail.com>
	date: Tue Jan 12 16:30:13 EST 2021
	
Handle MACRO and REPT/FOR bodies differently Fixes #697
--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -63,12 +63,18 @@
void lexer_SetMode(enum LexerMode mode);
void lexer_ToggleStringExpansion(bool enable);
+struct CaptureBody {+ uint32_t lineNo;
+ char *body;
+ size_t size;
+};
+
char const *lexer_GetFileName(void);
uint32_t lexer_GetLineNo(void);
uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void);
int yylex(void);
-void lexer_CaptureRept(char **capture, size_t *size);
-void lexer_CaptureMacroBody(char **capture, size_t *size);
+void lexer_CaptureRept(struct CaptureBody *capture);
+void lexer_CaptureMacroBody(struct CaptureBody *capture);
#endif /* RGBDS_ASM_LEXER_H */
--- a/src/asm/fstack.c
+++ b/src/asm/fstack.c
@@ -413,9 +413,8 @@
memcpy(dest, macro->name, macroNameLen + 1);
newContext((struct FileStackNode *)fileInfo);
- /* Line minus 1 because buffer begins with a newline */
contextStack->lexerState = lexer_OpenFileView(macro->macro, macro->macroSize,
- macro->fileLine - 1);
+ macro->fileLine);
if (!contextStack->lexerState)
 		fatalerror("Failed to set up lexer for macro invocation\n");lexer_SetStateAtEOL(contextStack->lexerState);
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -995,10 +995,21 @@
lexerState->disableMacroArgs = true;
lexerState->disableInterpolation = true;
 	for (;;) {-		switch (nextChar()) {+ int c = nextChar();
+
+		switch (c) {case EOF:
 			error("Unterminated block comment\n");goto finish;
+ case '\r':
+ /* Handle CRLF before nextLine() since shiftChars updates colNo */
+ if (peek(0) == '\n')
+ shiftChars(1);
+ /* fallthrough */
+ case '\n':
+ if (!lexerState->expansions || lexerState->expansions->distance)
+ nextLine();
+ continue;
case '/':
 			if (peek(0) == '*') {warning(WARNING_NESTED_COMMENT,
@@ -2194,8 +2205,10 @@
}
}
-void lexer_CaptureRept(char **capture, size_t *size)
+void lexer_CaptureRept(struct CaptureBody *capture)
 {+ capture->lineNo = lexer_GetLineNo();
+
char *captureStart = startCapture();
unsigned int level = 0;
int c;
@@ -2228,14 +2241,7 @@
* We know we have read exactly "ENDR", not e.g. an EQUS
*/
 					lexerState->captureSize -= strlen("ENDR");- /* Read (but don't capture) until EOL or EOF */
- lexerState->capturing = false;
-					do {- c = nextChar();
- } while (c != EOF && c != '\r' && c != '\n');
- /* Handle Windows CRLF */
- if (c == '\r' && peek(0) == '\n')
- shiftChars(1);
+ lexerState->lastToken = T_POP_ENDR; // Force EOL at EOF
goto finish;
}
level--;
@@ -2246,7 +2252,6 @@
 		for (;;) { 			if (c == EOF) { 				error("Unterminated REPT/FOR block\n");- lexerState->capturing = false;
goto finish;
 			} else if (c == '\n' || c == '\r') {if (c == '\r' && peek(0) == '\n')
@@ -2258,18 +2263,21 @@
}
finish:
- assert(!lexerState->capturing);
- *capture = captureStart;
- *size = lexerState->captureSize;
+ capture->body = captureStart;
+ capture->size = lexerState->captureSize;
+ lexerState->capturing = false;
lexerState->captureBuf = NULL;
lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false;
+ lexerState->atLineStart = false;
}
-void lexer_CaptureMacroBody(char **capture, size_t *size)
+void lexer_CaptureMacroBody(struct CaptureBody *capture)
 {+ capture->lineNo = lexer_GetLineNo();
+
char *captureStart = startCapture();
- int c = peek(0);
+ int c;
/* If the file is `mmap`ed, we need not to unmap it to keep access to the macro */
if (lexerState->isMmapped)
@@ -2276,58 +2284,51 @@
lexerState->isReferenced = true;
/*
- * Due to parser internals, it does not read the EOL after the T_POP_MACRO before calling
- * this. Thus, we need to keep one in the buffer afterwards.
- * (Note that this also means the captured buffer begins with a newline and maybe comment)
+ * Due to parser internals, it reads the EOL after the expression before calling this.
+ * Thus, we don't need to keep one in the buffer afterwards.
* The following assertion checks that.
*/
- assert(!lexerState->atLineStart);
+ assert(lexerState->atLineStart);
 	for (;;) {+ nextLine();
+ /* We're at line start, so attempt to match an `ENDM` token */
+		do { /* Discard initial whitespace */+ c = nextChar();
+ } while (isWhitespace(c));
+ /* Now, try to match `ENDM` as a **whole** identifier */
+		if (startsIdentifier(c)) {+			switch (readIdentifier(c)) {+ case T_POP_ENDM:
+ /*
+ * The ENDM has been captured, but we don't want it!
+ * We know we have read exactly "ENDM", not e.g. an EQUS
+ */
+				lexerState->captureSize -= strlen("ENDM");+ lexerState->lastToken = T_POP_ENDM; // Force EOL at EOF
+ goto finish;
+ }
+ }
+
/* Just consume characters until EOL or EOF */
 		for (;;) { 			if (c == EOF) { 				error("Unterminated macro definition\n");- lexerState->capturing = false;
goto finish;
-			} else if (c == '\n') {- break;
-			} else if (c == '\r') {- if (peek(0) == '\n')
+			} else if (c == '\n' || c == '\r') {+ if (c == '\r' && peek(0) == '\n')
shiftChars(1);
break;
}
c = nextChar();
}
-
- /* We're at line start, attempt to match a `label: MACRO` line or `ENDM` token */
-		do { /* Discard initial whitespace */- c = nextChar();
- } while (isWhitespace(c));
- /* Now, try to match `ENDM` as a **whole** identifier */
-		if (startsIdentifier(c)) {-			if (readIdentifier(c) == T_POP_ENDM) {- /* Read (but don't capture) until EOL or EOF */
- lexerState->capturing = false;
-				do {- c = peek(0);
- if (c == EOF || c == '\r' || c == '\n')
- break;
- shiftChars(1);
- } while (c != EOF && c != '\r' && c != '\n');
- /* Handle Windows CRLF */
- if (c == '\r' && peek(1) == '\n')
- shiftChars(1);
- goto finish;
- }
- }
- nextLine();
}
finish:
- assert(!lexerState->capturing);
- *capture = captureStart;
-	*size = lexerState->captureSize - strlen("ENDM");+ capture->body = captureStart;
+ capture->size = lexerState->captureSize;
+ lexerState->capturing = false;
lexerState->captureBuf = NULL;
lexerState->disableMacroArgs = false;
lexerState->disableInterpolation = false;
+ lexerState->atLineStart = false;
}
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -36,10 +36,12 @@
#include "linkdefs.h"
#include "platform.h" // strncasecmp, strdup
-uint32_t nListCountEmpty;
-int32_t nPCOffset;
-bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
+int32_t nPCOffset; /* Read by rpn_Symbol */
+static uint32_t nListCountEmpty;
+static bool executeElseBlock; /* If this is set, ELIFs cannot be executed anymore */
+static struct CaptureBody captureBody; /* Captures a REPT/FOR or MACRO */
+
static void upperstring(char *dest, char const *src)
 {while (*src)
@@ -596,17 +598,21 @@
| label cpu_command T_NEWLINE
| label macro T_NEWLINE
| label simple_pseudoop T_NEWLINE
- | pseudoop T_NEWLINE
- | conditional /* May not necessarily be followed by a newline, see below */
+ | assignment_pseudoop T_NEWLINE
+ | entire_line /* Commands that manage newlines themselves */
;
/*
- * For "logistical" reasons, conditionals must manage newlines themselves.
+ * For "logistical" reasons, these commands must manage newlines themselves.
* This is because we need to switch the lexer's mode *after* the newline has been read,
* and to avoid causing some grammar conflicts (token reducing is finicky).
* This is DEFINITELY one of the more FRAGILE parts of the codebase, handle with care.
*/
-conditional : if
+entire_line : macrodef
+ | rept
+ | for
+ | break
+ | if
/* It's important that all of these require being at line start for `skipIfBlock` */
| elif
| else
@@ -699,13 +705,13 @@
}
;
-pseudoop : equ
+/* These commands start with a T_LABEL. */
+assignment_pseudoop : equ
| set
| rb
| rw
| rl
| equs
- | macrodef
;
simple_pseudoop : include
@@ -733,10 +739,7 @@
| pushc
| popc
| load
- | rept
- | for
| shift
- | break
| fail
| warn
| assert
@@ -851,21 +854,18 @@
 		| T_POP_ENDL	{ out_EndLoadSection(); };
-rept		: T_POP_REPT uconst {- uint32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
- size_t size;
- lexer_CaptureRept(&body, &size);
- fstk_RunRept($2, nDefinitionLineNo, body, size);
+rept		: T_POP_REPT uconst T_NEWLINE {+ lexer_CaptureRept(&captureBody);
+		} T_NEWLINE {+ fstk_RunRept($2, captureBody.lineNo, captureBody.body, captureBody.size);
}
;
-for		: T_POP_FOR T_ID T_COMMA for_args {- uint32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
- size_t size;
- lexer_CaptureRept(&body, &size);
- fstk_RunFor($2, $4.start, $4.stop, $4.step, nDefinitionLineNo, body, size);
+for		: T_POP_FOR T_ID T_COMMA for_args T_NEWLINE {+ lexer_CaptureRept(&captureBody);
+		} T_NEWLINE {+ fstk_RunFor($2, $4.start, $4.stop, $4.step, captureBody.lineNo,
+ captureBody.body, captureBody.size);
}
 for_args	: const {@@ -885,18 +885,16 @@
}
;
-break		: T_POP_BREAK		{+break		: T_POP_BREAK T_NEWLINE {if (fstk_Break())
lexer_SetMode(LEXER_SKIP_TO_ENDR);
}
;
-macrodef	: T_LABEL T_COLON T_POP_MACRO {- int32_t nDefinitionLineNo = lexer_GetLineNo();
- char *body;
- size_t size;
- lexer_CaptureMacroBody(&body, &size);
- sym_AddMacro($1, nDefinitionLineNo, body, size);
+macrodef	: T_LABEL T_COLON T_POP_MACRO T_NEWLINE {+ lexer_CaptureMacroBody(&captureBody);
+		} T_NEWLINE {+ sym_AddMacro($1, captureBody.lineNo, captureBody.body, captureBody.size);
}
;
--- a/test/asm/break.err
+++ b/test/asm/break.err
@@ -2,5 +2,5 @@
done 5
warning: break.asm(17): [-Wuser]
OK
-FATAL: break.asm(18) -> break.asm::REPT~1(23):
+FATAL: break.asm(18) -> break.asm::REPT~1(22):
Ended block with 1 unterminated IF construct
--- a/test/asm/invalid-utf-8.asm
+++ b/test/asm/invalid-utf-8.asm
@@ -1,5 +1,6 @@
; This test tries to pass invalid UTF-8 through a macro argument
; to exercise the lexer's reportGarbageChar
-m:MACRO \1
+m:MACRO
+ \1
ENDM
m ��
--- a/test/asm/invalid-utf-8.err
+++ b/test/asm/invalid-utf-8.err
@@ -1,5 +1,5 @@
-ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
Unknown character 0xCF
-ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ERROR: invalid-utf-8.asm(6) -> invalid-utf-8.asm::m(4):
Unknown character 0xD3
error: Assembly aborted (2 errors)!
--- a/test/asm/nested-macrodef.err
+++ b/test/asm/nested-macrodef.err
@@ -1,5 +1,7 @@
warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
Nested macros shouldn't work, whose argument would be \1?
-ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(25):
+ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
Unterminated macro definition
-error: Assembly aborted (1 errors)!
+ERROR: nested-macrodef.asm(27):
+ syntax error, unexpected identifier, expecting newline
+error: Assembly aborted (2 errors)!
--- /dev/null
+++ b/test/asm/nested-macrodef.simple.err
@@ -1,0 +1,7 @@
+warning: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(22): [-Wuser]
+ Nested macros shouldn't work, whose argument would be \1?
+ERROR: nested-macrodef.asm(26) -> nested-macrodef.asm::outer(24):
+ Unterminated macro definition
+ERROR: nested-macrodef.asm(27):
+ syntax error
+error: Assembly aborted (2 errors)!
--
⑨