shithub: scc

Download patch

ref: afec1bc0e0ba8999c706adf6438cd74d18715a88
parent: f826b55f340d6d1806060e40c898ec746a96da02
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Tue Apr 5 10:04:12 EDT 2022

cc1: Use control characters for #, ## and macro parameters

We were using the ASCII characters $, @ and # for them, which had the drawback
that the preprocessor could not work with text containing these characters
in unexpected places. Using control characters removes that problem and
enables the use of these new tokens in the lexer itself.

--- a/src/cmd/cc/cc1/cc1.h
+++ b/src/cmd/cc/cc1/cc1.h
@@ -129,8 +129,11 @@
 	RESTRICT   = 1 << 1,
 	VOLATILE   = 1 << 2,
 	INLINE     = 1 << 3,
-	TQUALIFIER = 1 << 7,      /* this value is picked outside of ASCII range */
-	TYPE,
+	TQUALIFIER = 1 << 7,
+	MACROPAR   = 17,
+	CONCAT     = 18,
+	STRINGIZE  = 19,
+	TYPE       = 129,
 	IDEN,
 	SCLASS,
 	CONSTANT,
--- a/src/cmd/cc/cc1/cpp.c
+++ b/src/cmd/cc/cc1/cpp.c
@@ -242,7 +242,7 @@
 			bufsiz -= size;
 			bp += size;
 			break;
-		case '$':
+		case CONCAT:
 			/* token concatenation operator */
 			while (bp[-1] == ' ')
 				--bp, ++bufsiz;
@@ -249,7 +249,7 @@
 			while (s[1] == ' ')
 				++s;
 			break;
-		case '#':
+		case STRINGIZE:
 			/* stringfier operator */
 			arg = mp->arglist[atoi(s += 2)];
 			s += 2;
@@ -275,7 +275,7 @@
 			*bp++ = '"';
 
 			break;
-		case '@':
+		case MACROPAR:
 			/* parameter substitution */
 			arg = mp->arglist[atoi(++s)];
 			size = expandarg(arg, bp, bufsiz);
@@ -393,10 +393,11 @@
 getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
 {
 	Symbol **argp;
+	int siz;
 	size_t len;
 	int prevc = 0, ispar;
 
-	if (yytoken == '$') {
+	if (yytoken == CONCAT) {
 		cpperror("'##' cannot appear at either ends of a macro expansion");
 		return 0;
 	}
@@ -409,11 +410,13 @@
 					break;
 			}
 			if (argp != &args[nargs]) {
-				sprintf(yytext, "@%02d@", (int) (argp - args));
+				siz = argp - args;
+				sprintf(yytext,
+				        "%c%02d%c", MACROPAR, siz, MACROPAR);
 				ispar = 1;
 			}
 		}
-		if (prevc == '#' && !ispar) {
+		if (prevc == STRINGIZE && !ispar) {
 			cpperror("'#' is not followed by a macro parameter");
 			return 0;
 		}
@@ -424,9 +427,8 @@
 			cpperror("macro too long");
 			return 0;
 		}
-		/* $ token is generated by ## */
-		if (yytoken == '$') {
-			*bp++ = '$';
+		if (yytoken == CONCAT || yytoken == STRINGIZE) {
+			*bp++ = yytoken;
 			 --bufsiz;
 		} else {
 			memcpy(bp, yytext, len);
@@ -433,7 +435,7 @@
 			bp += len;
 			bufsiz -= len;
 		}
-		if ((prevc = yytoken) != '#') {
+		if ((prevc = yytoken) != STRINGIZE) {
 			*bp++ = ' ';
 			--bufsiz;
 		}
--- a/src/cmd/cc/cc1/lex.c
+++ b/src/cmd/cc/cc1/lex.c
@@ -755,7 +755,7 @@
 		t = follow('=', NE, '!');
 		break;
 	case '#':
-		t = follow('#', '$', '#');
+		t = follow('#', CONCAT, STRINGIZE);
 		break;
 	case '-':
 		t = minus();