ref: 30e8cd88ec77114dd0bf67d26364dcbb6bc1ebf0
parent: f7ecd7063b03eb0604490bb2e3d335cbf3873813
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Tue Sep 12 04:12:37 EDT 2017
[cc1] Add warning about invalid input character. Ctype functions have undefined behaviour outside the range of unsigned char, and they are only meaningful in the character set defined by the current locale, which in our case is the "C" locale. The "C" locale guarantees that all the characters are representable in the range of positive chars. The value returned by getchar() is always a number in the range of unsigned char or EOF, so the use of ctype functions is safe in readchar() and it will produce the expected behaviour. After that point (and after showing the proper warning) the value is stored in a char array, and reading that value back from the char array will produce a sign extension if char is signed and the value is negative; the resulting value will be an invalid argument for ctype functions, and the result will depend on the implementation of the libraries used to compile scc. The shame of UB is yours!!!
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -198,6 +198,10 @@
case '\n':
newline();
break;
+ default:
+ if (!isprint(c) && !ispunct(c))
+ warn("invalid input character. The shame of UB is yours");
+ break;
}
return c;