ref: 7eeec043baf3b51f0ddcf408410a689832bb89ed
parent: e8603799ceaa3ef49d8a193b315f2cac94bb358f
author: Jacob Moody <moody@posixcafe.org>
date: Fri Nov 17 19:31:21 EST 2023
lex unicode correctly
--- a/n.y
+++ b/n.y
@@ -48,7 +48,7 @@
long ival;
}
-%token FUNC DEF IF FOR MOD USE OR AND NOTEQ EQ SHIFTL SHIFTR STRUCT ELSE
+%token FUNC DEF IF FOR MOD USE OR AND NOTEQ EQ SHIFTL SHIFTR STRUCT ELSE ARROWR ARROWL
%token TYPE NAME NUM
%token <sval> NAME TYPE;
@@ -87,13 +87,13 @@
| type '!'
return
-: '-' '>' type
+: ARROWR type
|
unary
: NUM
| NAME
-| '<' '-' NAME
+| ARROWL NAME
| '(' expr ')'
sufexpr
@@ -153,7 +153,7 @@
| IF '(' expr ')' stmt
arg
-: NAME type
+: NAME ':' type
args
:
@@ -180,6 +180,10 @@
">>", SHIFTL,
"struct", STRUCT,
"else", ELSE,
+ "->", ARROWR,
+ "→", ARROWR,
+ "<-", ARROWL,
+ "←", ARROWL,
};
Biobuf *bin;
@@ -189,7 +193,7 @@
{
int c;
- c = Bgetc(bin);
+ c = Bgetrune(bin);
if(c == Beof){
goteof = 1;
return -1;
@@ -202,25 +206,26 @@
void
ungetc(void)
{
- Bungetc(bin);
+ Bungetrune(bin);
}
void
wordlex(char *dst, int n)
{
- int c;
+ Rune c;
+ char *e;
- while(--n > 0){
+ for(e = dst + n - UTFmax; dst < e;){
c = getch();
if((c >= Runeself)
|| isalnum(c)){
- *dst++ = c;
+ dst += runetochar(dst, &c);
continue;
}
ungetc();
break;
}
- if(n <= 0)
+ if(dst > e - UTFmax*2)
yyerror("symbol buffer overrun");
*dst = '\0';
}