shithub: hammer

Download patch

ref: 7eeec043baf3b51f0ddcf408410a689832bb89ed
parent: e8603799ceaa3ef49d8a193b315f2cac94bb358f
author: Jacob Moody <moody@posixcafe.org>
date: Fri Nov 17 19:31:21 EST 2023

lex unicode correctly

--- a/n.y
+++ b/n.y
@@ -48,7 +48,7 @@
 	long ival;
 }
 
-%token FUNC DEF IF FOR MOD USE OR AND NOTEQ EQ SHIFTL SHIFTR STRUCT ELSE
+%token FUNC DEF IF FOR MOD USE OR AND NOTEQ EQ SHIFTL SHIFTR STRUCT ELSE ARROWR ARROWL
 %token TYPE NAME NUM
 
 %token	<sval>	NAME TYPE;
@@ -87,13 +87,13 @@
 |	type '!'
 
 return
-:	'-' '>' type
+:	ARROWR type
 |
 
 unary
 :	NUM
 |	NAME
-|	'<' '-' NAME
+|	ARROWL NAME
 |	'(' expr ')'
 
 sufexpr
@@ -153,7 +153,7 @@
 |	IF '(' expr ')' stmt
 
 arg
-:	NAME type
+:	NAME ':' type
 
 args
 :	
@@ -180,6 +180,10 @@
 	">>", SHIFTL,
 	"struct", STRUCT,
 	"else", ELSE,
+	"->", ARROWR,
+	"→", ARROWR,
+	"<-", ARROWL,
+	"←", ARROWL,
 };
 
 Biobuf *bin;
@@ -189,7 +193,7 @@
 {
 	int c;
 
-	c = Bgetc(bin);
+	c = Bgetrune(bin);
 	if(c == Beof){
 		goteof = 1;
 		return -1;
@@ -202,25 +206,26 @@
 void
 ungetc(void)
 {
-	Bungetc(bin);
+	Bungetrune(bin);
 }
 
 void
 wordlex(char *dst, int n)
 {
-	int c;
+	Rune c;
+	char *e;
 
-	while(--n > 0){
+	for(e = dst + n - UTFmax; dst < e;){
 		c = getch();
 		if((c >= Runeself)
 		|| isalnum(c)){
-			*dst++ = c;
+			dst += runetochar(dst, &c);
 			continue;
 		}
 		ungetc();
 		break;
 	}
-	if(n <= 0)
+	if(dst > e - UTFmax*2)
 		yyerror("symbol buffer overrun");
 	*dst = '\0';
 }