shithub: brokentoys

Download patch

ref: 5961a8b6d2f95e29c0b7b0b450eb988763b12c40
parent: 76ada2d705f21dd5ed753fdafe00849afa7f6edd
author: rodri <rgl@antares-labs.eu>
date: Mon Mar 17 12:59:35 EDT 2025

tokenizer.c: bio(2) line tokenizer

--- /dev/null
+++ b/tokenizer.c
@@ -1,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+enum {	/* token kinds, stored in Token.t and returned by lex */
+	TSTR,	/* string field; its text is in Token.s */
+	TNUM,	/* numeric field; its value is in Token.v */
+	TNL,	/* end of the current input line */
+};
+
+typedef struct Token Token;
+typedef struct Tokenizer Tokenizer;
+
+struct Token	/* one token as produced by lex */
+{
+	int t;		/* kind: TSTR, TNUM or TNL */
+	char *s;	/* for TSTR: the field text, pointing into Tokenizer.line */
+	double v;	/* for TNUM: the field converted with strtod */
+};
+
+struct Tokenizer	/* lex state: one input, processed a line at a time */
+{
+	Biobuf *in;	/* input the lines are read from */
+	Token tok;	/* token most recently produced by lex */
+	char *line;	/* current line from getline, nil between lines; freed by lex */
+	char *f[10];	/* fields of line, split in place by tokenize (given nelem(f) as limit) */
+	int nf;		/* number of valid entries in f */
+	int cur;	/* index in f of the next field lex will hand out */
+};
+
+/*
+ * Read the next '\n'-delimited line of b with Brdstr(b, '\n', 1).
+ * Returns Brdstr's string, which the callers free, or nil if it gave none.
+ */
+static char *
+getline(Biobuf *b)
+{
+	return Brdstr(b, '\n', 1);
+}
+
+#define isnum(c) ((unsigned)((c)-'0') <= 9)	/* true for '0'..'9'; c is evaluated only once */
+
+/* lex stores the next token of t->in in t->tok and returns its kind, or -1
+ * with errstr set when no line can be read.  A TNL follows the last field of
+ * each line; tok.s points into t->line, which is freed when that TNL is made.
+ * A field is a TNUM only if it starts with a digit and strtod consumes it all. */
+static int
+lex(Tokenizer *t)
+{
+	char *s, *e;
+
+	while(t->cur >= t->nf){
+		if(t->line != nil){	/* fields used up: end this line */
+			free(t->line);
+			t->line = nil;
+			return t->tok.t = TNL;
+		}
+		if((t->line = getline(t->in)) == nil){
+			werrstr("could not read a line");
+			return -1;
+		}
+		t->nf = tokenize(t->line, t->f, nelem(t->f));
+		t->cur = 0;
+	}
+	s = t->f[t->cur++];
+	t->tok.t = TSTR;
+	t->tok.s = s;
+	if(isnum(s[0])){
+		t->tok.v = strtod(s, &e);
+		if(*e == '\0')	/* "12abc" stays a string instead of becoming 12 */
+			t->tok.t = TNUM;
+	}
+	return t->tok.t;
+}
+
+/*
+ * Tokenize standard input with lex and print one line per token:
+ * "str: ..." for TSTR, "num: ..." for TNUM and "nl" for TNL.
+ * The loop ends when lex returns a negative value.
+ */
+void
+main(int, char**)
+{
+	Tokenizer t;
+
+	memset(&t, 0, sizeof t);
+	t.in = Bfdopen(0, OREAD);
+	if(t.in == nil)
+		sysfatal("Bfdopen: %r");
+
+	while(lex(&t) >= 0){
+		if(t.tok.t == TSTR)
+			print("str: %s\n", t.tok.s);
+		else if(t.tok.t == TNUM)
+			print("num: %g\n", t.tok.v);
+		else if(t.tok.t == TNL)
+			print("nl\n");
+		else
+			sysfatal("isotope escaped");
+	}
+	free(t.line);	/* release whatever line lex still holds */
+	exits(nil);
+}