shithub: gemnine

Download patch

ref: 54f4cf61dada165e94b3cd61f72a7003092e9867
parent: 78425573e39c42b2624834ad832f804996aa32d4
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Mon Aug 24 05:53:59 EDT 2020

use webfs url parsing

--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+[a0125678vqki].out
+*.[o0125678vqki]
--- /dev/null
+++ b/gemnine.h
@@ -1,0 +1,28 @@
+typedef struct Url Url;
+
+struct Url	/* a parsed URL; filled in by urlparse, released by freeurl */
+{
+	char *full;	/* the whole URL as printed by %U; set last by urlparse */
+	char *scheme;	/* lowercased by urlparse */
+	char *user;
+	char *pass;
+	char *host;	/* lowercased by urlparse; the [] around a bracketed host are dropped */
+	char *port;	/* nil when saneurl recognised it as the scheme's default port */
+	char *path;
+	char *query;
+	char *fragment;
+};
+
+#pragma varargck type "U" Url*
+
+int	Efmt(Fmt*);	/* %E: percent-encode; takes a Str2 (url.c). NOTE(review): main.c still passes a char* */
+int	Nfmt(Fmt*);	/* %N: host name passed through utf2idn */
+int	Mfmt(Fmt*);	/* installed as %] by main: wraps hosts containing ':' in [] */
+int Ufmt(Fmt *f);	/* %U: prints a Url* */
+Url *urlparse(Url *from, char *s);	/* parse s, relative to from unless from is nil; nil if not a sane URL */
+int matchurl(Url *u, Url *s);	/* 1 if s matches the pattern u (nil u matches anything) */
+void freeurl(Url *u);	/* frees u and all its strings */
+char *Upath(Url *u);	/* path of u, "/" if it only has a user or host, else nil; not a copy */
+
+void *emalloc(int n);	/* zeroed allocation; sysfatal on failure */
+char *estrdup(char *s);	/* strdup; sysfatal on failure */
--- a/main.c
+++ b/main.c
@@ -4,16 +4,10 @@
 #include <bio.h>
 #include <ctype.h>
 #include <plumb.h>
+#include "gemnine.h"
 
-typedef struct Url Url;
 typedef struct Response Response;
 
-struct Url {
-	char *url;
-	char *server;
-	char *port;
-};
-
 struct Response {
 	Url *url;
 	char *mime;
@@ -24,102 +18,7 @@
 
 #pragma varargck type "E" char*
 
-char *
-urlto(Url *url, char *u)
-{
-	char *e, *trail;
-	int len;
-
-	if((len = strlen(u)) < 1)
-		return "";
-	trail = (len > 1 && u[len-1] == '/') ? "/" : "";
-
-	if(*u == '/'){
-		if(u[1] == '/') /* no protocol */
-			return smprint("gemini://%s%s", cleanname(u+2), trail);
-
-		/* absolute url, no scheme */
-		return strcmp(url->port, "1965") == 0 ?
-			smprint("gemini://%s%s%s", url->server, cleanname(u), trail) :
-			smprint("gemini://%s:%s%s%s", url->server, url->port, cleanname(u), trail);
-	}
-
-	/* with scheme */
-	if((e = strpbrk(u, ":/")) != nil && e[0] == ':' && e[1] == '/' && e[2] == '/'){
-		e[2] = 0;
-		e = cleanname(e+3);
-		return smprint("%s/%s%s", u, e, trail);
-	}
-
-	/* chars not allowed */
-	if(strpbrk(u, ":") != nil)
-		return strdup(u);
-
-	/* relative, no scheme */
-	len = strlen(url->url);
-	if(url->url[len-1] == '/'){ /* easy */
-		u = smprint("%s%s%s", url->url, u, trail);
-	}else{
-		/* replace the last element */
-		if((e = strrchr(url->url, '/')) != nil && e[-1] != '/')
-			len = e - url->url;
-		u = smprint("%.*s/%s%s", len, url->url, u, trail);
-	}
-	if((e = strchr(strchr(u, ':') + 3, '/')) != nil)
-		cleanname(e);
-	return u;
-}
-
-Url *
-parseurl(char *url)
-{
-	char *server, *port, *s, *e;
-	Url *u;
-
-	url = strdup(url);
-	if((s = strpbrk(url, ":/")) != nil && s[0] == ':' && s[1] == '/' && s[2] == '/'){
-		server = s + 3;
-	}else{
-		s = smprint("gemini://%s", url);
-		free(url);
-		url = s;
-		server = s + 9;
-	}
-
-	port = strdup("1965");
-	if((e = strpbrk(server, ":/")) != nil){
-		s = mallocz(e-server+1, 1);
-		memmove(s, server, e-server);
-		server = s;
-		if(*e == ':'){
-			port = strdup(e+1);
-			if((e = strchr(port, '/')) != nil)
-				*e = 0;
-		}
-	}else{
-		server = strdup(server);
-	}
-
-	u = calloc(1, sizeof(*u));
-	u->url = url;
-	u->server = server;
-	u->port = port;
-
-	return u;
-}
-
 void
-freeurl(Url *u)
-{
-	if(u != nil){
-		free(u->url);
-		free(u->server);
-		free(u->port);
-		free(u);
-	}
-}
-
-void
 freeresponse(Response *r)
 {
 	if(r != nil){
@@ -132,26 +31,28 @@
 }
 
 Response *
-request(char *url)
+request(Url *url)
 {
 	Thumbprint *th;
 	Response *r;
-	char *s, buf[1024];
+	char *s, buf[1024], *port;
 	TLSconn conn;
 	int i, ok, len, oldfd;
+	Url *u;
 
 	r = calloc(1, sizeof(*r));
 	r->fd = -1;
-	if((r->url = parseurl(url)) == nil)
-		goto err;
+	r->url = url;
 
-	if((r->fd = dial(netmkaddr(r->url->server, "tcp", r->url->port), nil, nil, nil)) < 0){
+	if((port = url->port) == nil)
+		port = "1965";
+	if((r->fd = dial(netmkaddr(url->host, "tcp", port), nil, nil, nil)) < 0){
 		werrstr("dial: %r");
 		goto err;
 	}
 	th = initThumbprints("/sys/lib/ssl/gemini", nil, "x509");
 	memset(&conn, 0, sizeof(conn));
-	conn.serverName = r->url->server;
+	conn.serverName = r->url->host;
 	oldfd = r->fd;
 	r->fd = tlsClient(oldfd, &conn);
 	close(oldfd);
@@ -171,7 +72,7 @@
 		}
 	}
 
-	fprint(r->fd, "%s\r\n", r->url->url);
+	fprint(r->fd, "%s\r\n", r->url->full);
 	for(len = 0; len < sizeof(buf)-1; len++){
 		if((i = read(r->fd, buf+len, 1)) < 0){
 			werrstr("read: %r");
@@ -195,14 +96,17 @@
 		s++;
 
 	if(r->status >= 10 && r->status < 20){ /* input */
-		r->prompt = strdup(s);
+		r->prompt = estrdup(s);
 	}else if(r->status >= 20 && r->status < 30){ /* success */
-		r->mime = strdup(s[0] ? s : "text/gemini");
+		r->mime = estrdup(s[0] ? s : "text/gemini");
 	}else if(r->status >= 30 && r->status < 40){ /* redirect */
-		s = urlto(r->url, s);
+		if((u = urlparse(r->url, s)) == nil){
+			werrstr("invalid redirect url");
+			goto err;
+		}
 		freeresponse(r);
-		r = request(s);
-		free(s);
+		if((r = request(u)) == nil)
+			freeurl(u);
 	}else if(r->status >= 40 && r->status < 50){
 		werrstr("temporary failure: %s", s);
 		goto err;
@@ -218,30 +122,11 @@
 
 err:
 	if(r != nil && r->url != nil)
-		werrstr("%q: %r", r->url->url);
+		werrstr("%U: %r", r->url);
 	freeresponse(r);
 	return nil;
 }
 
-int
-Efmt(Fmt *f)
-{
-	char *s;
-
-	s = va_arg(f->args, char*);
-	for(; *s; s++){
-		if(*s == '%' && isxdigit(s[1]) && isxdigit(s[2])){
-			fmtprint(f, "%%%c%c", toupper(s[1]), toupper(s[2]));
-			s += 2;
-		}else if(isalnum(*s) || strchr(".-_~!$&'()*,;=/:@ \n", *s) == nil){
-			fmtprint(f, "%c", *s);
-		}else{
-			fmtprint(f, "%%%.2X", *s & 0xff);
-		}
-	}
-	return 0;
-}
-
 char *
 readall(int fd)
 {
@@ -301,7 +186,8 @@
 main(int argc, char **argv)
 {
 	Response *r;
-	char *s, *t, *u, *url;
+	char *s, *t, *u;
+	Url *url, *x;
 	int len, wait, pl, fd;
 	Plumbmsg *m;
 	Biobuf out, body;
@@ -318,8 +204,14 @@
 		exits("usage");
 	}
 
-	fmtinstall('E', Efmt);
 	quotefmtinstall();
+	fmtinstall('U', Ufmt);
+	fmtinstall('N', Nfmt);
+	fmtinstall(']', Mfmt);
+	fmtinstall('E', Efmt);
+	fmtinstall('[', encodefmt);
+	fmtinstall('H', encodefmt);
+
 	Binit(&out, 1, OWRITE);
 	pl = -1;
 
@@ -328,7 +220,7 @@
 	if(wait){
 		if(pl >= 0 || (pl = plumbopen("gemini", OREAD)) >= 0){
 			if((m = plumbrecv(pl)) != nil){
-				url = strdup(m->data);
+				url = urlparse(nil, estrdup(m->data));
 				plumbfree(m);
 			}else{
 				exits(nil);
@@ -337,7 +229,7 @@
 			sysfatal("plumbopen: %r");
 		}
 	}else{
-		url = strdup(argv[0]);
+		url = urlparse(nil, estrdup(argv[0]));
 	}
 
 nextreq:
@@ -357,8 +249,10 @@
 				print("%s\n", r->prompt);
 				s = readall(0);
 				free(url);
-				url = smprint("%s?%E", r->url->url, s);
+				t = smprint("%s?%E", r->url->full, s);
 				free(s);
+				url = urlparse(nil, t);
+				free(t);
 				freeresponse(r);
 				close(fd);
 				goto nextreq;
@@ -382,9 +276,9 @@
 						*t++ = 0;
 					else
 						t = "";
-					u = urlto(r->url, u);
-					Bprint(&out, "→ %s %s\n", u, t);
-					free(u);
+					x = urlparse(r->url, u);
+					Bprint(&out, "→ %U %s\n", x, t);
+					freeurl(x);
 				}else{
 					Bprint(&out, "%s\n", s);
 				}
--- a/mkfile
+++ b/mkfile
@@ -4,8 +4,13 @@
 
 BIN=/$objtype/bin
 
+HFILES=\
+	gemnine.h\
+
 OFILES=\
 	main.$O\
+	url.$O\
+	util.$O\
 
 UPDATE=\
 	$HFILES\
--- /dev/null
+++ b/url.c
@@ -1,0 +1,426 @@
+/* this is a copy from webfs */
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "gemnine.h"
+
+enum {
+	Domlen = 256,	/* size of the host name buffers handed to utf2idn and idn2utf */
+};
+
+typedef struct {
+	char *s1;	/* the string Efmt prints */
+	char *s2;	/* extra characters Efmt lets through unescaped */
+}Str2;
+
+#pragma varargck type "E" Str2
+#pragma varargck type "N" char*
+#pragma varargck type "]" char*
+
+static char reserved[] = "%:/?#[]@!$&'()*+,;=";	/* urlparse passes this to unescape for path, query and fragment: these stay %XX */
+
+/* dhex returns the numeric value of the hexadecimal digit c; */
+/* any other character yields 0. */
+static int
+dhex(char c)
+{
+	if(c >= 'A' && c <= 'F')
+		return 10 + (c - 'A');
+	if(c >= 'a' && c <= 'f')
+		return 10 + (c - 'a');
+	return (c >= '0' && c <= '9') ? c - '0' : 0;
+}
+
+/* unescape decodes the %XX sequences of s in place and returns s (nil stays nil). */
+/* A sequence whose decoded byte is found in spec is kept escaped, */
+/* with its two hex digits folded to upper case. */
+static char*
+unescape(char *s, char *spec)
+{
+	char *src, *dst;
+	uchar c;
+
+	if(s == nil)
+		return nil;
+	for(src = dst = s; (c = *src) != 0; src++){
+		if(c == '%' && isxdigit(src[1]) && isxdigit(src[2])){
+			c = (dhex(src[1])<<4)|dhex(src[2]);
+			if(spec != nil && strchr(spec, c) != nil){
+				*dst++ = '%';
+				*dst++ = toupper(src[1]);
+				c = toupper(src[2]);
+			}
+			src += 2;
+		}
+		*dst++ = c;
+	}
+	*dst = 0;
+	return s;
+}
+
+/* Efmt prints Str2.s1 percent-encoded; alphanumerics, .-_~!$&'()*,;= and the bytes in Str2.s2 pass through */
+int
+Efmt(Fmt *f)
+{
+	Str2 arg;
+	char *p;
+
+	arg = va_arg(f->args, Str2);
+	for(p = arg.s1; *p != 0; p++){
+		if(*p == '%' && isxdigit(p[1]) && isxdigit(p[2])){
+			/* already escaped: only fold the hex digits to upper case */
+			fmtprint(f, "%%%c%c", toupper(p[1]), toupper(p[2]));
+			p += 2;
+		}else if(isalnum(*p) || strchr(".-_~!$&'()*,;=", *p) != nil || strchr(arg.s2, *p) != nil)
+			fmtprint(f, "%c", *p);
+		else
+			fmtprint(f, "%%%.2X", *p & 0xff);
+	}
+	return 0;
+}
+
+/* Nfmt prints a host name after converting it with utf2idn; */
+/* when the conversion fails the name is printed unchanged. */
+int
+Nfmt(Fmt *f)
+{
+	char *name, idn[Domlen];
+
+	name = va_arg(f->args, char*);
+	fmtprint(f, "%s", utf2idn(name, idn, sizeof(idn)) >= 0 ? idn : name);
+	return 0;
+}
+
+int
+Mfmt(Fmt *f)	/* prints a host, wrapped in [] when it contains ':' and does not start with '[' */
+{
+	char *host = va_arg(f->args, char*);
+	fmtprint(f, (*host == '[' || strchr(host, ':') == nil) ? "%s" : "[%s]", host);
+	return 0;
+}
+
+int
+Ufmt(Fmt *f)	/* prints a Url* as scheme://user:pass@host:port/path?query#fragment, omitting unset parts */
+{
+	char *s;
+	Url *u;
+
+	if((u = va_arg(f->args, Url*)) == nil)
+		return fmtprint(f, "nil");
+	if(u->scheme)
+		fmtprint(f, "%s:", u->scheme);
+	if(u->user || u->host)
+		fmtprint(f, "//");
+	if(u->user){
+		fmtprint(f, "%E", (Str2){u->user, ""});	/* %E (Efmt) percent-encodes; second member lists extra bytes left as is */
+		if(u->pass)
+			fmtprint(f, ":%E", (Str2){u->pass, ""});
+		fmtprint(f, "@");
+	}
+	if(u->host){
+		fmtprint(f, "%]", u->host);	/* main installs Mfmt for %] */
+		if(u->port)
+			fmtprint(f, ":%s", u->port);
+	}
+	if(s = Upath(u))	/* Upath supplies "/" when only a user or host is set */
+		fmtprint(f, "%E", (Str2){s, "/:@+"});
+	if(u->query)
+		fmtprint(f, "?%E", (Str2){u->query, "/:@"});
+	if(u->fragment)
+		fmtprint(f, "#%E", (Str2){u->fragment, "/:@?+"});
+	return 0;
+}
+
+/* Upath returns the path of u, "/" when u has no path but a user or host, */
+/* and nil otherwise (including a nil u). */
+char*
+Upath(Url *u)
+{
+	if(u == nil)
+		return nil;
+	if(u->path != nil)
+		return u->path;
+	return (u->user != nil || u->host != nil) ? "/" : nil;
+}
+
+static char*
+remdot(char *s)	/* removes "." and ".." elements from the path s in place; returns s */
+{
+	char *b, *d, *p;
+	int dir, n;
+
+	dir = 1;
+	b = d = s;	/* b: start of the result, d: write position */
+	if(*s == '/')
+		s++;
+	for(; s; s = p){	/* one path element per iteration */
+		if(p = strchr(s, '/'))
+			*p++ = 0;	/* terminate this element; p is the next one, or nil at the end */
+		if(*s == '.' && ((s[1] == 0) || (s[1] == '.' && s[2] == 0))){
+			if(s[1] == '.')
+				while(d > b)	/* "..": back up over the last element written */
+					if(*--d == '/')
+						break;
+			dir = 1;	/* a trailing "." or ".." leaves a directory, so end with '/' */
+			continue;
+		} else
+			dir = (p != nil);
+		if((n = strlen(s)) > 0)
+			memmove(d+1, s, n);	/* d+1 leaves room for the '/' stored just below */
+		*d++ = '/';
+		d += n;
+	}
+	if(dir)
+		*d++ = '/';
+	*d = 0;
+	return b;
+}
+
+static char*
+abspath(char *s, char *b)	/* resolves path s against base path b; returns an allocated string or nil */
+{
+	char *x, *a;
+
+	if(b && *b){
+		if(s == nil || *s == 0)
+			return estrdup(b);	/* nothing to resolve: copy of the base */
+		if(*s != '/' && (x = strrchr(b, '/'))){
+			a = emalloc((x - b) + strlen(s) + 4);	/* base directory + '/' + s, plus slack for remdot */
+			sprint(a, "%.*s/%s", utfnlen(b, x - b), b, s);	/* precision is given as the rune count of the x-b bytes */
+			return remdot(a);
+		}
+	}
+	if(s && *s){
+		if(*s != '/')
+			return estrdup(s);	/* relative path without a usable base: returned as is */
+		a = emalloc(strlen(s) + 4);
+		sprint(a, "%s", s);
+		return remdot(a);
+	}
+	return nil;	/* s is nil or empty and there is no base */
+}
+
+/* pstrdup replaces the string at *p with a private copy made by estrdup; */
+/* an empty string becomes nil, and a nil p or *p is left alone. */
+static void
+pstrdup(char **p)
+{
+	char *old;
+
+	if(p == nil || (old = *p) == nil)
+		return;
+	*p = (*old != 0) ? estrdup(old) : nil;
+}
+
+static char*
+mklowcase(char *s)
+{
+	char *cp, low[UTFmax];
+	Rune r;
+	int n;
+
+	/* lowercase s in place, rune by rune, and return it; s may be nil */
+	for(cp = s; cp != nil && *cp != 0; cp += n){
+		n = chartorune(&r, cp);
+		r = tolowerrune(r);
+		if(runetochar(low, &r) == n)	/* a different length (bad UTF-8 too) would overrun or corrupt s */
+			memmove(cp, low, n);
+	}
+	return s;
+}
+
+static Url *
+saneurl(Url *u)	/* returns u, or frees it and returns nil when scheme, host or path is missing */
+{
+	if(u == nil || u->scheme == nil || u->host == nil || Upath(u) == nil){
+		freeurl(u);
+		return nil;
+	}
+	if(u->port){
+		/* remove default ports */
+		switch(atoi(u->port)){
+		case 21:	if(!strcmp(u->scheme, "ftp"))	 goto Defport; break;
+		case 70:	if(!strcmp(u->scheme, "gopher")) goto Defport; break;
+		case 80:	if(!strcmp(u->scheme, "http"))	 goto Defport; break;
+		case 443:	if(!strcmp(u->scheme, "https"))	 goto Defport; break;
+		case 1965:  if(!strcmp(u->scheme, "gemini")) goto Defport; break;
+		default:	if(!strcmp(u->scheme, u->port))	 goto Defport; break;	/* port spelled as the scheme's own name */
+		Defport:	/* reached only through the gotos above */
+			free(u->port);
+			u->port = nil;
+		}
+	}
+	return u;
+}
+
+Url*
+urlparse(Url *b, char *s)	/* parses s, resolved against base b (may be nil); returns a new Url, or nil if it is not sane */
+{
+	char *t, *p, *x, *y;
+	Url *u;
+
+	if(s == nil)
+		s = "";
+	t = nil;
+	s = p = estrdup(s);	/* work on a private copy (freed at Out); the caller's string is not kept */
+	u = emalloc(sizeof(*u));
+	for(; *p; p++){	/* look for a leading "scheme:" */
+		if(*p == ':'){
+			if(p == s)
+				break;
+			*p++ = 0;
+			u->scheme = s;
+			b = nil;	/* a URL with its own scheme ignores the base */
+			goto Abs;
+		}
+		if(!isalpha(*p))
+			if((p == s) || ((!isdigit(*p) && strchr("+-.", *p) == nil)))
+				break;
+	}
+	p = s;
+	if(b){
+		switch(*p){
+		case 0:
+			memmove(u, b, sizeof(*u));	/* shallow copy, duplicated at Out. NOTE(review): b->full is copied too and not duplicated */
+			goto Out;
+		case '#':
+			memmove(u, b, sizeof(*u));
+			u->fragment = p+1;
+			goto Out;
+		case '?':
+			memmove(u, b, sizeof(*u));
+			u->fragment = u->query = nil;
+			break;
+		case '/':
+			if(p[1] == '/'){
+				u->scheme = b->scheme;	/* "//host...": only the scheme comes from the base */
+				b = nil;
+				break;
+			}	/* no break: a single leading '/' is handled by default */
+		default:
+			memmove(u, b, sizeof(*u));
+			u->fragment = u->query = u->path = nil;
+			break;
+		}
+	}
+Abs:
+	if(x = strchr(p, '#')){	/* split off the fragment, then the query */
+		*x = 0;
+		u->fragment = x+1;
+	}
+	if(x = strchr(p, '?')){
+		*x = 0;
+		u->query = x+1;
+	}
+	if(p[0] == '/' && p[1] == '/'){	/* authority: [user[:pass]@]host[:port] */
+		p += 2;
+		if(x = strchr(p, '/')){
+			u->path = t = abspath(x, Upath(b));
+			*x = 0;
+		}
+		if(x = strchr(p, '@')){
+			*x = 0;
+			if(y = strchr(p, ':')){
+				*y = 0;
+				u->pass = y+1;
+			}
+			u->user = p;
+			p = x+1;
+		}
+		if((x = strrchr(p, ']')) == nil)	/* search for the port's ':' only after a closing ']' */
+			x = p;
+		if(x = strrchr(x, ':')){
+			*x = 0;
+			u->port = x+1;
+		}
+		if(x = strchr(p, '[')){	/* drop the brackets of a bracketed host */
+			p = x+1;
+			if(y = strchr(p, ']'))
+				*y = 0;
+		}
+		u->host = p;
+	} else {
+		u->path = t = abspath(p, Upath(b));
+	}
+Out:
+	pstrdup(&u->scheme);	/* give u its own copies; empty strings become nil */
+	pstrdup(&u->user);
+	pstrdup(&u->pass);
+	pstrdup(&u->host);
+	pstrdup(&u->port);
+	pstrdup(&u->path);
+	pstrdup(&u->query);
+	pstrdup(&u->fragment);
+	free(s);
+	free(t);	/* the abspath result; u->path is a copy of it by now */
+
+	/* the + character encodes space only in query part */
+	if(s = u->query)
+		while(s = strchr(s, '+'))
+			*s++ = ' ';
+
+	if(s = u->host){
+		t = emalloc(Domlen);
+		if(idn2utf(s, t, Domlen) >= 0){	/* on failure the host is kept as given */
+			u->host = estrdup(t);
+			free(s);
+		}
+		free(t);
+	}
+
+	unescape(u->user, nil);
+	unescape(u->pass, nil);
+	unescape(u->path, reserved);	/* bytes in reserved stay percent-encoded */
+	unescape(u->query, reserved);
+	unescape(u->fragment, reserved);
+	mklowcase(u->scheme);
+	mklowcase(u->host);
+	mklowcase(u->port);
+
+	if((u = saneurl(u)) != nil)	/* saneurl frees u and returns nil when scheme, host or path is missing */
+		u->full = smprint("%U", u);
+
+	return u;
+}
+
+/* matchurl returns 1 when s matches the pattern u: scheme, user, host and port set in u */
+/* must equal those of s, and u's path must be a prefix of s's path; a nil u matches anything */
+int
+matchurl(Url *u, Url *s)
+{
+	char *want, *have;
+
+	if(u == nil)
+		return 1;
+	if(s == nil)
+		return 0;
+	if(u->scheme != nil && (s->scheme == nil || strcmp(u->scheme, s->scheme) != 0))
+		return 0;
+	if(u->user != nil && (s->user == nil || strcmp(u->user, s->user) != 0))
+		return 0;
+	if(u->host != nil && (s->host == nil || strcmp(u->host, s->host) != 0))
+		return 0;
+	if(u->port != nil && (s->port == nil || strcmp(u->port, s->port) != 0))
+		return 0;
+	if((want = Upath(u)) == nil)
+		return 1;
+	have = Upath(s);
+	return have != nil && strncmp(want, have, strlen(want)) == 0;
+}
+
+void
+freeurl(Url *u)	/* releases every string held by u, then u itself */
+{
+	if(u != nil){	/* a nil u is ignored */
+		free(u->fragment);
+		free(u->query);
+		free(u->path);
+		free(u->port);
+		free(u->host);
+		free(u->pass);
+		free(u->user);
+		free(u->scheme);
+		free(u->full);
+		free(u);
+	}
+}
--- /dev/null
+++ b/util.c
@@ -1,0 +1,28 @@
+#include <u.h>
+#include <libc.h>
+#include "gemnine.h"
+
+/* emalloc returns n zeroed bytes tagged with the caller's pc; it exits through sysfatal when out of memory */
+void *
+emalloc(int n)
+{
+	void *p;
+	if((p = mallocz(n, 1)) == nil){
+		fprint(2, "out of memory allocating %d\n", n);
+		sysfatal("mem");
+	}
+	setmalloctag(p, getcallerpc(&n));
+	return p;
+}
+
+char *
+estrdup(char *s)	/* strdup that exits through sysfatal when out of memory */
+{
+	char *t;
+	if((t = strdup(s)) == nil) {
+		fprint(2, "out of memory in strdup(%.10s)\n", s);
+		sysfatal("mem");
+	}
+	setmalloctag(t, getcallerpc(&s));	/* getcallerpc takes the address of the first argument, not of a local */
+	return t;
+}