shithub: xmltools

Download patch

ref: f896677cdcd52890b3bc215d655c7bea323a4755
author: sirjofri <sirjofri@sirjofri.de>
date: Mon Jul 8 11:41:38 EDT 2024

adds files

--- /dev/null
+++ b/README
@@ -1,0 +1,18 @@
+XML tools
+
+This package requires libxml from 9atom
+
+
+XQ: xml query
+
+cat file.xml | xq '/path'
+xq -f file.xml '/path'
+
+path is an XPath expression; only a subset of XPath is supported.
+
+Supported XPath features:
+
+- @attr: /hello/world/@attr
+- text(): /hello/world/text()
+- [@attr='value']: /hello/world[@attr='second']/stuff
+- [2]: /hello/world[2]/stuff
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,8 @@
+</$objtype/mkfile
+# Build settings for the xq command; the rules that use them come from the included mkmany.
+BIN=/$objtype/bin
+TARG=xq
+OFILES=\
+	xq.$O\
+
+</sys/src/cmd/mkmany
--- /dev/null
+++ b/test/mkfile
@@ -1,0 +1,6 @@
+</$objtype/mkfile
+# Tests to run; xq presumably refers to xq.rc in this directory (the rules come from the included mktest) -- confirm.
+TEST=\
+	xq\
+
+</sys/src/cmd/mktest
--- /dev/null
+++ b/test/xq.rc
@@ -1,0 +1,75 @@
+#!/bin/rc
+# xq tests. Runs in its own namespace and environment with a ramfs for the /tmp scratch files (ramfs presumably mounts on /tmp by default -- confirm), from the parent directory.
+rfork en
+ramfs
+cd ..
+# -e sets $extended (print every case with expected and actual output); -c sets $console (start an interactive xq after the tests)
+flagfmt='e:extended, c:console'
+if (! ifs=() eval `{aux/getflags $*}) {
+	aux/usage
+	exit usage
+}
+# a newline, used to lay out the -e output
+nl='
+'
+# sample document queried by every test below
+cat <<EOF >/tmp/test.xml
+<?xml?>
+<hello hattr="hval">
+	<world wattr="wval" wattr2="bla">
+		Free text
+		<stuff sattr="sval"/>
+	</world>
+	<world wattr="wval2">
+		Another free text
+		<stuff sattr="sval2"/>
+	</world>
+</hello>
+EOF
+# testxq query expected: runs $cmd with the quoted query appended and compares its stdout (stderr when stdout is empty) with expected; a mismatch prints a failed line, success is reported only with -e
+fn testxq{
+	# p: the query with each doubled quote collapsed to one; used only in the messages below
+	p=`{echo $"1 | sed 's/''''/''/g'}
+	# n: the query with every quote doubled, so it can sit inside a quoted word of the command passed to eval
+	n=`{echo $"1 | sed 's/''/''''/g'}
+	c=`{echo $"cmd ''''^$"n^''' >/tmp/out >[2]/tmp/err'}
+	eval $"c
+	if (~ $#extended 1) {
+		echo $nl^'expect:' $"p $nl^$"2
+		cat /tmp/out /tmp/err
+	}
+	r=`{cat /tmp/out}
+	if (~ $#r 0)
+		r=`{cat /tmp/err}
+	if (~ $"2 $"r) {
+		if (~ $#extended 1)
+			echo '→ success' $"p
+	}
+	if not {
+		echo '→ failed' $"1
+	}
+}
+
+# first test: xq reads the document from standard input
+cmd='cat /tmp/test.xml | 6.xq '
+testxq '/hello/world' '<world wattr=''wval'' wattr2=''bla'' />'
+# NOTE(review): 6.xq looks like the amd64 build name ($O=6) -- confirm before running on another architecture
+# remaining tests: xq reads the document named by -f
+cmd='6.xq -f /tmp/test.xml '
+
+testxq '/hello/world' '<world wattr=''wval'' wattr2=''bla'' />'
+testxq '/hello/world/@wattr' 'wval'
+testxq '/hello/world/text()' 'Free text'
+testxq '/hello/world[@wattr=''wval2'']/text()' 'Another free text'
+testxq '/hello/world[@wattr=''wval2'']/stuff' '<stuff sattr=''sval2'' />'
+testxq '/hello/world[@wattr=''none'']' 'not found'
+testxq '/hello//stuff/@sattr' 'sval'
+testxq '/hello/world[2]' '<world wattr=''wval2'' />'
+testxq '/hello/world[2]/stuff' '<stuff sattr=''sval2'' />'
+# without -c the run ends here
+if (~ $#console 0)
+	exit
+# -c: start xq on the sample document without a query argument
+echo '
+enter console, ^D to exit'
+6.xq -f /tmp/test.xml
--- /dev/null
+++ b/xq.c
@@ -1,0 +1,238 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <bio.h>
+#include <regexp.h>
+
+void
+usage(void)
+{
+	fprint(2, "usage: %s file\n", argv0);
+	exits("usage");
+}
+/* Error messages written to fd 2 by getfiltered and query; each is passed to fprint as the format string. */
+char Enotfound[] = "not found\n";
+char Einvalidsyntax[] = "invalid syntax\n";
+/* printattr prints the value of the first attribute of e named attr, followed by a newline; nothing is printed when e has no such attribute. */
+void
+printattr(Elem *e, char *attr)
+{
+	Attr *cur;
+	
+	/* skip ahead to the first attribute with the wanted name */
+	cur = e->attrs;
+	while (cur != nil && strcmp(cur->name, attr) != 0)
+		cur = cur->next;
+	if (cur != nil)
+		print("%s\n", cur->value);
+}
+/* printtext prints e->pcdata followed by a newline. NOTE(review): pcdata is not checked for nil -- confirm it is always set for the elements that reach here. */
+void
+printtext(Elem *e)
+{
+	print("%s\n", e->pcdata);
+}
+/* printelem prints e as one self-closing tag: its name followed by every attribute as name='value'. Children and text of e are not printed. */
+void
+printelem(Elem *e)
+{
+	Attr *cur;
+	
+	print("<%s", e->name);
+	for (cur = e->attrs; cur != nil; cur = cur->next)
+		print(" %s='%s'", cur->name, cur->value);
+	/* always closed as an empty tag, whatever e contains */
+	print(" />\n");
+}
+/* Compiled filter patterns, created on first use by getfiltered: fattr matches [@name='value'], fnum matches [number]. */
+Reprog *fattr = nil;
+Reprog *fnum = nil;
+
+Elem*
+getfiltered(Elem *e, char *s, char **q)
+{
+	Resub match[3];
+	Elem *el;
+	char *attr, *val;
+	char *new;
+	int id, i;
+	
+	if (!fattr)
+		fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
+	if (!fnum)
+		fnum = regcomp("\\[([0-9]+)\\]");
+	
+//	fprint(2, "e: %s\nq: %s\n", e->name, s);
+	
+	memset(match, 0, 3*sizeof(Resub));
+	if (regexec(fattr, s, match, 3)) {
+		*match[0].sp = 0;
+		new = match[0].ep;
+		
+		attr = match[1].sp;
+		*match[1].ep = 0;
+		
+		val = match[2].sp;
+		*match[2].ep = 0;
+		el = xmllook(e, s, attr, val);
+		if (!el) {
+			fprint(2, Enotfound);
+			return nil;
+		}
+		
+		/* new path has to start with the self element */
+		attr = strrchr(s, '/');
+		if (!attr) {
+			fprint(2, Einvalidsyntax);
+			return nil;
+		}
+		attr++;
+		i = strlen(attr);
+		new -= i;
+		memmove(new, attr, i);
+		return getfiltered(el, new, q);
+	}
+	memset(match, 0, 3*sizeof(Resub));
+	if (regexec(fnum, s, match, 3)) {
+		*match[0].sp = 0;
+		new = match[0].ep;
+		
+		*match[1].ep = 0;
+		id = atoi(match[1].sp);
+		
+		attr = strrchr(s, '/');
+		if (!attr) {
+			fprint(2, Einvalidsyntax);
+			return nil;
+		}
+		
+		*attr = 0;
+		attr++;
+		
+		el = xmllook(e, s, nil, nil);
+		if (!el) {
+			fprint(2, Enotfound);
+			return nil;
+		}
+		
+		i = 0;
+		for (el = el->child; el; el = el->next) {
+			if (strcmp(el->name, attr) == 0) {
+				i++;
+				if (i == id) {
+					/* new path has to start with the self element */
+					i = strlen(attr);
+					new -= i;
+					memmove(new, attr, i);
+					return getfiltered(el, new, q);
+				}
+			}
+		}
+		fprint(2, Enotfound);
+		return nil;
+	}
+	/* simple checks for obvious syntax errors, if nothing matches */
+	if (strpbrk(s, "[]=\n")) {
+		fprint(2, Einvalidsyntax);
+		return nil;
+	}
+	
+	*q = s;
+	return e;
+}
+/* query evaluates path q against document x and prints the result: the text for .../text(), an attribute value for .../@name, otherwise the element itself. Failures are reported on fd 2. q is modified. */
+void
+query(char *q, Xml *x)
+{
+	Elem *hit;
+	char *attrname;
+	char *textfn;
+	
+	/* resolve the [..] filters first; afterwards q points at the part of */
+	/* the path that getfiltered left unprocessed */
+	hit = getfiltered(x->root, q, &q);
+	if (hit == nil)
+		return;
+	
+	/* cut the path at the first "/@" and keep what follows as the attribute name */
+	attrname = strstr(q, "/@");
+	if (attrname != nil) {
+		*attrname = 0;
+		attrname += 2;
+	}
+	
+	/* cut it again at "/text()"; only whether it was present matters */
+	textfn = strstr(q, "/text()");
+	if (textfn != nil)
+		*textfn = 0;
+	
+	/* look up what is left of the path, passing the attribute name if any */
+	hit = xmllook(hit, q, attrname, nil);
+	if (hit == nil) {
+		fprint(2, Enotfound);
+		return;
+	}
+	
+	/* text() takes precedence over @name when both were found */
+	if (textfn != nil) {
+		printtext(hit);
+	} else if (attrname != nil) {
+		printattr(hit, attrname);
+	} else {
+		printelem(hit);
+	}
+}
+/* Prompt printed by main before each query it reads from standard input. */
+char prompt[] = "X: ";
+/* main: xq [-f file] [query]. Parses the XML from file, or from standard input without -f, then runs the query argument; without one it reads queries from standard input, one per line, until end of input. */
+void
+main(int argc, char **argv)
+{
+	Xml *x;
+	int fd;
+	char *file = nil;
+	char *q;
+	Biobuf *bin;
+	
+	ARGBEGIN{
+	case 'f':
+		file = EARGF(usage());
+		break;
+	default:
+		break;
+	}ARGEND;
+	
+	fd = 0;
+	if (file) {
+		fd = open(file, OREAD);
+		if (fd < 0)
+			sysfatal("error opening file: %r");
+	}
+	
+	x = xmlparse(fd, 8192, Fcrushwhite);
+	if (!x)
+		sysfatal("error parsing file");
+	
+	if (argc) {
+		q = argv[0];
+		query(q, x);
+		exits(nil);
+	}
+	
+	bin = Bfdopen(0, OREAD);
+	if (!bin)
+		sysfatal("error: %r");
+	
+	print(prompt);
+	while (q = Brdstr(bin, '\n', 1)) {
+		/* an empty line runs no query but still gets a fresh prompt */
+		if (*q != 0)
+			query(q, x);
+		free(q);
+		print(prompt);
+	}
+	
+	/* end of input: exit explicitly, since returning from main */
+	/* would leave the process with a non-empty exit status */
+	exits(nil);
+}