shithub: xml-9atom

Download patch

ref: e864dc493153ca5083018c94a3737165613ef0d5
parent: 17b3ae4f1a1ce71b26bd2d383c9be513af29bb66
author: sirjofri <sirjofri@sirjofri.de>
date: Thu Jul 11 11:29:18 EDT 2024

adds first libxpath functionality

diff: cannot open b/libxpath//null: file does not exist: 'b/libxpath//null'
--- a/README
+++ b/README
@@ -10,6 +10,10 @@
 - xb:
   /$objtype/bin/xb
   /sys/man/1/xb
+- libxpath: (not from 9atom)
+  /sys/include/xpath.h
+  /$objtype/lib/libxpath.a
+  /sys/man/2/xpath
 
 Installation:
 
@@ -21,3 +25,17 @@
 mk nuke
 
 This will remove all the installed files.
+
+
+Libxpath
+
+Currently supported XPath rules:
+
+- root path: /path/from/root
+- attribute path: /path/to/@attribute
+- text path: /path/to/text()
+- attribute filter: /path/to[@attribute='value']/filtered
+- select all path: /path/to//all/children
+- numbered element: /path/to/second[2]/element
+
+There are probably bugs.
--- /dev/null
+++ b/libxpath/mkfile
@@ -1,0 +1,12 @@
+# Builds the libxpath archive for the current $objtype.
+</$objtype/mkfile
+
+# target archive
+LIB=/$objtype/lib/libxpath.a
+
+# object files that go into the archive
+OFILES=\
+	xmllookpath.$O\
+
+# headers listed for this library
+HFILES=\
+	/sys/include/xml.h\
+	/sys/include/xpath.h\
+
+# presumably supplies the generic library build/install rules - confirm
+</sys/src/cmd/mksyslib
--- /dev/null
+++ b/libxpath/xmllookpath.c
@@ -1,0 +1,301 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include <regexp.h>
+
+/*
+ * Compiled step patterns, filled in by xmllookpath on its first call:
+ * fattr matches an attribute filter, fnum a numeric index and
+ * fattrend a trailing @attribute.
+ */
+Reprog *fattr = nil;
+Reprog *fnum = nil;
+Reprog *fattrend = nil;
+
+/*
+ * Report whether element e carries an attribute called attr whose
+ * value is exactly value.  Returns 1 on a match, 0 otherwise.
+ */
+static int
+attrmatches(Elem *e, char *attr, char *value)
+{
+	Attr *cur;
+
+	cur = e->attrs;
+	while (cur != nil) {
+		if (strcmp(cur->name, attr) == 0 && strcmp(cur->value, value) == 0)
+			return 1;
+		cur = cur->next;
+	}
+	return 0;
+}
+
+/*
+ * Round m up to the next multiple of 32 that is strictly greater
+ * than m/32*32, i.e. the slot count to allocate for m entries.
+ */
+static int
+bufsize(int m)
+{
+	enum { Chunk = 32 };
+	int chunks;
+
+	chunks = m / Chunk + 1;
+	return chunks * Chunk;
+}
+
+/*
+ * Debug aid: write e to file descriptor 2 in the form
+ * <name attr='value' ... />, without a trailing newline.
+ */
+static void
+dbgprintnode(Elem *e)
+{
+	Attr *cur;
+
+	fprint(2, "<%s", e->name);
+	cur = e->attrs;
+	while (cur != nil) {
+		fprint(2, " %s='%s'", cur->name, cur->value);
+		cur = cur->next;
+	}
+	fprint(2, " />");
+}
+
+/*
+ * Append the results in b to the accumulator a.
+ *
+ * A b without entries (num < 1) is ignored.  If a is still unset
+ * (type 0) it takes over b as a whole, including b's array.
+ * Otherwise both must have the same type: b's entries are copied
+ * behind a's and b's array is freed.  Only the pointers are copied,
+ * not what they point to.
+ *
+ * Exits via sysfatal on a type mismatch or when the array cannot
+ * be grown.
+ */
+static void
+appendresult(XpResult *a, XpResult b)
+{
+	int n, cap;
+	void *p;
+	
+	if (b.num < 1)
+		return;
+	if (!a->type) {
+		*a = b;
+		goto Out;
+	}
+	if (a->type != b.type)
+		sysfatal("error: incompatible type");
+	n = a->num + b.num;
+	switch (a->type) {
+	case XTelem:
+		if (n >= a->size) {
+			cap = bufsize(n);
+			p = realloc(a->elems, cap * sizeof(Elem*));
+			if (p == nil)
+				sysfatal("appendresult: realloc: %r");
+			a->elems = p;
+			/* record the new capacity so later appends can use the slack */
+			a->size = cap;
+		}
+		memcpy(&a->elems[a->num], b.elems, b.num * sizeof(Elem*));
+		a->num = n;
+		free(b.elems);
+		break;
+	case XTstring:
+		if (n >= a->size) {
+			cap = bufsize(n);
+			p = realloc(a->strings, cap * sizeof(char*));
+			if (p == nil)
+				sysfatal("appendresult: realloc: %r");
+			a->strings = p;
+			/* record the new capacity so later appends can use the slack */
+			a->size = cap;
+		}
+		memcpy(&a->strings[a->num], b.strings, b.num * sizeof(char*));
+		a->num = n;
+		free(b.strings);
+		break;
+	}
+	
+Out:
+	/* with xmldebug set, dump the accumulated result to file descriptor 2 */
+	if (xmldebug) {
+		fprint(2, "appendresult:\n");
+		fprint(2, "  type: %s\n", a->type == XTelem ? "elems" : "string");
+		switch (a->type) {
+		case XTelem:
+			for (n = 0; n < a->num; n++) {
+				fprint(2, "  e: ");
+				dbgprintnode(a->elems[n]);
+				fprint(2, "\n");
+			}
+			break;
+		case XTstring:
+			for (n = 0; n < a->num; n++) {
+				fprint(2, "  s: %s\n", a->strings[n]);
+			}
+		}
+	}
+}
+
+/*
+ * Turn a into a string result holding the single entry s.
+ * s is stored as is, not copied.  Whatever a held before is
+ * overwritten without being freed.  Exits via sysfatal if the
+ * one-slot array cannot be allocated.
+ */
+static void
+buildsinglestring(XpResult *a, char *s)
+{
+	char **slot;
+
+	slot = malloc(sizeof(char*));
+	if (slot == nil)
+		sysfatal("buildsinglestring: malloc: %r");
+	slot[0] = s;
+	a->type = XTstring;
+	a->num = a->size = 1;
+	a->strings = slot;
+}
+
+/*
+ * Turn a into an element result holding the single entry e.
+ * Only the pointer e is stored.  Whatever a held before is
+ * overwritten without being freed.  Exits via sysfatal if the
+ * one-slot array cannot be allocated.
+ */
+static void
+buildsingleelem(XpResult *a, Elem *e)
+{
+	Elem **slot;
+
+	slot = malloc(sizeof(Elem*));
+	if (slot == nil)
+		sysfatal("buildsingleelem: malloc: %r");
+	slot[0] = e;
+	a->type = XTelem;
+	a->num = a->size = 1;
+	a->elems = slot;
+}
+
+/*
+ * Put the "//" prefix back in front of path.
+ *
+ * With catchall zero, path is returned untouched.  Otherwise '/' is
+ * written into the two bytes directly before path and a pointer to
+ * the first of them is returned; if new is not nil, the byte
+ * directly before new is set to '/' as well.
+ *
+ * This writes outside the strings it is handed: it depends on the
+ * caller (xmllookpath) having moved path forward over a leading "//"
+ * and new forward over a separator inside the same buffer.
+ */
+static char*
+catchallpath(char *path, char *new, int catchall)
+{
+	if (!catchall)
+		return path;
+	/* step back over the two bytes that held "//" and restore them */
+	path--;
+	*path = '/';
+	path--;
+	*path = '/';
+	if (new) {
+		/* restore the separator in front of the remainder */
+		new--;
+		*new = '/';
+	}
+	return path;
+}
+
+/*
+ * Search for elements or strings using an XPath expression,
+ * starting at ep.
+ *
+ * path is matched one step at a time; the rest of the expression is
+ * handled by recursive calls.  The string is edited in place while
+ * doing so (separators and filter brackets are overwritten with NUL
+ * bytes, some '/' are written back), so it has to be writable.
+ *
+ * A nil or empty path yields ep itself.  A result that was never
+ * filled in keeps type 0 from the initial memset.  The arrays in the
+ * result come from malloc/realloc; the Elem pointers and strings
+ * stored in them are taken from the tree as is, not copied.
+ */
+XpResult
+xmllookpath(Elem *ep, char *path)
+{
+	Resub match[3];
+	Elem *el, *rel;
+	Attr *a;
+	char *attr, *val;
+	char *new;
+	int id, i;
+	int isroot;
+	char *s;
+	XpResult r, nr, mr;
+	int catchall;
+	int newcatchall;
+	
+	/* compile the step patterns on first use and keep them in the file-level globals */
+	if (!fattr)
+		fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
+	if (!fnum)
+		fnum = regcomp("\\[([0-9]+)\\]");
+	if (!fattrend)
+		fattrend = regcomp("@(.+)$");
+	
+	if (xmldebug) {
+		fprint(2, "xmllookpath: %s %s\n", ep->name, path);
+	}
+	
+	memset(&r, 0, sizeof(XpResult));
+	
+	/* nothing left to match: ep itself is the result */
+	if (!path || !*path) {
+		if (xmldebug)
+			fprint(2, "  final, return %s\n", ep->name);
+		buildsingleelem(&r, ep);
+		return r;
+	}
+	
+	/* handle starting '/' as document root and '//' as catchall */
+	isroot = 0;
+	catchall = 0;
+	if (path[0] == '/') {
+		if (path[1] == '/') {
+			/* catchall */
+			catchall = 1;
+			path += 2;
+		} else {
+			/* root */
+			isroot = 1;
+			path++;
+		}
+	}
+	if (isroot) {
+		/* climb to the topmost ancestor */
+		while (ep->parent)
+			ep = ep->parent;
+	}
+	
+	/*
+	 * Cut off the current step at the next '/'; new points at the
+	 * remainder.  A second '/' right after it marks the remainder
+	 * as a catchall ("//") query.
+	 * NOTE(review): for a catchall query path was already advanced
+	 * past "//" above, so path + 2 starts the search two bytes into
+	 * the step name - confirm this is intended.
+	 */
+	newcatchall = 0;
+	new = strchr(catchall ? path + 2 : path, '/');
+	if (new) {
+		*new = 0;
+		new++;
+		if (new[0] == '/') {
+			newcatchall = 1;
+			new++;
+		}
+	}
+	
+	if (xmldebug) {
+		fprint(2, "  query is root: %d\n", isroot);
+		fprint(2, "  query is catchall: %d\n", catchall);
+		fprint(2, "  query is newcatchall: %d\n", newcatchall);
+		fprint(2, "  testing path part: %s\n", path);
+		fprint(2, "  new path part: %s\n", new);
+	}
+	
+	/*
+	 * Catchall: for every child, look up the current step below that
+	 * child; on a hit continue with the remainder from each hit,
+	 * otherwise rerun the restored "//" query on the child.
+	 * NOTE(review): nr is always read through elems here; a step that
+	 * yields strings (such as @attr) would be misread - confirm.
+	 */
+	if (catchall) {
+		if (xmldebug)
+			fprint(2, "  rule catchall matches: %s\n", path);
+		for (el = ep->child; el; el = el->next) {
+			nr = xmllookpath(el, path);
+			if (nr.type) {
+				if (xmldebug)
+					fprint(2, "    found element\n");
+				for (i = 0; i < nr.num; i++) {
+					appendresult(&r, xmllookpath(nr.elems[i], new));
+				}
+				free(nr.elems);
+				continue;
+			}
+			if (xmldebug)
+				fprint(2, "    found child element\n");
+			appendresult(&r, xmllookpath(el, catchallpath(path, new, catchall)));
+		}
+		return r;
+	}
+	/*
+	 * step[@attr='value']: cut the step at '[' and terminate attr and
+	 * value in place, then continue with every child that carries
+	 * this attribute value.  The element name in front of '[' is not
+	 * compared in this branch.
+	 * NOTE(review): new is passed on without catchallpath, so a "//"
+	 * following the filter is not restored - confirm.
+	 */
+	memset(match, 0, 3*sizeof(Resub));
+	if (regexec(fattr, path, match, 3)) {
+		if (xmldebug)
+			fprint(2, "  rule [a=b] matches: %s\n", path);
+		*match[0].sp = 0;
+		attr = match[1].sp;
+		*match[1].ep = 0;
+		val = match[2].sp;
+		*match[2].ep = 0;
+		
+		for (el = ep->child; el; el = el->next) {
+			if (!attrmatches(el, attr, val))
+				continue;
+			appendresult(&r, xmllookpath(el, new));
+		}
+		return r;
+	}
+	/* step[n]: continue with the n-th child (counting from 1) whose name equals the step */
+	memset(match, 0, 3*sizeof(Resub));
+	if (regexec(fnum, path, match, 3)) {
+		if (xmldebug)
+			fprint(2, "  rule [n] matches: %s\n", path);
+		*match[0].sp = 0;
+		*match[1].ep = 0;
+		id = atoi(match[1].sp);
+		
+		i = 0;
+		for (el = ep->child; el; el = el->next) {
+			if (strcmp(el->name, path) != 0)
+				continue;
+			i++;
+			if (i == id) {
+				return xmllookpath(el, new);
+			}
+		}
+		return r;
+	}
+	/* @attr: the value of the first attribute of ep with that name, as a single string */
+	memset(match, 0, 3*sizeof(Resub));
+	if (regexec(fattrend, path, match, 3)) {
+		if (xmldebug)
+			fprint(2, "  rule @attr matches: %s - %s\n", ep->name, path);
+		*match[1].ep = 0;
+		attr = match[1].sp;
+		for (a = ep->attrs; a; a = a->next) {
+			if (strcmp(a->name, attr) != 0)
+				continue;
+			buildsinglestring(&r, a->value);
+			if (xmldebug)
+				fprint(2, "    value: %s\n", a->value);
+			return r;
+		}
+		if (xmldebug)
+			fprint(2, "    no value\n");
+		return r;
+	}
+	/* text(): ep's pcdata as a single string */
+	if (strcmp(path, "text()") == 0) {
+		if (xmldebug)
+			fprint(2, "  rule text() matches: %s\n", path);
+		buildsinglestring(&r, ep->pcdata);
+		return r;
+	}
+	
+	/* plain name step: put "//" back in front of the remainder if it was a catchall */
+	new = catchallpath(new, nil, newcatchall);
+	if (xmldebug)
+		fprint(2, "  no match, run for all childrennnn: %s\n", new);
+	
+	/* a root query walks the list starting at ep itself, any other query ep's children */
+	rel = isroot ? ep : ep->child;
+	for (el = rel; el; el = el->next) {
+		if (xmldebug) {
+			fprint(2, "    runchildren: ");
+			dbgprintnode(el);
+			fprint(2, "\n");
+		}
+		/*
+		 * NOTE(review): with newcatchall set every element is
+		 * descended into, whatever its name - confirm.
+		 */
+		if (newcatchall || strcmp(el->name, path) == 0) {
+			appendresult(&r, xmllookpath(el, new));
+		}
+	}
+	
+	return r;
+}
--- a/mkfile
+++ b/mkfile
@@ -3,8 +3,11 @@
 INSTALLFILES=\
 	/sys/man/1/xb \
 	/sys/man/2/xml \
+	/sys/man/2/xpath \
 	/sys/include/xml.h \
+	/sys/include/xpath.h \
 	/$objtype/lib/libxml.a \
+	/$objtype/lib/libxpath.a \
 	/$objtype/bin/xb \
 
 CFLAGS=$CFLAGS -I..
@@ -21,12 +24,15 @@
 /sys/man/2/xml: xml
 	cp $prereq $target
 
-/sys/include/xml.h: xml.h
+/sys/man/2/xpath: xpath
 	cp $prereq $target
 
-/$objtype/lib/libxml.a:
-	cd libxml && mk install && cd ..
+/sys/include/%.h: %.h
+	cp $prereq $target
 
+/$objtype/lib/%.a:V:
+	cd $stem && mk install && cd ..
+
 /$objtype/bin/xb: $O.xb
 	cp $prereq $target
 
@@ -38,7 +44,8 @@
 
 clean:V:
 	cd libxml && mk clean && cd ..
+	cd libxpath && mk clean && cd ..
 	rm -f [$OS].* *.[$OS]
 
-nuke:V:
+nuke:V: clean
 	rm -f $INSTALLFILES
--- /dev/null
+++ b/xpath
@@ -1,0 +1,54 @@
+.TH XPATH 2
+.SH NAME
+xmllookpath
+\- XPath support
+.SH SYNOPSIS
+.de PB
+.PP
+.ft L
+.nf
+..
+.PB
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+.PB
+enum {
+	XTelem = 1,
+	XTstring = 2,
+};
+.PB
+struct XpResult {
+	int type;	/* type of XpResult */
+	int num; 	/* number of results */
+	union {  	/* array of results */
+		char **strings;	/* if type == XTstring */
+		Elem **elems;  	/* if type == XTelem */
+	};
+	...
+};
+.PB
+.PD 0
+.ta +\w'\fLXpResult 'u
+XpResult	xmllookpath(Elem *ep, char *xpath)
+.SH DESCRIPTION
+.PP
+.I Libxpath
+is an extension library to
+.IR libxml .
+It provides XPath functionality for looking up certain nodes in an
+existing in-memory XML DOM model.
+.PP
+.I Xmllookpath
+is the main function for querying the XML document using an XPath string.
+It uses
+.I ep
+as the reference element within the DOM model.
+.SH SOURCE
+/sys/src/libxpath
+.SH "SEE ALSO"
+.IR xml (2).
+.SH BUGS
+The current implementation of XPath is incomplete and supports only a small subset of the language.
+A future implementation should support the full XPath specification.
--- /dev/null
+++ b/xpath.h
@@ -1,0 +1,19 @@
+#pragma lib "libxpath.a"
+
+/* values of XpResult.type; 0 is left for a result that holds nothing */
+enum {
+	XTelem = 1,	/* result holds Elem pointers */
+	XTstring = 2,	/* result holds strings */
+};
+
+/* result of an xmllookpath query */
+typedef struct XpResult XpResult;
+struct XpResult {
+	int type;	/* XTelem or XTstring */
+	int size;	/* bookkeeping for the array allocation, used by the library */
+	int num;	/* number of entries in the array */
+	union {
+		char **strings;	/* if type == XTstring */
+		Elem **elems;	/* if type == XTelem */
+	};
+};
+
+/* look up an XPath expression (second argument) starting at the given element */
+XpResult 	xmllookpath(Elem *, char *);