shithub: xml-9atom

ref: e864dc493153ca5083018c94a3737165613ef0d5
dir: /libxpath/xmllookpath.c/

View raw version
#include <u.h>
#include <libc.h>
#include <xml.h>
#include <xpath.h>
#include <regexp.h>

/*
 * Compiled patterns for the predicate forms xmllookpath understands.
 * They start out nil and are compiled by xmllookpath on first use.
 */
Reprog *fattr = nil,		/* [@name='value'] */
	*fnum = nil,		/* [number] */
	*fattrend = nil;	/* @name at the end of a step */

/*
 * Report whether element e carries an attribute called attr whose
 * value is exactly value.  Returns 1 on the first such attribute,
 * 0 if the attribute list is exhausted without a hit.
 */
static int
attrmatches(Elem *e, char *attr, char *value)
{
	Attr *cur;

	for (cur = e->attrs; cur; cur = cur->next) {
		/* the value is only compared once the name has matched */
		if (strcmp(cur->name, attr) != 0)
			continue;
		if (strcmp(cur->value, value) != 0)
			continue;
		return 1;
	}
	return 0;
}

/*
 * Round an element count up to an allocation size: for m >= 0 the
 * result is the smallest multiple of 32 that is strictly greater
 * than m.
 */
static int
bufsize(int m)
{
	enum { Chunk = 32 };

	return (m / Chunk) * Chunk + Chunk;
}

/*
 * Debug helper: write e to fd 2 as a self-closing tag with all of
 * its attributes, e.g. <name a='1' b='2' />.  No newline is added.
 */
static void
dbgprintnode(Elem *e)
{
	Attr *cur;

	fprint(2, "<%s", e->name);
	cur = e->attrs;
	while (cur) {
		fprint(2, " %s='%s'", cur->name, cur->value);
		cur = cur->next;
	}
	fprint(2, " />");
}

/*
 * Append the entries of b to the end of a and release b's buffer.
 *
 * An empty b (b.num < 1) is ignored.  If a has no type yet it simply
 * takes over b, buffer included.  Otherwise both results must have
 * the same type; a mismatch is fatal.
 *
 * When a's buffer is too small it is grown to bufsize(n) entries and
 * a->size is updated to match, so that later appends reallocate only
 * when the buffer is really full.  Running out of memory is fatal.
 *
 * With xmldebug set, the merged result is dumped to fd 2.
 */
static void
appendresult(XpResult *a, XpResult b)
{
	int n;
	void *p;
	
	if (b.num < 1)
		return;
	if (!a->type) {
		*a = b;
		goto Out;
	}
	if (a->type != b.type)
		sysfatal("error: incompatible type");
	n = a->num + b.num;
	switch (a->type) {
	case XTelem:
		if (n >= a->size) {
			/* keep the old pointer until realloc is known to have succeeded */
			p = realloc(a->elems, bufsize(n) * sizeof(Elem*));
			if (p == nil)
				sysfatal("realloc: %r");
			a->elems = p;
			a->size = bufsize(n);
		}
		memcpy(&a->elems[a->num], b.elems, b.num * sizeof(Elem*));
		a->num = n;
		free(b.elems);
		break;
	case XTstring:
		if (n >= a->size) {
			p = realloc(a->strings, bufsize(n) * sizeof(char*));
			if (p == nil)
				sysfatal("realloc: %r");
			a->strings = p;
			a->size = bufsize(n);
		}
		memcpy(&a->strings[a->num], b.strings, b.num * sizeof(char*));
		a->num = n;
		free(b.strings);
		break;
	}
	
Out:
	if (xmldebug) {
		fprint(2, "appendresult:\n");
		fprint(2, "  type: %s\n", a->type == XTelem ? "elems" : "string");
		switch (a->type) {
		case XTelem:
			for (n = 0; n < a->num; n++) {
				fprint(2, "  e: ");
				dbgprintnode(a->elems[n]);
				fprint(2, "\n");
			}
			break;
		case XTstring:
			for (n = 0; n < a->num; n++) {
				fprint(2, "  s: %s\n", a->strings[n]);
			}
			break;
		}
	}
}

/*
 * Turn a into a string result holding exactly one entry, s.
 * A one-slot buffer is allocated for it; s itself is stored by
 * pointer, not copied.  Running out of memory is fatal.
 */
static void
buildsinglestring(XpResult *a, char *s)
{
	a->type = XTstring;
	a->num = a->size = 1;
	a->strings = malloc(sizeof(char*));
	if (a->strings == nil)
		sysfatal("malloc: %r");
	a->strings[0] = s;
}

/*
 * Turn a into an element result holding exactly one entry, e.
 * A one-slot buffer is allocated for it.  Running out of memory
 * is fatal.
 */
static void
buildsingleelem(XpResult *a, Elem *e)
{
	a->type = XTelem;
	a->num = a->size = 1;
	a->elems = malloc(sizeof(Elem*));
	if (a->elems == nil)
		sysfatal("malloc: %r");
	a->elems[0] = e;
}

/*
 * Undo the in-place splitting of a "//step/rest" expression.
 *
 * When catchall is zero, path is returned untouched.  Otherwise the
 * two bytes in front of path are set to '/' again and, if new is
 * given, so is the byte in front of new.  The returned pointer is
 * the start of the restored "//" prefix.  The caller must guarantee
 * that those bytes belong to the same writable string.
 */
static char*
catchallpath(char *path, char *new, int catchall)
{
	if (catchall) {
		path -= 2;
		path[0] = '/';
		path[1] = '/';
		if (new)
			new[-1] = '/';
	}
	return path;
}

/*
 * search for element using XPath, starting at ep.
 *
 * path is consumed one step at a time; the remainder after the next
 * '/' is handled by recursion.  The string is split in place (NUL
 * bytes are written into it and only partly restored), so path must
 * be writable.
 *
 * Recognised forms:
 *   (empty or nil)   ep itself
 *   /step...         start from the top-most ancestor of ep
 *   //step...        search the descendants of ep ("catchall")
 *   name[@a='v']     children with attribute a equal to v
 *   name[N]          the N-th child called name, counting from 1
 *   @a               value of attribute a of ep
 *   text()           ep->pcdata
 *   name             children called name
 *
 * Returns an XpResult of type XTelem or XTstring.  When nothing
 * matched, the result is all zero (type 0, num 0).  A non-empty
 * result owns a heap buffer; within this file it is released with
 * free() once consumed.
 */
XpResult
xmllookpath(Elem *ep, char *path)
{
	Resub match[3];
	Elem *el, *rel;
	Attr *a;
	char *attr, *val;
	char *new;
	int id, i;
	int isroot;
	XpResult r, nr;
	int catchall;
	int newcatchall;
	
	/* compile the predicate patterns once, on first call */
	if (!fattr)
		fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
	if (!fnum)
		fnum = regcomp("\\[([0-9]+)\\]");
	if (!fattrend)
		fattrend = regcomp("@(.+)$");
	
	if (xmldebug) {
		fprint(2, "xmllookpath: %s %s\n", ep->name, path);
	}
	
	memset(&r, 0, sizeof(XpResult));
	
	/* nothing left to match: ep is the answer */
	if (!path || !*path) {
		if (xmldebug)
			fprint(2, "  final, return %s\n", ep->name);
		buildsingleelem(&r, ep);
		return r;
	}
	
	/* handle starting '/' as document root and '//' as catchall */
	isroot = 0;
	catchall = 0;
	if (path[0] == '/') {
		if (path[1] == '/') {
			/* catchall */
			catchall = 1;
			path += 2;
		} else {
			/* root */
			isroot = 1;
			path++;
		}
	}
	if (isroot) {
		while (ep->parent)
			ep = ep->parent;
	}
	
	/*
	 * Split off the current step.  path already points past any
	 * leading "//", so the search starts at path itself; skipping
	 * further would miss the separator after a one-character step
	 * and could read past the end of a shorter string.
	 * A second '/' right after the separator marks the remainder
	 * as a catchall ("step//rest").
	 */
	newcatchall = 0;
	new = strchr(path, '/');
	if (new) {
		*new = 0;
		new++;
		if (new[0] == '/') {
			newcatchall = 1;
			new++;
		}
	}
	
	if (xmldebug) {
		fprint(2, "  query is root: %d\n", isroot);
		fprint(2, "  query is catchall: %d\n", catchall);
		fprint(2, "  query is newcatchall: %d\n", newcatchall);
		fprint(2, "  testing path part: %s\n", path);
		fprint(2, "  new path part: %s\n", new);
	}
	
	if (catchall) {
		if (xmldebug)
			fprint(2, "  rule catchall matches: %s\n", path);
		for (el = ep->child; el; el = el->next) {
			/* try the single step relative to this child first */
			nr = xmllookpath(el, path);
			if (nr.type) {
				if (xmldebug)
					fprint(2, "    found element\n");
				/*
				 * NOTE(review): the hits are used as elements
				 * without checking nr.type == XTelem.
				 */
				for (i = 0; i < nr.num; i++) {
					appendresult(&r, xmllookpath(nr.elems[i], new));
				}
				free(nr.elems);
				continue;
			}
			if (xmldebug)
				fprint(2, "    found child element\n");
			/* no hit here: put the separators back and descend with the "//" query */
			appendresult(&r, xmllookpath(el, catchallpath(path, new, catchall)));
		}
		return r;
	}
	memset(match, 0, 3*sizeof(Resub));
	if (regexec(fattr, path, match, 3)) {
		if (xmldebug)
			fprint(2, "  rule [a=b] matches: %s\n", path);
		/* cut path at '[' and terminate the attribute name and value in place */
		*match[0].sp = 0;
		attr = match[1].sp;
		*match[1].ep = 0;
		val = match[2].sp;
		*match[2].ep = 0;
		
		/*
		 * NOTE(review): only the attribute is tested; the element
		 * name left in path is not compared against el->name.
		 */
		for (el = ep->child; el; el = el->next) {
			if (!attrmatches(el, attr, val))
				continue;
			appendresult(&r, xmllookpath(el, new));
		}
		return r;
	}
	memset(match, 0, 3*sizeof(Resub));
	if (regexec(fnum, path, match, 3)) {
		if (xmldebug)
			fprint(2, "  rule [n] matches: %s\n", path);
		/* cut path at '[' and terminate the digits in place */
		*match[0].sp = 0;
		*match[1].ep = 0;
		id = atoi(match[1].sp);
		
		/* i counts children with the requested name, starting at 1 */
		i = 0;
		for (el = ep->child; el; el = el->next) {
			if (strcmp(el->name, path) != 0)
				continue;
			i++;
			if (i == id) {
				return xmllookpath(el, new);
			}
		}
		return r;
	}
	memset(match, 0, 3*sizeof(Resub));
	if (regexec(fattrend, path, match, 3)) {
		if (xmldebug)
			fprint(2, "  rule @attr matches: %s - %s\n", ep->name, path);
		*match[1].ep = 0;
		attr = match[1].sp;
		/* the attribute is looked up on ep itself, not on its children */
		for (a = ep->attrs; a; a = a->next) {
			if (strcmp(a->name, attr) != 0)
				continue;
			buildsinglestring(&r, a->value);
			if (xmldebug)
				fprint(2, "    value: %s\n", a->value);
			return r;
		}
		if (xmldebug)
			fprint(2, "    no value\n");
		return r;
	}
	if (strcmp(path, "text()") == 0) {
		if (xmldebug)
			fprint(2, "  rule text() matches: %s\n", path);
		buildsinglestring(&r, ep->pcdata);
		return r;
	}
	
	/*
	 * Plain name step.  If the remainder is a catchall, its "//" is
	 * put back in front of new; this overwrites the NUL that ended
	 * path, and in that case the name comparison below is skipped,
	 * so every candidate is searched.
	 */
	new = catchallpath(new, nil, newcatchall);
	if (xmldebug)
		fprint(2, "  no match, run for all childrennnn: %s\n", new);
	
	/* a rooted query tests the root and its siblings, otherwise ep's children */
	rel = isroot ? ep : ep->child;
	for (el = rel; el; el = el->next) {
		if (xmldebug) {
			fprint(2, "    runchildren: ");
			dbgprintnode(el);
			fprint(2, "\n");
		}
		if (newcatchall || strcmp(el->name, path) == 0) {
			appendresult(&r, xmllookpath(el, new));
		}
	}
	
	return r;
}