ref: ce4a8027322c53b2832f221e2956c90dcd65fd1a
parent: e864dc493153ca5083018c94a3737165613ef0d5
author: sirjofri <sirjofri@sirjofri.de>
date: Sun Jul 14 11:44:01 EDT 2024
adds yacc/lex parser this parser should be more stable and can be easily extended, and it provides a more complete set of xpath functionality
--- a/README
+++ b/README
@@ -32,10 +32,20 @@
currently supported rules:
- root path: /path/from/root
-- attribute path: /path/to/@attribute
-- text path: /path/to/text()
-- attribute filter: /path/to[@attribute='value']/filtered
-- select all path: /path/to//all/children
+- functions: text(), position(), last(), node()
+- filter/predicates: /path/to[path=path]
+ - full path support: [path/to/@attr='hello']
+ - type-safe: num==num, string==string, node==node
+- node types:
+ - name (/path)
+ - string (/'hello')
+ - number (/2)
+ - function (/func())
- numbered element: /path/to/second[2]/element
+- stepping:
+ - /absolute/paths and relative/paths
+ - descendant-or-self:: and //
+ - child:: and x/y
+ - attribute:: and @attr
There are probably bugs.
--- /dev/null
+++ b/libxpath/dat.c
@@ -1,0 +1,421 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+/*
+ * Printable names for the node types, indexed by the N* constants
+ * of dat.h (Nroot ... Ndescself); type2str uses the type as index,
+ * so the order here has to follow that enum.  The list ends in nil.
+ */
+char *typestrings[] = {
+ "root",
+ "string",
+ "number",
+ "child",
+ "attribute",
+ "function",
+ "descendant-or-self",
+ nil
+};
+
+/*
+ * type2str maps a node type constant to its printable name.
+ * Returns nil when type lies outside the range [0, NEND).
+ */
+char*
+type2str(int type)
+{
+	if (type < 0 || type >= NEND)
+		return nil;
+	return typestrings[type];
+}
+
+Name *firstname;
+Name *lastname;
+Cond *firstcond;
+Cond *lastcond;
+Node *firstnode;
+Node *lastnode;
+Func *firstfunc;
+Func *lastfunc;
+Mbuf *firstmbuf;
+Mbuf *lastmbuf;
+
+/*
+ * findname searches the global name list for an entry whose text
+ * equals n.  Returns that entry, or nil if n is nil or not present.
+ */
+Name*
+findname(char *n)
+{
+	Name *cur;
+
+	if (n == nil)
+		return nil;
+
+	cur = firstname;
+	while (cur != nil) {
+		if (cur->name != nil && strcmp(cur->name, n) == 0)
+			return cur;
+		cur = cur->next;
+	}
+	return nil;
+}
+
+/*
+ * addname interns n in the global name list and returns its entry.
+ *
+ * A name that starts with a single quote is a string literal: the
+ * quotes are stripped (the closing quote is overwritten in place, so
+ * n must be writable in that case) and the entry is marked quoted.
+ *
+ * Literals and plain names are interned separately.  Looking them up
+ * by text alone would fold a literal such as 'text' into the unquoted
+ * entry registered for the function text(), dropping the quoted flag
+ * that addnode uses to build a string node.
+ */
+Name*
+addname(char *n)
+{
+	Name *nm;
+	int quoted, l;
+
+	quoted = 0;
+	if (*n == '\'') {
+		quoted = 1;
+		n++;
+		l = strlen(n);
+		/* strip the closing quote only if there really is one */
+		if (l > 0 && n[l-1] == '\'')
+			n[l-1] = 0;
+	}
+
+	for (nm = firstname; nm; nm = nm->next)
+		if (nm->quoted == quoted && nm->name && strcmp(nm->name, n) == 0)
+			return nm;
+
+	nm = mallocz(sizeof(Name), 1);
+	if (nm == nil)
+		sysfatal("addname: %r");
+	nm->name = strdup(n);
+	if (nm->name == nil)
+		sysfatal("addname: %r");
+	nm->quoted = quoted;
+
+	if (firstname == nil)
+		firstname = nm;
+	else
+		lastname->next = nm;
+	lastname = nm;
+	return nm;
+}
+
+/*
+ * genaddcond allocates a zeroed Cond, appends it to the global
+ * condition list (firstcond/lastcond) and returns it.
+ */
+Cond*
+genaddcond(void)
+{
+	Cond *fresh;
+
+	fresh = mallocz(sizeof(Cond), 1);
+	if (firstcond)
+		lastcond->next = fresh;
+	else
+		firstcond = fresh;
+	lastcond = fresh;
+	return fresh;
+}
+
+/*
+ * addcondb creates a binary condition of the given type whose
+ * operands are the conditions A and B.
+ */
+Cond*
+addcondb(int type, Cond *A, Cond *B)
+{
+	Cond *bin;
+
+	bin = genaddcond();
+	bin->b = B;
+	bin->a = A;
+	bin->type = type;
+	return bin;
+}
+
+/*
+ * addcondi creates a CTindex condition that holds index.
+ */
+Cond*
+addcondi(int index)
+{
+	Cond *idx;
+
+	idx = genaddcond();
+	idx->index = index;
+	idx->type = CTindex;
+	return idx;
+}
+
+/*
+ * addcondattr creates a CTattr condition pairing the attribute
+ * name attr with the expected value.
+ */
+Cond*
+addcondattr(Name *attr, Name *value)
+{
+	Cond *match;
+
+	match = genaddcond();
+	match->value = value;
+	match->attr = attr;
+	match->type = CTattr;
+	return match;
+}
+
+/*
+ * addcondhasattr creates a CThasattr condition for attribute attr.
+ */
+Cond*
+addcondhasattr(Name *attr)
+{
+	Cond *has;
+
+	has = genaddcond();
+	has->attr = attr;
+	has->type = CThasattr;
+	return has;
+}
+
+/*
+ * addcondcnode creates a condition of the given type whose operands
+ * are the node chains a and b.
+ */
+Cond*
+addcondcnode(int type, Node *a, Node *b)
+{
+	Cond *cmp;
+
+	cmp = genaddcond();
+	cmp->bnode = b;
+	cmp->anode = a;
+	cmp->type = type;
+	return cmp;
+}
+
+/*
+ * gennode allocates a zeroed Node, appends it to the global node
+ * list (firstnode/lastnode) and returns it.
+ */
+static Node*
+gennode(void)
+{
+	Node *fresh;
+
+	fresh = mallocz(sizeof(Node), 1);
+	if (firstnode)
+		lastnode->next = fresh;
+	else
+		firstnode = fresh;
+	lastnode = fresh;
+	return fresh;
+}
+
+/*
+ * addnode creates a node for name with the requested type and
+ * predicate cond.  A quoted name always yields an Nstring node,
+ * whatever type was requested.  For an Nfunction node the function
+ * registered under name is looked up and attached (nil if none).
+ */
+Node*
+addnode(Name *name, int type, Cond *cond)
+{
+	Node *nd;
+
+	nd = gennode();
+	if (name != nil && name->quoted)
+		type = Nstring;
+
+	nd->cond = cond;
+	nd->type = type;
+	nd->name = name;
+
+	if (type == Nfunction)
+		nd->func = findfunc(name);
+	return nd;
+}
+
+/*
+ * addnoden creates an Nnumber node holding number, with predicate
+ * cond.
+ */
+Node*
+addnoden(int number, Cond *cond)
+{
+	Node *num;
+
+	num = gennode();
+	num->cond = cond;
+	num->number = number;
+	num->type = Nnumber;
+	return num;
+}
+
+/*
+ * chainnode appends new to the end of the chain that starts at
+ * base and returns base.
+ */
+Node*
+chainnode(Node *base, Node *new)
+{
+	Node *tail;
+
+	for (tail = base; tail->chain != nil; tail = tail->chain)
+		;
+	tail->chain = new;
+	return base;
+}
+
+/*
+ * findfunc returns the function registered under name, or nil.
+ * Names are compared by pointer, not by text.
+ */
+Func*
+findfunc(Name *name)
+{
+	Func *cur;
+
+	cur = firstfunc;
+	while (cur != nil && cur->name != name)
+		cur = cur->next;
+	return cur;
+}
+
+/*
+ * addfunc registers f as the implementation of the function called
+ * name and returns the new entry.  Registering a name twice is a
+ * fatal error.
+ */
+Func*
+addfunc(Name *name, void (*f)(XpResult*, Elem*))
+{
+	Func *entry;
+
+	if (findfunc(name) != nil)
+		sysfatal("double registering function: %s", name->name);
+
+	entry = mallocz(sizeof(Func), 1);
+	entry->name = name;
+	entry->f = f;
+	if (firstfunc == nil)
+		firstfunc = entry;
+	else
+		lastfunc->next = entry;
+	lastfunc = entry;
+	return entry;
+}
+
+/*
+ * debugprintceq dumps both node chains of an equality condition to
+ * standard error; it prints nothing unless both sides are set.
+ */
+static void
+debugprintceq(Cond *c)
+{
+	if (c->anode == nil || c->bnode == nil)
+		return;
+
+	fprint(2, " A: nodes\n");
+	debugprintnodes(c->anode);
+	fprint(2, " B: nodes\n");
+	debugprintnodes(c->bnode);
+}
+
+/*
+ * debugprintcond dumps condition c, and recursively its operands,
+ * to standard error.  A nil condition prints nothing.
+ */
+static void
+debugprintcond(Cond *c)
+{
+	if (c == nil)
+		return;
+
+	fprint(2, " Cond:\n");
+
+	/* both boolean operators print as: left, operator, right */
+	if (c->type == CTand || c->type == CTor) {
+		debugprintcond(c->a);
+		fprint(2, "%s", c->type == CTand ? " AND\n" : " OR\n");
+		debugprintcond(c->b);
+		return;
+	}
+
+	switch (c->type) {
+	case CTindex:
+		fprint(2, " Index: %d\n", c->index);
+		break;
+	case CTattr:
+		fprint(2, " Attr: %s == %s\n", c->attr->name, c->value->name);
+		break;
+	case CThasattr:
+		fprint(2, " Attr: %s\n", c->attr->name);
+		break;
+	case CTeq:
+		debugprintceq(c);
+		break;
+	}
+}
+
+/*
+ * debugprintfunc prints the name and address of f to standard
+ * error; a nil f prints nothing.
+ */
+void
+debugprintfunc(Func *f)
+{
+	if (f != nil)
+		fprint(2, " Func: %s %p\n", f->name->name, f->f);
+}
+
+/*
+ * debugprintnodes dumps the chain starting at node to standard
+ * error; with a nil argument it starts at the first node of the
+ * global list.
+ */
+void
+debugprintnodes(Node *node)
+{
+	Node *cur;
+
+	cur = node;
+	if (cur == nil)
+		cur = firstnode;
+
+	while (cur != nil) {
+		fprint(2, "Node:\n");
+		fprint(2, " type: %s\n", type2str(cur->type));
+		if (cur->type == Nnumber)
+			fprint(2, " number: %d\n", cur->number);
+		else
+			fprint(2, " name: %s\n", cur->name ? cur->name->name : "<root>");
+		debugprintcond(cur->cond);
+		debugprintfunc(cur->func);
+		cur = cur->chain;
+	}
+}
+
+/*
+ * reset frees everything allocated for the previous expression
+ * (names, nodes, conditions, registered functions and registered
+ * buffers), empties the global lists and re-registers the built-in
+ * functions through initfuncs.
+ */
+static void
+reset(void)
+{
+	Cond *c, *oc;
+	Node *n, *on;
+	Name *nm, *onm;
+	Func *f, *of;
+	Mbuf *m, *om;
+
+	for (nm = firstname; nm;) {
+		onm = nm->next;
+		free(nm->name);
+		free(nm);
+		nm = onm;
+	}
+	firstname = lastname = nil;
+
+	for (n = firstnode; n;) {
+		on = n->next;
+		free(n);
+		n = on;
+	}
+	firstnode = lastnode = nil;
+
+	for (c = firstcond; c;) {
+		oc = c->next;
+		free(c);
+		c = oc;
+	}
+	firstcond = lastcond = nil;
+
+	/*
+	 * The loop must be driven by f.  Testing c here (always nil
+	 * after the loop above) skipped the body and leaked every Func.
+	 */
+	for (f = firstfunc; f;) {
+		of = f->next;
+		free(f);
+		f = of;
+	}
+	firstfunc = lastfunc = nil;
+
+	for (m = firstmbuf; m;) {
+		om = m->next;
+		free(m->ptr);
+		free(m);
+		m = om;
+	}
+	firstmbuf = lastmbuf = nil;
+
+	/* the function table was just emptied; register the built-ins again */
+	initfuncs();
+}
+
+/*
+ * regmbuf records ptr in the global buffer list; reset() frees
+ * every pointer registered this way.
+ */
+void
+regmbuf(void *ptr)
+{
+	Mbuf *entry;
+
+	entry = mallocz(sizeof(Mbuf), 1);
+	if (firstmbuf == nil)
+		firstmbuf = entry;
+	else
+		lastmbuf->next = entry;
+	lastmbuf = entry;
+	entry->ptr = ptr;
+}
+
+void setinputpath(char*);
+int yyparse(void);
+
+/*
+ * parsexpath parses the XPath expression s and returns the first
+ * node of the resulting chain.  All state of the previous parse is
+ * released first, so a returned chain is only valid until the next
+ * call.  Returns nil and sets errstr if s is nil or does not parse.
+ */
+Node*
+parsexpath(char *s)
+{
+	/* the lexer reads s directly; a nil path would be dereferenced there */
+	if (s == nil) {
+		werrstr("nil xpath");
+		return nil;
+	}
+
+	reset();
+	setinputpath(s);
+	if (!yyparse())	/* if successful */
+		return firstnode;
+	werrstr("syntax error");
+	return nil;
+}
+
+/*
+ * buildsinglestring turns r into an Xstring result that holds the
+ * one string s.  Only the one-element array is allocated; s itself
+ * is stored by reference.
+ */
+void
+buildsinglestring(XpResult *r, char *s)
+{
+	char **slot;
+
+	r->type = Xstring;
+	r->size = 1;
+	r->num = 1;
+	slot = malloc(sizeof(char*));
+	r->strings = slot;
+	slot[0] = s;
+}
+
+/*
+ * buildsingleelem turns r into an Xelem result that holds the one
+ * element e.
+ */
+void
+buildsingleelem(XpResult *r, Elem *e)
+{
+	Elem **slot;
+
+	r->type = Xelem;
+	r->size = 1;
+	r->num = 1;
+	slot = malloc(sizeof(Elem*));
+	r->elems = slot;
+	slot[0] = e;
+}
+
+/*
+ * buildsinglenum turns r into an Xnum result that holds the one
+ * number n.
+ */
+void
+buildsinglenum(XpResult *r, int n)
+{
+	int *slot;
+
+	r->type = Xnum;
+	r->size = 1;
+	r->num = 1;
+	slot = malloc(sizeof(int));
+	r->numbers = slot;
+	slot[0] = n;
+}
--- /dev/null
+++ b/libxpath/dat.h
@@ -1,0 +1,97 @@
+/* Name is one entry of the global list of names and literals created by addname. */
+typedef struct Name Name;
+struct Name {
+ char *name; /* the text; a private copy made by addname */
+ int quoted; /* set when the text was written in single quotes */
+ Name *next; /* next entry of the global name list */
+};
+
+Name* addname(char *);
+
+
+typedef struct Func Func;
+typedef struct Node Node;
+typedef struct Cond Cond;
+
+/*
+ * Node types.  typestrings[] in dat.c is indexed by these values,
+ * so both lists have to stay in the same order; NEND is the number
+ * of types and the upper bound accepted by type2str.
+ */
+enum {
+ Nroot = 0, /* ^/ */
+ Nstring, /* 'bla' */
+ Nnumber, /* 234 */
+ Nchild, /* child:: */
+ Nattribute, /* attribute:: */
+ Nfunction, /* name() */
+ Ndescself, /* descendant-or-self:: */
+ NEND,
+};
+
+/* Node is one step of a parsed path. */
+struct Node {
+ Name *name; /* name of node */
+ int type; /* type of node */
+ int number; /* value of an Nnumber node */
+ Cond *cond; /* conditions */
+ Func *func; /* function */
+ Node *chain; /* next node in chain */
+
+ Node *next; /* next node of the global allocation list, walked by reset */
+};
+
+/* Func is one entry of the table of callable functions filled by addfunc. */
+struct Func {
+ Name *name; /* name the function is registered under */
+ void (*f)(XpResult*, Elem*); /* implementation; called with the result to fill and the current element */
+
+ Func *next; /* next entry of the function list */
+};
+
+/* Condition (predicate) types stored in Cond.type. */
+enum {
+ CTand = 0, /* a and b */
+ CTor, /* a or b */
+ CTindex, /* position equals index */
+ CTeq, /* anode = bnode */
+ CTattr, /* attribute attr has the given value */
+ CThasattr, /* attribute attr exists */
+};
+
+/* Cond is one predicate; which fields are used depends on type. */
+struct Cond {
+ int type; /* one of the CT* constants */
+ int index; /* CTindex: position to match */
+ Name *attr; /* CTattr, CThasattr: attribute name */
+ Name *value; /* CTattr: expected attribute value */
+ Cond *a; /* CTand, CTor: left operand */
+ Cond *b; /* CTand, CTor: right operand */
+ Node *anode; /* CTeq: left path */
+ Node *bnode; /* CTeq: right path */
+
+ Cond *next; /* next entry of the global allocation list, walked by reset */
+};
+
+void debugprintnodes(Node*);
+Node* parsexpath(char*);
+
+
+Cond* addcondb(int, Cond*, Cond*);
+Cond* addcondi(int);
+Cond* addcondattr(Name*, Name*);
+Cond* addcondhasattr(Name*);
+Cond* addcondcnode(int, Node*, Node*);
+
+Node* addnode(Name*, int, Cond*);
+Node* addnoden(int, Cond*);
+Node* chainnode(Node*, Node*);
+
+Func* findfunc(Name*);
+Func* addfunc(Name*, void (*f)(XpResult*, Elem*));
+void initfuncs(void);
+
+void buildsinglestring(XpResult*, char*);
+void buildsingleelem(XpResult*, Elem*);
+void buildsinglenum(XpResult*, int);
+
+/* Mbuf is one entry of the list of buffers registered with regmbuf. */
+typedef struct Mbuf Mbuf;
+struct Mbuf {
+ void *ptr; /* registered pointer; freed by reset in dat.c */
+ Mbuf *next; /* next registered buffer */
+};
+
+void regmbuf(void*);
+
+int position(Elem*);
+int last(Elem*);
--- /dev/null
+++ b/libxpath/fns.c
@@ -1,0 +1,88 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+/*
+ * position returns the 1-based index of e among those children of
+ * its parent that have the same name as e.
+ *
+ * An element without a parent (such as the temporary root that
+ * xmllookpath puts above the document) has no siblings and is
+ * reported at position 1 instead of dereferencing a nil parent.
+ */
+int
+position(Elem *e)
+{
+	Elem *p;
+	int i;
+
+	if (e->parent == nil)
+		return 1;
+
+	i = 0;
+	for (p = e->parent->child; p; p = p->next) {
+		if (strcmp(p->name, e->name) == 0)
+			i++;
+		if (p == e)
+			return i;
+	}
+	return i;
+}
+
+/*
+ * last returns how many children of e's parent have the same name
+ * as e, i.e. the position of the last such sibling.
+ *
+ * An element without a parent has no siblings; it counts as the
+ * only one, so 1 is returned instead of dereferencing a nil parent.
+ */
+int
+last(Elem *e)
+{
+	Elem *p;
+	int i;
+
+	if (e->parent == nil)
+		return 1;
+
+	i = 0;
+	for (p = e->parent->child; p; p = p->next)
+		if (strcmp(p->name, e->name) == 0)
+			i++;
+	return i;
+}
+
+/* ftext implements text(): a single-string result holding ep's pcdata. */
+void
+ftext(XpResult *r, Elem *ep)
+{
+ buildsinglestring(r, ep->pcdata);
+}
+
+/* fposition implements position(): a single-number result holding position(ep). */
+void
+fposition(XpResult *r, Elem *ep)
+{
+ buildsinglenum(r, position(ep));
+}
+
+/*
+ * fprocinst is the placeholder for processing-instruction(), which
+ * is not implemented.  It leaves r untouched and records the fact in
+ * errstr; xmllookpath checks errstr after evaluation, so the caller
+ * gets an error instead of a silently empty result and a stray,
+ * unterminated line on standard error.
+ */
+void
+fprocinst(XpResult *r, Elem *ep)
+{
+	USED(r);
+	USED(ep);
+	werrstr("function processing-instruction() not implemented");
+}
+
+/*
+ * fcomment is the placeholder for comment(), which is not
+ * implemented.  It leaves r untouched and records the fact in
+ * errstr; xmllookpath checks errstr after evaluation, so the caller
+ * gets an error instead of a silently empty result and a stray,
+ * unterminated line on standard error.
+ */
+void
+fcomment(XpResult *r, Elem *ep)
+{
+	USED(r);
+	USED(ep);
+	werrstr("function comment() not implemented");
+}
+
+/* fnode implements node(): a single-element result holding ep itself. */
+void
+fnode(XpResult *r, Elem *ep)
+{
+ buildsingleelem(r, ep);
+}
+
+/* flast implements last(): a single-number result holding last(ep). */
+void
+flast(XpResult *r, Elem *ep)
+{
+ buildsinglenum(r, last(ep));
+}
+
+/*
+ * initfuncs registers the built-in functions under their XPath
+ * names, in the order listed.
+ */
+void
+initfuncs()
+{
+	static struct {
+		char *name;
+		void (*fn)(XpResult*, Elem*);
+	} builtins[] = {
+		{ "text", ftext },
+		{ "position", fposition },
+		{ "processing-instruction", fprocinst },
+		{ "comment", fcomment },
+		{ "node", fnode },
+		{ "last", flast },
+	};
+	int i;
+
+	for (i = 0; i < nelem(builtins); i++)
+		addfunc(addname(builtins[i].name), builtins[i].fn);
+}
--- a/libxpath/mkfile
+++ b/libxpath/mkfile
@@ -4,9 +4,30 @@
OFILES=\
xmllookpath.$O\
+ lex.yy.$O\
+ y.tab.$O\
+ dat.$O\
+ fns.$O\
+LFILES=xmlpathl.l\
+
+YFILES=xmlpath.y\
+
HFILES=\
/sys/include/xml.h\
/sys/include/xpath.h\
+ dat.h\
+ y.tab.h\
+CLEANFILES=\
+ y.tab.c\
+ y.tab.h\
+ lex.yy.c\
+
</sys/src/cmd/mksyslib
+
+y.tab.h y.tab.c: $YFILES
+ yacc -dv $YFLAGS $prereq
+
+lex.yy.c: $LFILES
+ lex -9 $prereq
--- /dev/null
+++ b/libxpath/test/mkfile
@@ -1,0 +1,6 @@
+</$objtype/mkfile
+
+TEST=\
+ t\
+
+</sys/src/cmd/mktest
--- /dev/null
+++ b/libxpath/test/t.c
@@ -1,0 +1,121 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+
+/* XPath expressions that main runs, in order, against test.xml; nil ends the list. */
+char *tests[] = {
+ "/path//to[2]/node[@a='b']/@attr",
+ "/path/text()",
+ "/html/a",
+ "/html/a[2]/text()",
+ "/html//e",
+ "/html/a[@href='p.php']/text()",
+ "/html/a[position()='2']/text()", /* should error */
+ "/html/a[position()=2]/text()",
+ "/html/'2'",
+ "/html/2",
+ "/html/'hello'",
+ "/[inval]",
+ nil
+};
+
+Xml *x;
+
+/* printelem prints e as a self-closing tag with all of its attributes. */
+void
+printelem(Elem *e)
+{
+	Attr *at;
+
+	print("el: <%s", e->name);
+	at = e->attrs;
+	while (at != nil) {
+		print(" %s='%s'", at->name, at->value);
+		at = at->next;
+	}
+	print(" />\n");
+}
+
+/* printstring prints one string result on its own line. */
+void
+printstring(char *s)
+{
+ print("st: %s\n", s);
+}
+
+/* printnum prints one numeric result on its own line. */
+void
+printnum(int n)
+{
+ print("nr: %d\n", n);
+}
+
+/*
+ * runtest evaluates the expression s against the parsed document,
+ * prints every result according to the result type, reports (and
+ * then clears) an error, and finally releases the result array.
+ */
+void
+runtest(char *s)
+{
+	XpResult res;
+	int i;
+
+	print("====== test ======\n - %s\n", s);
+	res = xmllookpath(x->root, s);
+	print("===== result =====\n");
+
+	print("found %d results:\n", res.num);
+
+	if (res.error) {
+		fprint(2, "err: %r\n");
+		werrstr("");
+	}
+
+	for (i = 0; i < res.num; i++) {
+		if (res.type == Xelem) {
+			if (res.elems == nil)
+				sysfatal("elems not set");
+			printelem(res.elems[i]);
+		} else if (res.type == Xstring) {
+			if (res.strings == nil)
+				sysfatal("strings not set");
+			printstring(res.strings[i]);
+		} else if (res.type == Xnum) {
+			if (res.numbers == nil)
+				sysfatal("numbers not set");
+			printnum(res.numbers[i]);
+		}
+	}
+
+	/* an empty result is never freed */
+	if (res.num == 0)
+		return;
+
+	if (res.type == Xelem)
+		free(res.elems);
+	else if (res.type == Xstring)
+		free(res.strings);
+	else if (res.type == Xnum)
+		free(res.numbers);
+}
+
+/*
+ * main parses test.xml from the current directory and runs every
+ * expression of tests[] against it.
+ */
+void
+main(int argc, char **argv)
+{
+	char **s;
+	int fd;
+
+	USED(argc, argv);
+
+	fd = open("test.xml", OREAD);
+	if (fd < 0)
+		sysfatal("unable to test: %r");
+
+	x = xmlparse(fd, 8192, 0);
+	close(fd);
+
+	/*
+	 * runtest dereferences x->root; stop with a message instead of
+	 * faulting.  NOTE(review): assumes xmlparse reports failure by
+	 * returning nil - confirm against xml(2).
+	 */
+	if (x == nil)
+		sysfatal("unable to parse test.xml: %r");
+
+	/* set xmldebug = 1 here to trace parsing and matching */
+
+	for (s = tests; *s; s++)
+		runtest(*s);
+
+	exits(nil);
+}
--- /dev/null
+++ b/libxpath/test/test.xml
@@ -1,0 +1,13 @@
+<html lang='en'>
+ <a href='help.php'>Some link</a>
+ <a href='whatever.php'>Other link</a>
+ <a href='p.php' bref='p.php'>Second link</a>
+ <e a='1'>
+ <c>
+ <d>
+ <e a='2'>
+ </e>
+ </d>
+ </c>
+ </e>
+</html>
--- a/libxpath/xmllookpath.c
+++ b/libxpath/xmllookpath.c
@@ -2,25 +2,11 @@
#include <libc.h>
#include <xml.h>
#include <xpath.h>
-#include <regexp.h>
+#include "dat.h"
-Reprog *fattr = nil;
-Reprog *fnum = nil;
-Reprog *fattrend = nil;
+static XpResult recurse(Elem*, Node*);
static int
-attrmatches(Elem *e, char *attr, char *value)
-{
- Attr *a;
- for (a = e->attrs; a; a = a->next) {
- if (strcmp(a->name, attr) == 0
- && strcmp(a->value, value) == 0)
- return 1;
- }
- return 0;
-}
-
-static int
bufsize(int m)
{
int b = 32;
@@ -37,6 +23,20 @@
fprint(2, " />");
}
+/*
+ * resulttypestring names a result type for messages; any value that
+ * is not Xelem, Xstring or Xnum yields "invalid".
+ */
+static char*
+resulttypestring(int type)
+{
+	if (type == Xelem)
+		return "elem";
+	if (type == Xstring)
+		return "string";
+	if (type == Xnum)
+		return "num";
+	return "invalid";
+}
+
static void
appendresult(XpResult *a, XpResult b)
{
@@ -52,7 +52,7 @@
sysfatal("error: incompatible type");
n = a->num + b.num;
switch (a->type) {
- case XTelem:
+ case Xelem:
if (n >= a->size) {
a->elems = realloc(a->elems, bufsize(n) * sizeof(Elem*));
}
@@ -60,7 +60,7 @@
a->num = n;
free(b.elems);
break;
- case XTstring:
+ case Xstring:
if (n >= a->size) {
a->strings = realloc(a->strings, bufsize(n) * sizeof(char*));
}
@@ -68,14 +68,22 @@
a->num = n;
free(b.strings);
break;
+ case Xnum:
+ if (n >= a->size) {
+ a->numbers = realloc(a->numbers, bufsize(n) * sizeof(int));
+ }
+ memcpy(&a->numbers[a->num], b.numbers, b.num * sizeof(int));
+ a->num = n;
+ free(b.numbers);
+ break;
}
Out:
if (xmldebug) {
fprint(2, "appendresult:\n");
- fprint(2, " type: %s\n", a->type == XTelem ? "elems" : "string");
+ fprint(2, " type: %s\n", resulttypestring(a->type));
switch (a->type) {
- case XTelem:
+ case Xelem:
for (n = 0; n < a->num; n++) {
fprint(2, " e: ");
dbgprintnode(a->elems[n]);
@@ -82,220 +90,202 @@
fprint(2, "\n");
}
break;
- case XTstring:
+ case Xstring:
for (n = 0; n < a->num; n++) {
fprint(2, " s: %s\n", a->strings[n]);
}
+ break;
+ case Xnum:
+ for (n = 0; n < a->num; n++) {
+ fprint(2, " n: %d\n", a->numbers[n]);
+ }
+ break;
}
}
}
-static void
-buildsinglestring(XpResult *a, char *s)
+/*
+ * getattrvalue returns a single-string result holding the value of
+ * attribute attr of ep, or an empty result (type 0, num 0) when ep
+ * has no such attribute.
+ */
+static XpResult
+getattrvalue(Elem *ep, char *attr)
{
- a->type = XTstring;
- a->num = a->size = 1;
- a->strings = malloc(sizeof(char*));
- a->strings[0] = s;
+ XpResult r;
+ Attr *a;
+
+	/*
+	 * Clear the whole result, not just the type: on a miss it is
+	 * returned as is, and callers read num, error and the array.
+	 */
+	memset(&r, 0, sizeof(XpResult));
+ for (a = ep->attrs; a; a = a->next)
+ if (strcmp(a->name, attr) == 0) {
+ buildsinglestring(&r, a->value);
+ return r;
+ }
+ return r;
}
-static void
-buildsingleelem(XpResult *a, Elem *e)
+/*
+ * freeresult releases the array of a result produced by recurse.
+ * The three array pointers share one union slot, so freeing elems
+ * covers strings and numbers as well.  A result whose type is still
+ * 0 never had an array attached and is left alone.
+ */
+static void
+freeresult(XpResult *r)
+{
+	if (r->type != 0)
+		free(r->elems);
+}
+
+/*
+ * equals evaluates a CTeq condition for element e: both paths are
+ * evaluated relative to e and compared.  The comparison only holds
+ * if each side yields exactly one value and both values have the
+ * same type; a type mismatch additionally sets errstr.  Returns 1 on
+ * equality and 0 otherwise.  The temporary results are released
+ * before returning.
+ */
+static int
+equals(Elem *e, Cond *c)
{
- a->type = XTelem;
- a->num = a->size = 1;
- a->elems = malloc(sizeof(Elem*));
- a->elems[0] = e;
+	XpResult ra, rb;
+	int eq;
+
+	if (!(c->anode && c->bnode))
+		return 0;
+
+	ra = recurse(e, c->anode);
+	rb = recurse(e, c->bnode);
+
+	eq = 0;
+	if (ra.num != 1 || rb.num != 1) {
+		/* only single values can be compared */
+	} else if (ra.type != rb.type) {
+		werrstr("equals: A.type != B.type (%s != %s)",
+			resulttypestring(ra.type), resulttypestring(rb.type));
+	} else {
+		switch (ra.type) {
+		case Xstring:
+			eq = strcmp(ra.strings[0], rb.strings[0]) == 0;
+			break;
+		case Xelem:
+			eq = ra.elems[0] == rb.elems[0];
+			break;
+		case Xnum:
+			eq = ra.numbers[0] == rb.numbers[0];
+			break;
+		default:
+			sysfatal("code error");
+		}
+	}
+
+	freeresult(&ra);
+	freeresult(&rb);
+	return eq;
}
-static char*
-catchallpath(char *path, char *new, int catchall)
+/*
+ * evalcond reports whether element e satisfies predicate c: 1 if it
+ * does, 0 if not.  A nil condition always matches.  CTand and CTor
+ * combine their operands with short-circuit evaluation, CTindex
+ * compares c->index with position(e), CTattr and CThasattr inspect
+ * the attributes of e, and CTeq is delegated to equals.  An unknown
+ * condition type sets errstr and is treated as a match.
+ */
+static int
+evalcond(Elem *e, Cond *c)
{
- if (!catchall)
- return path;
- path--;
- *path = '/';
- path--;
- *path = '/';
- if (new) {
- new--;
- *new = '/';
+ Attr *a;
+
+ if (!c)
+ return 1;
+
+ switch (c->type) {
+ case CTand:
+ return evalcond(e, c->a) && evalcond(e, c->b);
+ case CTor:
+ return evalcond(e, c->a) || evalcond(e, c->b);
+ case CTindex:
+ return position(e) == c->index;
+ break;
+ case CTattr:
+ for (a = e->attrs; a; a = a->next)
+ if (strcmp(a->name, c->attr->name) == 0
+ && strcmp(a->value, c->value->name) == 0)
+ return 1;
+ return 0;
+ case CThasattr:
+ for (a = e->attrs; a; a = a->next)
+ if (strcmp(a->name, c->attr->name) == 0)
+ return 1;
+ return 0;
+ case CTeq:
+ return equals(e, c);
}
- return path;
+	/* only reached for a condition type without a case above */
+ werrstr("unhandled predicate condition: %d\n", c->type);
+ return 1;
}
-/*
- * search for element using XPath, starting at ep.
- */
-XpResult
-xmllookpath(Elem *ep, char *path)
+/*
+ * recurse evaluates the node chain n relative to element ep and
+ * returns the collected result.
+ *
+ *	nil		end of the chain: the result is ep itself
+ *	Nroot		climb to the topmost element, continue with the chain
+ *	Nattribute	value of the named attribute of ep
+ *	Nfunction	whatever the registered function produces for ep
+ *	Ndescself	search the children of ep, and recursively their
+ *			children, for matching elements
+ *	Nchild		search only the children of ep
+ *	Nstring/Nnumber	the literal value
+ *
+ * A call to a function that is not registered sets errstr and yields
+ * an empty result; the path comes from the caller, so a bad name
+ * must not terminate the program.
+ */
+static XpResult
+recurse(Elem *ep, Node *n)
{
- Resub match[3];
- Elem *el, *rel;
- Attr *a;
- char *attr, *val;
- char *new;
- int id, i;
- int isroot;
+ XpResult r;
char *s;
- XpResult r, nr, mr;
- int catchall;
- int newcatchall;
- if (!fattr)
- fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
- if (!fnum)
- fnum = regcomp("\\[([0-9]+)\\]");
- if (!fattrend)
- fattrend = regcomp("@(.+)$");
-
- if (xmldebug) {
- fprint(2, "xmllookpath: %s %s\n", ep->name, path);
- }
-
memset(&r, 0, sizeof(XpResult));
- if (!path || !*path) {
- if (xmldebug)
- fprint(2, " final, return %s\n", ep->name);
+ if (!n) {
buildsingleelem(&r, ep);
return r;
}
- /* handle starting '/' as document root and '//' as catchall */
- isroot = 0;
- catchall = 0;
- if (path[0] == '/') {
- if (path[1] == '/') {
- /* catchall */
- catchall = 1;
- path += 2;
- } else {
- /* root */
- isroot = 1;
- path++;
- }
- }
- if (isroot) {
+ if (n->type == Nroot) {
while (ep->parent)
ep = ep->parent;
+ r = recurse(ep, n->chain);
+ return r;
}
- newcatchall = 0;
- new = strchr(catchall ? path + 2 : path, '/');
- if (new) {
- *new = 0;
- new++;
- if (new[0] == '/') {
- newcatchall = 1;
- new++;
- }
+ if (n->type == Nattribute) {
+ return getattrvalue(ep, n->name->name);
}
- if (xmldebug) {
- fprint(2, " query is root: %d\n", isroot);
- fprint(2, " query is catchall: %d\n", catchall);
- fprint(2, " query is newcatchall: %d\n", newcatchall);
- fprint(2, " testing path part: %s\n", path);
- fprint(2, " new path part: %s\n", new);
+ if (n->type == Nfunction) {
+	if (!(n->func && n->func->f)) {
+		/* unknown function: report it and return the empty result */
+		werrstr("unknown function: %s()", n->name->name);
+		return r;
+	}
+ n->func->f(&r, ep);
+ return r;
}
- if (catchall) {
- if (xmldebug)
- fprint(2, " rule catchall matches: %s\n", path);
- for (el = ep->child; el; el = el->next) {
- nr = xmllookpath(el, path);
- if (nr.type) {
- if (xmldebug)
- fprint(2, " found element\n");
- for (i = 0; i < nr.num; i++) {
- appendresult(&r, xmllookpath(nr.elems[i], new));
- }
- free(nr.elems);
- continue;
+ if (n->type == Ndescself) {
+ /* descendant or self */
+ for (Elem *e = ep->child; e; e = e->next) {
+ if (strcmp(e->name, n->name->name) == 0
+ && evalcond(e, n->cond)) {
+ /* if found, proceed with next rule */
+ appendresult(&r, recurse(e, n->chain));
}
- if (xmldebug)
- fprint(2, " found child element\n");
- appendresult(&r, xmllookpath(el, catchallpath(path, new, catchall)));
+ /* search for more occuring children */
+ appendresult(&r, recurse(e, n));
}
return r;
}
- memset(match, 0, 3*sizeof(Resub));
- if (regexec(fattr, path, match, 3)) {
- if (xmldebug)
- fprint(2, " rule [a=b] matches: %s\n", path);
- *match[0].sp = 0;
- attr = match[1].sp;
- *match[1].ep = 0;
- val = match[2].sp;
- *match[2].ep = 0;
-
- for (el = ep->child; el; el = el->next) {
- if (!attrmatches(el, attr, val))
- continue;
- appendresult(&r, xmllookpath(el, new));
- }
- return r;
- }
- memset(match, 0, 3*sizeof(Resub));
- if (regexec(fnum, path, match, 3)) {
- if (xmldebug)
- fprint(2, " rule [n] matches: %s\n", path);
- *match[0].sp = 0;
- *match[1].ep = 0;
- id = atoi(match[1].sp);
-
- i = 0;
- for (el = ep->child; el; el = el->next) {
- if (strcmp(el->name, path) != 0)
- continue;
- i++;
- if (i == id) {
- return xmllookpath(el, new);
+
+ if (n->type == Nchild) {
+ for (Elem *e = ep->child; e; e = e->next)
+ if (strcmp(e->name, n->name->name) == 0
+ && evalcond(e, n->cond)) {
+ appendresult(&r, recurse(e, n->chain));
}
- }
return r;
}
- memset(match, 0, 3*sizeof(Resub));
- if (regexec(fattrend, path, match, 3)) {
- if (xmldebug)
- fprint(2, " rule @attr matches: %s - %s\n", ep->name, path);
- *match[1].ep = 0;
- attr = match[1].sp;
- for (a = ep->attrs; a; a = a->next) {
- if (strcmp(a->name, attr) != 0)
- continue;
- buildsinglestring(&r, a->value);
- if (xmldebug)
- fprint(2, " value: %s\n", a->value);
- return r;
- }
- if (xmldebug)
- fprint(2, " no value\n");
+
+ if (n->type == Nstring) {
+ buildsinglestring(&r, n->name->name);
return r;
}
- if (strcmp(path, "text()") == 0) {
- if (xmldebug)
- fprint(2, " rule text() matches: %s\n", path);
- buildsinglestring(&r, ep->pcdata);
+
+ if (n->type == Nnumber) {
+ buildsinglenum(&r, n->number);
return r;
}
- new = catchallpath(new, nil, newcatchall);
- if (xmldebug)
- fprint(2, " no match, run for all childrennnn: %s\n", new);
+ return r;
+}
+
+/*
+ * search for element using XPath, starting at ep.
+ *
+ * The path is parsed into a node chain and evaluated relative to
+ * ep.  While the evaluation runs, a temporary root element is put
+ * above the topmost element of the tree and removed again afterwards.
+ *
+ * Errors are reported through the error field of the result together
+ * with errstr: it is set when the path does not parse or when the
+ * evaluation left a message in errstr.  errstr is cleared before the
+ * evaluation, because a message left behind by some earlier,
+ * unrelated failure would otherwise mark a good lookup as failed.
+ */
+XpResult
+xmllookpath(Elem *ep, char *path)
+{
+ Node *nodes;
+ Elem *p, root;
+ XpResult r;
+ char err[2];
- rel = isroot ? ep : ep->child;
- for (el = rel; el; el = el->next) {
- if (xmldebug) {
- fprint(2, " runchildren: ");
- dbgprintnode(el);
- fprint(2, "\n");
- }
- if (newcatchall || strcmp(el->name, path) == 0) {
- appendresult(&r, xmllookpath(el, new));
- }
+ memset(&r, 0, sizeof(XpResult));
+
+ nodes = parsexpath(path);
+ if (!nodes) {
+ r.error = 1;
+ return r;
}
+ if (xmldebug)
+ debugprintnodes(nil);
+ p = ep;
+ while (p->parent)
+ p = p->parent;
+ memset(&root, 0, sizeof(Elem));
+ root.child = p;
+ p->parent = &root;
+
+	/* start from a clean errstr so only errors of this lookup are seen below */
+	werrstr("");
+ r = recurse(ep, nodes);
+
+ rerrstr(err, sizeof(err));
+ if (*err)
+ r.error = 1;
+
+ root.child = nil;
+ p->parent = nil;
return r;
}
--- /dev/null
+++ b/libxpath/xmlpath.y
@@ -1,0 +1,102 @@
+%{
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+extern int yylex(void);
+extern int yyparse(void);
+
+int yyxstep;
+
+/* yyerror is the parser's error hook; it stores the message s in errstr. */
+void
+yyerror(char *s)
+{
+ werrstr("%s", s);
+}
+
+%}
+
+%union {
+ int i;
+ int n;
+ Name *nm;
+ Cond *c;
+ Node *nd;
+}
+
+%token <i> CHILD PARENT SELF ANCESTOR ANCESTOR_OR_SELF DESCENDANT
+%token <i> DESCENDANT_OR_SELF
+%token <i> FOLLOWING FOLLOWING_SIBLING PRECEDING PRECEDING_SIBLING
+%token <i> ATTRIBUTE NAMESPACE
+
+%token <i> AND OR
+
+%token <nm> NAME
+%token <nm> QUOTE
+%token <n> NUMBER
+
+%type <nm> name
+%type <n> number
+%type <nm> attr
+
+%type <nd> node
+%type <nd> func
+%type <nd> path
+%type <c> specl
+%type <c> slist
+
+/*
+ * Precedence, lowest first: yacc gives each later line a higher
+ * precedence.  In XPath "and" binds tighter than "or", so OR has to
+ * be declared before AND; "a or b and c" is "a or (b and c)".
+ */
+%left OR
+%left AND
+%left '='
+
+%%
+
+/*
+ * A path is a chain of nodes separated by '/'.  The empty path
+ * yields an Nroot node, which is how a leading '/' is represented.
+ * After '//' or descendant-or-self:: the type of the following node
+ * is overwritten with Ndescself.
+ * NOTE(review): the overwrite also applies to number, attribute and
+ * function nodes (e.g. "//2"), and a number node has no name -
+ * confirm that the evaluator copes with that.
+ */
+path:
+ node
+ | /* empty */ { $$ = addnode(nil, Nroot, nil); }
+ | path '/' '/' node { $4->type = Ndescself; $$ = chainnode($1, $4); }
+ | path '/' DESCENDANT_OR_SELF node { $4->type = Ndescself; $$ = chainnode($1, $4); }
+ | path '/' node { $$ = chainnode($1, $3); }
+ | path '/' CHILD node { $$ = chainnode($1, $4); }
+ ;
+
+/*
+ * A node is a single step: a name or a number, each optionally
+ * followed by predicates, an attribute reference or a function call.
+ */
+node:
+ name { $$ = addnode($1, Nchild, nil); }
+ | name specl { $$ = addnode($1, Nchild, $2); }
+ | number { $$ = addnoden($1, nil); }
+ | number specl { $$ = addnoden($1, $2); }
+ | attr { $$ = addnode($1, Nattribute, nil); }
+ | func
+ ;
+
+/* One or more bracketed predicates; consecutive predicates are combined with CTand. */
+specl:
+ '[' slist ']' { $$ = $2; }
+ | specl '[' slist ']' { $$ = addcondb(CTand, $1, $3); }
+ ;
+
+/*
+ * The contents of one predicate: an index, an attribute existence
+ * test, a comparison of two paths, or such conditions joined with
+ * and/or.
+ */
+slist:
+ number { $$ = addcondi($1); }
+ | attr { $$ = addcondhasattr($1); }
+ | slist AND slist { $$ = addcondb(CTand, $1, $3); }
+ | slist OR slist { $$ = addcondb(CTor, $1, $3); }
+ | path '=' path { $$ = addcondcnode(CTeq, $1, $3); }
+ ;
+
+/* An attribute reference, written @name or attribute::name; the value is the name. */
+attr:
+ '@' name { $$ = $2; }
+ | ATTRIBUTE name { $$ = $2; }
+ ;
+
+/* A function call without arguments; becomes an Nfunction node. */
+func:
+ name '(' ')' { $$ = addnode($1, Nfunction, nil); }
+ ;
+
+/* A number is a single NUMBER token; its value is passed on unchanged. */
+number:
+ NUMBER
+ ;
+
+/* A name is a single NAME token; its value is passed on unchanged. */
+name:
+ NAME
+ ;
--- /dev/null
+++ b/libxpath/xmlpathl.l
@@ -1,0 +1,76 @@
+%{
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+#include "y.tab.h"
+#undef input
+#undef unput
+#define input() (xinput())
+#define unput(c) (xunput(c))
+
+char *inputpath;
+char *currentchar;
+
+/* setinputpath makes s the lexer's input and rewinds the read position to its start. */
+void
+setinputpath(char *s)
+{
+ currentchar = inputpath = s;
+}
+
+/*
+ * xinput replaces lex's input(): it returns the character at the
+ * read position and advances past it.
+ */
+int
+xinput(void)
+{
+	return *currentchar++;
+}
+
+/*
+ * xunput replaces lex's unput(): it steps the read position back by
+ * one and stores c there.  Stepping back from the start of the input
+ * is a fatal error.
+ */
+int
+xunput(int c)
+{
+	/* do I need to handle that? */
+	if (currentchar <= inputpath)
+		sysfatal("error");
+
+	*--currentchar = c;
+	return c;
+}
+%}
+
+A [a-zA-Z_.]
+AN [a-zA-Z0-9_.]
+D [0-9]
+LIT [/@=*()']
+Q [^']
+
+%s SPEC
+%s QUOT
+
+%%
+	/*
+	 * The keywords come first: when two rules match the same
+	 * length lex takes the earlier one, so behind {AN}+ the words
+	 * "and" and "or" would always be returned as NAME.
+	 */
+<SPEC>and	return AND;
+<SPEC>or	return OR;
+
+	/* blanks between tokens are skipped instead of falling to the default action */
+[ \t\n]+	;
+
+{D}+	{ yylval.n = atoi(yytext); return NUMBER; }
+{AN}+	{ yylval.nm = addname(yytext); return NAME; }
+'{Q}+'	{ yylval.nm = addname(yytext); return NAME; }
+
+\[	{ BEGIN SPEC; return '['; }
+<SPEC>\]	{ BEGIN 0; return ']'; }
+
+child::	return CHILD;
+ancestor::	return ANCESTOR;
+ancestor-or-self::	return ANCESTOR_OR_SELF;
+attribute::	return ATTRIBUTE;
+descendant::	return DESCENDANT;
+descendant-or-self::	return DESCENDANT_OR_SELF;
+following::	return FOLLOWING;
+following-sibling::	return FOLLOWING_SIBLING;
+namespace::	return NAMESPACE;
+parent::	return PARENT;
+preceding::	return PRECEDING;
+preceding-sibling::	return PRECEDING_SIBLING;
+self::	return SELF;
+
+{LIT}	{ return *yytext; }
--- a/xpath
+++ b/xpath
@@ -15,16 +15,19 @@
#include <xpath.h>
.PB
enum {
- XTelems = 1,
- XTstring = 2,
+ Xelem = 1,
+ Xstring = 2,
+ Xnum = 3,
}
.PB
struct XpResult {
- int type; /* type of XpResult */
- int num; /* number of results */
- union { /* array of results */
- char **strings; /* if type == XTstring */
- Elem **elems; /* if type == XTelems */
+ int type; /* type of XpResult */
+ int error; /* 1 if error. Check errstr */
+ int num; /* number of results */
+ union { /* array of results */
+ char **strings; /* if type == Xstring */
+ Elem **elems; /* if type == Xelem */
+ int *numbers; /* if type == Xnum */
};
...
};
@@ -45,10 +48,23 @@
It's using
.I ep
as the reference element within the DOM model.
+.PP
+The resulting
+.I XpResult
+holds the typed results as an array depending on the type.
+The allocated array will hold
+.I num
+items.
+If
+.I error
+is set,
+.I errstr
+contains the description of the error.
.SH SOURCE
/sys/src/libxpath
.SH "SEE ALSO"
-.IR xml (2).
+.IR xml (2),
+.IR errstr (2).
.SH BUGS
The current implementation of XPath is incomplete and very limited.
-A future implementation should be able to support the full set of XPath.
+It should be possible to extend the parser to cover a full set of functionality.
--- a/xpath.h
+++ b/xpath.h
@@ -1,18 +1,21 @@
#pragma lib "libxpath.a"
enum {
- XTelem = 1,
- XTstring = 2,
+ Xelem = 1,
+ Xstring = 2,
+ Xnum = 3,
};
+/*
+ * XpResult is the value returned by xmllookpath.  type is Xelem,
+ * Xstring or Xnum and tells which array of the union is in use (it
+ * is 0 for a result that holds nothing); num is the number of
+ * entries in that array.  error is set when the lookup failed, in
+ * which case errstr describes the problem.
+ * NOTE(review): size looks like the allocated capacity of the array
+ * (it is compared against the entry count before growing) - confirm.
+ */
typedef struct XpResult XpResult;
struct XpResult {
int type;
+ int error;
int size;
int num;
union {
char **strings;
Elem **elems;
+ int *numbers;
};
};