ref: f1e559d47916b74f473153a0a04df6956ca4052b
parent: 47a288683dbdc55c5df9b9f65db9909de6021af0
author: sirjofri <sirjofri@sirjofri.de>
date: Tue Jul 30 14:56:51 EDT 2024
bug fixes, leak fixes, simpler structure
--- a/libxpath/dat.c
+++ b/libxpath/dat.c
@@ -12,6 +12,15 @@
"attribute",
"function",
"descendant-or-self",
+ "AND",
+ "OR",
+ "INDEX",
+ "EQ",
+ "NEQ",
+ "LT",
+ "GT",
+ "LE",
+ "GE",
nil
};
@@ -27,8 +36,6 @@
Name *firstname;
Name *lastname;
-Cond *firstcond;
-Cond *lastcond;
Node *firstnode;
Node *lastnode;
Func *firstfunc;
@@ -36,6 +43,8 @@
Mbuf *firstmbuf;
Mbuf *lastmbuf;
+Node *rootchain;
+
Name*
findname(char *n)
{
@@ -83,73 +92,6 @@
return lastname;
}
-Cond*
-genaddcond(void)
-{
- if (!firstcond) {
- firstcond = mallocz(sizeof(Cond), 1);
- lastcond = firstcond;
- return firstcond;
- }
-
- lastcond->next = mallocz(sizeof(Cond), 1);
- lastcond = lastcond->next;
- return lastcond;
-}
-
-Cond*
-addcondb(int type, Cond *A, Cond *B)
-{
- Cond *c;
- c = genaddcond();
- c->type = type;
- c->a = A;
- c->b = B;
- return c;
-}
-
-Cond*
-addcondi(int index)
-{
- Cond *c;
- c = genaddcond();
- c->type = CTindex;
- c->index = index;
- return c;
-}
-
-Cond*
-addcondattr(Name *attr, Name *value)
-{
- Cond *c;
- c = genaddcond();
- c->type = CTattr;
- c->attr = attr;
- c->value = value;
- return c;
-}
-
-Cond*
-addcondhasattr(Name *attr)
-{
- Cond *c;
- c = genaddcond();
- c->type = CThasattr;
- c->attr = attr;
- return c;
-}
-
-Cond*
-addcondcnode(int type, Node *a, Node *b)
-{
- Cond *c;
- c = genaddcond();
- c->type = type;
- c->anode = a;
- c->bnode = b;
- return c;
-}
-
static Node*
gennode(void)
{
@@ -164,10 +106,21 @@
}
Node*
-addnode(Name *name, int type, Cond *cond)
+addcondcnode(int type, Node *a, Node *b)
{
Node *n;
n = gennode();
+ n->type = type;
+ n->anode = a;
+ n->bnode = b;
+ return n;
+}
+
+Node*
+addnode(Name *name, int type, Node *cond)
+{
+ Node *n;
+ n = gennode();
if (name && name->quoted)
type = Nstring;
@@ -185,7 +138,7 @@
}
Node*
-addnoden(int number, Cond *cond)
+addnoden(int number, Node *cond)
{
Node *n;
n = gennode();
@@ -241,78 +194,126 @@
}
static void
-debugprintceq(Cond *c)
+debugindent(int level)
{
+ while (level--)
+ fprint(2, "\t");
+}
+
+static void
+debugprintceq(Node *c, int level)
+{
+ debugindent(level);
+ switch (c->type) {
+ case Neq:
+ fprint(2, " EQ\n");
+ break;
+ case Nneq:
+ fprint(2, " NEQ\n");
+ break;
+ case Nlt:
+ fprint(2, " LT\n");
+ break;
+ case Ngt:
+ fprint(2, " GT\n");
+ break;
+ case Nle:
+ fprint(2, " LE\n");
+ break;
+ case Nge:
+ fprint(2, " GE\n");
+ break;
+ }
if (c->anode && c->bnode) {
+ debugindent(level);
fprint(2, " A: nodes\n");
- debugprintnodes(c->anode);
+ debugprintnodes(c->anode, level+1);
+ debugindent(level);
fprint(2, " B: nodes\n");
- debugprintnodes(c->bnode);
+ debugprintnodes(c->bnode, level+1);
return;
}
}
static void
-debugprintcond(Cond *c)
+debugprintcond(Node *c, int level)
{
if (!c)
return;
+ debugindent(level);
fprint(2, " Cond:\n");
switch (c->type) {
- case CTand:
- debugprintcond(c->a);
+ case Nand:
+ debugprintcond(c->anode, level+1);
+ debugindent(level);
fprint(2, " AND\n");
- debugprintcond(c->b);
+ debugprintcond(c->bnode, level+1);
break;
- case CTor:
- debugprintcond(c->a);
+ case Nor:
+ debugprintcond(c->anode, level+1);
+ debugindent(level);
fprint(2, " OR\n");
- debugprintcond(c->b);
+ debugprintcond(c->bnode, level+1);
break;
- case CTindex:
- fprint(2, " Index: %d\n", c->index);
+ case Nindex:
+ debugindent(level);
+ fprint(2, " Index: %d\n", c->number);
break;
- case CTattr:
- fprint(2, " Attr: %s == %s\n", c->attr->name, c->value->name);
+ case Nattribute:
+ debugindent(level);
+ fprint(2, " Attr: %s == %s\n",
+ c->anode->name->name,
+ c->bnode && c->bnode->name ? c->bnode->name->name : nil);
break;
- case CThasattr:
- fprint(2, " Attr: %s\n", c->attr->name);
+ case Neq:
+ case Nneq:
+ case Nlt:
+ case Ngt:
+ case Nle:
+ case Nge:
+ debugprintceq(c, level);
break;
- case CTeq:
- debugprintceq(c);
- break;
}
}
void
-debugprintfunc(Func *f)
+debugprintfunc(Func *f, int level)
{
if (!f)
return;
+ debugindent(level);
fprint(2, " Func: %s %p\n", f->name->name, f->f);
}
void
-debugprintnodes(Node *node)
+debugprintnodes(Node *node, int level)
{
Node *n;
- for (n = node ? node : firstnode; n; n = n->chain) {
- fprint(2, "Node:\n");
+ for (n = node ? node : rootchain; n; n = n->chain) {
+ debugindent(level);
+ fprint(2, "Node: %p\n", n);
+ debugindent(level);
fprint(2, " type: %s\n", type2str(n->type));
+ if (n->type >= Nand) {
+ debugprintcond(n, level);
+ continue;
+ }
switch (n->type) {
case Nnumber:
+ debugindent(level);
fprint(2, " number: %d\n", n->number);
break;
default:
+ debugindent(level);
fprint(2, " name: %s\n", n->name ? n->name->name : "<root>");
break;
}
- debugprintcond(n->cond);
- debugprintfunc(n->func);
+ debugprintcond(n->cond, level);
+ debugprintfunc(n->func, level);
}
}
@@ -319,7 +320,6 @@
static void
reset(void)
{
- Cond *c, *oc;
Node *n, *on;
Name *nm, *onm;
Func *f, *of;
@@ -340,14 +340,7 @@
}
firstnode = lastnode = nil;
- for (c = firstcond; c;) {
- oc = c->next;
- free(c);
- c = oc;
- }
- firstcond = lastcond = nil;
-
- for (f = firstfunc; c;) {
+ for (f = firstfunc; f;) {
of = f->next;
free(f);
f = of;
@@ -383,12 +376,55 @@
int yyparse(void);
Node*
+findroot(void)
+{
+ Node *n, *m;
+
+ /* find unlinked node */
+ for (n = firstnode; n;) {
+ for (m = firstnode; m; m = m->next) {
+ if (n == m) {
+ continue;
+ }
+ if (m->chain == n) {
+ fprint(2, "%p chained %p\n", m, n);
+ n = m;
+ break;
+ }
+ if (m->cond == n) {
+ fprint(2, "%p cond %p\n", m, n);
+ n = m;
+ break;
+ }
+ if (m->anode == n) {
+ fprint(2, "%p anode %p\n", m, n);
+ n = m;
+ break;
+ }
+ if (m->bnode == n) {
+ fprint(2, "%p bnode %p\n", m, n);
+ n = m;
+ break;
+ }
+ }
+ if (!m) {
+ /* if no node links to n, n is root */
+ return n;
+ }
+ /* if m is valid, n == m. Proceed with new n. */
+ }
+ return nil;
+}
+
+Node*
parsexpath(char *s)
{
reset();
setinputpath(s);
- if (!yyparse()) /* if successful */
- return firstnode;
+ if (!yyparse()) { /* if successful */
+ rootchain = findroot();
+ return rootchain;
+ }
werrstr("syntax error");
return nil;
}
--- a/libxpath/dat.h
+++ b/libxpath/dat.h
@@ -10,7 +10,6 @@
typedef struct Func Func;
typedef struct Node Node;
-typedef struct Cond Cond;
enum {
Nroot = 0, /* ^/ */
@@ -20,15 +19,28 @@
Nattribute, /* attribute:: */
Nfunction, /* name() */
Ndescself, /* descendant-or-self:: */
+ Nand,
+ Nor,
+ Nindex,
+ Neq,
+ Nneq,
+ Nlt,
+ Ngt,
+ Nle,
+ Nge,
NEND,
};
+char* type2str(int);
+
struct Node {
Name *name; /* name of node */
int type; /* type of node */
int number;
- Cond *cond; /* conditions */
+ Node *cond; /* conditions */
Func *func; /* function */
+ Node *anode; /* A node */
+ Node *bnode; /* B node */
Node *chain; /* next node in chain */
Node *next;
@@ -41,40 +53,13 @@
Func *next;
};
-enum {
- CTand = 0,
- CTor,
- CTindex,
- CTeq,
- CTattr,
- CThasattr,
-};
-
-struct Cond {
- int type;
- int index;
- Name *attr;
- Name *value;
- Cond *a;
- Cond *b;
- Node *anode;
- Node *bnode;
-
- Cond *next;
-};
-
-void debugprintnodes(Node*);
+void debugprintnodes(Node*, int);
Node* parsexpath(char*);
-Cond* addcondb(int, Cond*, Cond*);
-Cond* addcondi(int);
-Cond* addcondattr(Name*, Name*);
-Cond* addcondhasattr(Name*);
-Cond* addcondcnode(int, Node*, Node*);
-
-Node* addnode(Name*, int, Cond*);
-Node* addnoden(int, Cond*);
+Node* addcondcnode(int, Node*, Node*);
+Node* addnode(Name*, int, Node*);
+Node* addnoden(int, Node*);
Node* chainnode(Node*, Node*);
Func* findfunc(Name*);
--- a/libxpath/test/t.c
+++ b/libxpath/test/t.c
@@ -3,22 +3,46 @@
#include <xml.h>
#include <xpath.h>
-char *tests[] = {
- "/path//to[2]/node[@a='b']/@attr",
- "/path/text()",
- "/html/a",
- "/html/a[2]/text()",
- "/html//e",
- "/html/a[@href='p.php']/text()",
- "/html/a[position()='2']/text()", /* should error */
- "/html/a[position()=2]/text()",
- "/html/'2'",
- "/html/2",
- "/html/'hello'",
- "/[inval]",
- nil
+typedef struct Test Test;
+struct Test {
+ char *s;
+ int num;
+ int err;
+ int type;
+ char *e;
};
+Test tests[] = {
+ { "/path//to[2]/node[@a='b']/@attr", 0, 0, 0, nil },
+ { "/path/text()", 0, 0, 0, nil },
+ { "/html/a", 3, 0, Xelem, "help.php, whatever.php, p.php" },
+ { "/html/a[2]/text()", 1, 0, Xstring, "Other link" },
+ { "/html//e", 2, 0, Xelem, "a='1', a='2'" },
+ { "/html/a[@href='p.php']/text()", 1, 0, Xstring, "Second link" },
+ { "/html/a[position()='2']/text()", 0, 1, 0, nil },
+ { "/html/a[position()=2]/text()", 1, 0, Xstring, "Other link" },
+ { "/html/'2'", 1, 0, Xstring, "2" },
+ { "/html/2", 1, 0, Xnum, "2" },
+ { "/html/'hello'", 1, 0, Xstring, "hello" },
+ { "/[inval]", 0, 1, 0, nil },
+ { "position()", 1, 0, Xnum, "number" },
+ { nil, 0, 0, 0, nil },
+};
+
+static char*
+type2str(int type)
+{
+ switch (type) {
+ case Xnum:
+ return "Xnum";
+ case Xstring:
+ return "Xstring";
+ case Xelem:
+ return "Xelem";
+ }
+ return "invalid";
+}
+
Xml *x;
void
@@ -45,21 +69,43 @@
}
void
-runtest(char *s)
+runtest(Test *t)
{
XpResult r;
+ char *s;
+ s = t->s;
+
+ if (t != tests)
+ print("\n");
+
print("====== test ======\n - %s\n", s);
r = xmllookpath(x->root, s);
+ if (r.num == t->num) {
+ print("√ correct number of results: %d\n", r.num);
+ } else {
+ print("× wrong number of results: %d (%d)\n", r.num, t->num);
+ }
+ if (t->err == r.error) {
+ if (t->err) {
+ print("√ is error: %r\n");
+ werrstr("");
+ } else
+ print("√ is not error\n");
+ } else {
+ if (r.error)
+ print("× is error: (%d (%d)): %r\n", r.error, t->err);
+ else
+ print("× no error: (%d (%d))\n", r.error, t->err);
+ }
+ if (t->type == r.type)
+ print("√ type is correct: %s\n", type2str(r.type));
+ else
+ print("× type is incorrect: %s (%s)\n", type2str(r.type), type2str(t->type));
+ if (t->e)
+ print("expect: %s\n", t->e);
print("===== result =====\n");
- print("found %d results:\n", r.num);
-
- if (r.error) {
- fprint(2, "err: %r\n");
- werrstr("");
- }
-
for (int i = 0; i < r.num; i++) {
switch (r.type) {
case Xelem:
@@ -100,8 +146,8 @@
main(int argc, char **argv)
{
USED(argc, argv);
- char **s;
int fd;
+ Test *t;
fd = open("test.xml", OREAD);
if (fd < 0)
@@ -113,8 +159,8 @@
// xmldebug = 1;
- for (s = tests; *s; s++) {
- runtest(*s);
+ for (t = tests; t->s; t++) {
+ runtest(t);
}
exits(nil);
--- a/libxpath/xmllookpath.c
+++ b/libxpath/xmllookpath.c
@@ -120,39 +120,101 @@
}
static int
-equals(Elem *e, Cond *c)
+equals(Elem *e, Node *c)
{
XpResult ra, rb;
int n;
+ n = 0;
if (c->anode && c->bnode) {
ra = recurse(e, c->anode);
rb = recurse(e, c->bnode);
if (ra.num != 1) {
- return 0;
+ goto Out;
}
if (rb.num != 1) {
- return 0;
+ goto Out;
}
if (ra.type != rb.type) {
werrstr("equals: A.type != B.type (%s != %s)\n",
resulttypestring(ra.type), resulttypestring(rb.type));
- return 0;
+ goto Out;
}
- if (ra.type == Xstring)
- return strcmp(ra.strings[0], rb.strings[0]) == 0;
- if (ra.type == Xelem)
- return ra.elems[0] == rb.elems[0];
- if (ra.type == Xnum)
- return ra.numbers[0] == rb.numbers[0];
+ if (ra.type == Xstring) {
+ n = strcmp(ra.strings[0], rb.strings[0]) == 0;
+ goto Out;
+ }
+ if (ra.type == Xelem) {
+ n = ra.elems[0] == rb.elems[0];
+ goto Out;
+ }
+ if (ra.type == Xnum) {
+ n = ra.numbers[0] == rb.numbers[0];
+ goto Out;
+ }
sysfatal("code error");
}
+Out:
+ xmlfreeresult(&ra);
+ xmlfreeresult(&rb);
+ return n;
+}
+
+static int
+evalnum(Elem *e, Node *n, int *result)
+{
+ XpResult r;
+ r = recurse(e, n);
+ if (!r.type)
+ return 0;
+ if (r.num != 1)
+ goto Out;
+ if (r.type != Xnum)
+ goto Out;
+ *result = r.numbers[0];
+ xmlfreeresult(&r);
+ return 1;
+Out:
+ xmlfreeresult(&r);
return 0;
}
static int
-evalcond(Elem *e, Cond *c)
+cmpthan(Elem *e, Node *c)
{
+ int a, b;
+
+ if (!(c->anode && c->bnode)) {
+ werrstr("Cond: anode or bnode not valid");
+ return 0;
+ }
+
+ if (!evalnum(e, c->anode, &a)) {
+ werrstr("Cond: anode no value");
+ return 0;
+ }
+ if (!evalnum(e, c->bnode, &b)) {
+ werrstr("Cond: bnode no value");
+ return 0;
+ }
+ fprint(2, "comparing: %d <%d> %d\n", a, c->type, b);
+ switch (c->type) {
+ case Nlt:
+ return a < b;
+ case Ngt:
+ return a > b;
+ case Nle:
+ return a <= b;
+ case Nge:
+ return a >= b;
+ }
+ werrstr("Cond: invalid cmpthan type");
+ return 0;
+}
+
+static int
+evalcond(Elem *e, Node *c)
+{
Attr *a;
if (!c)
@@ -159,28 +221,39 @@
return 1;
switch (c->type) {
- case CTand:
- return evalcond(e, c->a) && evalcond(e, c->b);
- case CTor:
- return evalcond(e, c->a) || evalcond(e, c->b);
- case CTindex:
- return position(e) == c->index;
+ case Nand:
+ return evalcond(e, c->anode) && evalcond(e, c->bnode);
+ case Nor:
+ return evalcond(e, c->anode) || evalcond(e, c->bnode);
+ case Nindex:
+ if (!(c->anode && c->anode->type == Nnumber)) {
+ werrstr("invalid check: position() with no number");
+ return 0;
+ }
+ return position(e) == c->anode->number;
break;
- case CTattr:
+ case Nattribute:
for (a = e->attrs; a; a = a->next)
- if (strcmp(a->name, c->attr->name) == 0
- && strcmp(a->value, c->value->name) == 0)
- return 1;
+ if (strcmp(a->name, c->anode->name->name) == 0) {
+ if (!c->bnode->name->name)
+ return 1;
+ if (strcmp(a->value, c->bnode->name->name) == 0)
+ return 1;
+ }
return 0;
- case CThasattr:
- for (a = e->attrs; a; a = a->next)
- if (strcmp(a->name, c->attr->name) == 0)
- return 1;
- return 0;
- case CTeq:
+ case Nnumber:
+ return position(e) == c->number;
+ case Neq:
return equals(e, c);
+ case Nneq:
+ return !equals(e, c);
+ case Nlt:
+ case Ngt:
+ case Nle:
+ case Nge:
+ return cmpthan(e, c);
}
- werrstr("unhandled predicate condition: %d\n", c->type);
+ werrstr("unhandled predicate condition: %s\n", type2str(c->type));
return 1;
}
@@ -188,7 +261,6 @@
recurse(Elem *ep, Node *n)
{
XpResult r;
- char *s;
memset(&r, 0, sizeof(XpResult));
@@ -248,6 +320,12 @@
return r;
}
+ if (n->type >= Nand) {
+ /* boolean nodes */
+ buildsinglenum(&r, evalcond(ep, n));
+ return r;
+ }
+
return r;
}
@@ -270,7 +348,7 @@
return r;
}
if (xmldebug)
- debugprintnodes(nil);
+ debugprintnodes(nil, 0);
p = ep;
while (p->parent)
@@ -288,4 +366,26 @@
root.child = nil;
p->parent = nil;
return r;
+}
+
+void
+xmlfreeresult(XpResult *r)
+{
+ switch (r->type) {
+ case Xelem:
+ if (r->elems)
+ free(r->elems);
+ break;
+ case Xstring:
+ if (r->strings)
+ free(r->strings);
+ break;
+ case Xnum:
+ if (r->numbers)
+ free(r->numbers);
+ break;
+ default:
+ break;
+ }
+ memset(r, 0, sizeof(XpResult));
}
--- a/libxpath/xmlpath.y
+++ b/libxpath/xmlpath.y
@@ -22,16 +22,15 @@
int i;
int n;
Name *nm;
- Cond *c;
Node *nd;
}
%token <i> CHILD PARENT SELF ANCESTOR ANCESTOR_OR_SELF DESCENDANT
-%token <i> DESCENDANT_OR_SELF
+%token <i> DESCENDANT_OR_SELF DSLASH
%token <i> FOLLOWING FOLLOWING_SIBLING PRECEDING PRECEDING_SIBLING
%token <i> ATTRIBUTE NAMESPACE
-%token <i> AND OR
+%token <i> AND OR EQ NEQ LT GT LE GE
%token <nm> NAME
%token <nm> QUOTE
@@ -44,12 +43,15 @@
%type <nd> node
%type <nd> func
%type <nd> path
-%type <c> specl
-%type <c> slist
+%type <nd> specl
+%left EQ NEQ
+%left LT GT
+%left LE GE
%left AND
%left OR
-%left '='
+%left '/' DSLASH CHILD
+%left DESCENDANT_OR_SELF
%%
@@ -56,10 +58,18 @@
path:
node
| /* empty */ { $$ = addnode(nil, Nroot, nil); }
- | path '/' '/' node { $4->type = Ndescself; $$ = chainnode($1, $4); }
- | path '/' DESCENDANT_OR_SELF node { $4->type = Ndescself; $$ = chainnode($1, $4); }
- | path '/' node { $$ = chainnode($1, $3); }
- | path '/' CHILD node { $$ = chainnode($1, $4); }
+ | path DSLASH path { $3->type = Ndescself; $$ = chainnode($1, $3); }
+ | path '/' DESCENDANT_OR_SELF path { $4->type = Ndescself; $$ = chainnode($1, $4); }
+ | path '/' path { $$ = chainnode($1, $3); }
+ | path '/' CHILD path { $$ = chainnode($1, $4); }
+ | path AND path { $$ = addcondcnode(Nand, $1, $3); }
+ | path OR path { $$ = addcondcnode(Nor, $1, $3); }
+ | path EQ path { $$ = addcondcnode(Neq, $1, $3); }
+ | path NEQ path { $$ = addcondcnode(Nneq, $1, $3); }
+ | path LT path { $$ = addcondcnode(Nlt, $1, $3); }
+ | path GT path { $$ = addcondcnode(Ngt, $1, $3); }
+ | path LE path { $$ = addcondcnode(Nle, $1, $3); }
+ | path GE path { $$ = addcondcnode(Nge, $1, $3); }
;
node:
@@ -72,16 +82,8 @@
;
specl:
- '[' slist ']' { $$ = $2; }
- | specl '[' slist ']' { $$ = addcondb(CTand, $1, $3); }
- ;
-
-slist:
- number { $$ = addcondi($1); }
- | attr { $$ = addcondhasattr($1); }
- | slist AND slist { $$ = addcondb(CTand, $1, $3); }
- | slist OR slist { $$ = addcondb(CTor, $1, $3); }
- | path '=' path { $$ = addcondcnode(CTeq, $1, $3); }
+ '[' path ']' { $$ = $2; }
+ | specl '[' path ']' { $$ = addcondcnode(Nand, $1, $3); }
;
attr:
--- a/libxpath/xmlpathl.l
+++ b/libxpath/xmlpathl.l
@@ -42,7 +42,7 @@
A [a-zA-Z_.]
AN [a-zA-Z0-9_.]
D [0-9]
-LIT [/@=*()']
+LIT [/@=*()'!<>]
Q [^']
%s SPEC
@@ -56,6 +56,7 @@
\[ { BEGIN SPEC; return '['; }
<SPEC>\] { BEGIN 0; return ']'; }
+\/\/ return DSLASH;
child:: return CHILD;
ancestor:: return ANCESTOR;
ancestor-or-self:: return ANCESTOR_OR_SELF;
@@ -72,5 +73,12 @@
<SPEC>and return AND;
<SPEC>or return OR;
+
+!= { return NEQ; }
+\<= { return LE; }
+\< { return LT; }
+\>= { return GE; }
+\> { return GT; }
+= { return EQ; }
{LIT} { return *yytext; }
--- a/xpath
+++ b/xpath
@@ -35,6 +35,7 @@
.PD 0
.ta +\w'\fLXpResult 'u
XpResult xmllookpath(Elem *ep, char *xpath)
+void xmlfreeresult(XpResult *r)
.SH DESCRIPTION
.PP
.I Libxpath
@@ -60,6 +61,11 @@
is set,
.I errstr
contains the description of the error.
+.PP
+.I Xmlfreeresult
+frees and resets the specified
+.I XpResult
+structure.
.SH SOURCE
/sys/src/libxpath
.SH "SEE ALSO"
--- a/xpath.h
+++ b/xpath.h
@@ -20,3 +20,4 @@
};
XpResult xmllookpath(Elem *, char *);
+void xmlfreeresult(XpResult *);