chiark / gitweb /
shared: add simplistic XML parser for usage in the D-Bus policy language compat parser
author Lennart Poettering <lennart@poettering.net>
Sat, 28 Dec 2013 02:03:50 +0000 (03:03 +0100)
committer Lennart Poettering <lennart@poettering.net>
Sat, 28 Dec 2013 02:04:29 +0000 (03:04 +0100)
.gitignore
Makefile.am
src/shared/xml.c [new file with mode: 0644]
src/shared/xml.h [new file with mode: 0644]
src/test/test-xml.c [new file with mode: 0644]

index 1cb34e0..4799bad 100644 (file)
 /test-utf8
 /test-util
 /test-watchdog
+/test-xml
 /timedatectl
 /udevadm
 /undefined
index 7556db8..bc6fa1a 100644 (file)
@@ -779,7 +779,9 @@ libsystemd_shared_la_SOURCES = \
        src/shared/syscall-list.c \
        src/shared/syscall-list.h \
        src/shared/audit.c \
-       src/shared/audit.h
+       src/shared/audit.h \
+       src/shared/xml.c \
+       src/shared/xml.h
 
 nodist_libsystemd_shared_la_SOURCES = \
        src/shared/errno-from-name.h \
@@ -1156,7 +1158,8 @@ tests += \
        test-hashmap \
        test-list \
        test-tables \
-       test-device-nodes
+       test-device-nodes \
+       test-xml
 
 EXTRA_DIST += \
        test/sched_idle_bad.service \
@@ -1258,6 +1261,12 @@ test_hashmap_SOURCES = \
 test_hashmap_LDADD = \
        libsystemd-core.la
 
+test_xml_SOURCES = \
+       src/test/test-xml.c
+
+test_xml_LDADD = \
+       libsystemd-shared.la
+
 test_list_SOURCES = \
        src/test/test-list.c
 
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644 (file)
index 0000000..be56b08
--- /dev/null
@@ -0,0 +1,216 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <string.h>
+
+#include "util.h"
+#include "xml.h"
+
+enum { /* Tokenizer states; xml_tokenize() stores the current one in the caller's opaque "state" pointer between calls */
+        STATE_TEXT, /* Outside of any tag. First enumerator, hence 0 -- presumably what a NULL "state" decodes to via PTR_TO_INT(); confirm in util.h */
+        STATE_TAG, /* Inside a tag: entered after the tag name and after each attribute value */
+        STATE_ATTRIBUTE, /* Directly after an attribute name, where an optional "=value" may follow */
+};
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset that is a bit less strict than XML and lacks all the more
+ * complex features, like entities or namespaces. However, we do
+ * support some HTML5-like simplifications. */
+
+/* Extracts the next token from the NUL-terminated document at *p.
+ *
+ * p:     in/out read cursor. It is advanced past the returned token
+ *        whenever a token other than XML_END is returned; it is left
+ *        untouched on XML_END and on errors.
+ * name:  receives a newly allocated string (to be freed by the
+ *        caller) for XML_TEXT, XML_TAG_OPEN, XML_TAG_CLOSE,
+ *        XML_ATTRIBUTE_NAME and XML_ATTRIBUTE_VALUE. It is set to NULL
+ *        for XML_TAG_CLOSE_EMPTY and not written on XML_END or errors.
+ * state: opaque tokenizer state carried from one call to the next;
+ *        test-xml.c starts out with a NULL pointer.
+ *
+ * Comments, processing instructions and "<!...>" declarations are
+ * skipped without producing a token. Reaching the end of the input
+ * yields XML_END, regardless of the state the tokenizer is in.
+ *
+ * Returns one of the XML_* token types, -ENOMEM if a string could not
+ * be allocated, or -EINVAL if the input is malformed. */
+int xml_tokenize(const char **p, char **name, void **state) {
+        const char *c, *e, *b;
+        char *ret;
+        int t;
+
+        assert(p);
+        assert(*p);
+        assert(name);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        for (;;) {
+                if (*c == 0)
+                        return XML_END;
+
+                switch (t) {
+
+                case STATE_TEXT: {
+                        int x;
+
+                        e = strchrnul(c, '<');
+                        if (e > c) {
+                                /* More text... */
+                                ret = strndup(c, e - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TEXT;
+                        }
+
+                        /* *c is neither NUL nor plain text, hence we must be looking at '<' */
+                        assert(*e == '<');
+                        b = c + 1;
+
+                        if (startswith(b, "!--")) {
+                                /* A comment */
+                                e = strstr(b + 3, "-->");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 3;
+                                continue;
+                        }
+
+                        if (*b == '?') {
+                                /* Processing instruction */
+
+                                e = strstr(b + 1, "?>");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 2;
+                                continue;
+                        }
+
+                        if (*b == '!') {
+                                /* DTD */
+
+                                e = strchr(b + 1, '>');
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 1;
+                                continue;
+                        }
+
+                        if (*b == '/') {
+                                /* A closing tag */
+                                x = XML_TAG_CLOSE;
+                                b++;
+                        } else
+                                x = XML_TAG_OPEN;
+
+                        /* The tag name ends at the first whitespace, '/' or '>' */
+                        e = strpbrk(b, WHITESPACE "/>");
+                        if (!e)
+                                return -EINVAL;
+
+                        ret = strndup(b, e - b);
+                        if (!ret)
+                                return -ENOMEM;
+
+                        *name = ret;
+                        *p = e;
+                        *state = INT_TO_PTR(STATE_TAG);
+
+                        return x;
+                }
+
+                case STATE_TAG:
+
+                        b = c + strspn(c, WHITESPACE);
+                        if (*b == 0)
+                                return -EINVAL;
+
+                        e = b + strcspn(b, WHITESPACE "=/>");
+                        if (e > b) {
+                                /* An attribute */
+
+                                ret = strndup(b, e - b);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+                                return XML_ATTRIBUTE_NAME;
+                        }
+
+                        if (startswith(b, "/>")) {
+                                /* An empty tag */
+
+                                *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+                                *p = b + 2;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TAG_CLOSE_EMPTY;
+                        }
+
+                        if (*b != '>')
+                                return -EINVAL;
+
+                        /* End of the tag: no token for that, go on with the text behind it */
+                        c = b + 1;
+                        t = STATE_TEXT;
+                        continue;
+
+                case STATE_ATTRIBUTE:
+
+                        if (*c == '=') {
+                                c++;
+
+                                if (*c == '\'' || *c == '\"') {
+                                        /* Tag with a quoted value */
+
+                                        e = strchr(c+1, *c);
+                                        if (!e)
+                                                return -EINVAL;
+
+                                        ret = strndup(c+1, e - c - 1);
+                                        if (!ret)
+                                                return -ENOMEM;
+
+                                        *name = ret;
+                                        *p = e + 1;
+                                        *state = INT_TO_PTR(STATE_TAG);
+
+                                        return XML_ATTRIBUTE_VALUE;
+
+                                }
+
+                                /* Tag with a value without quotes */
+
+                                /* If no terminator follows, the value is everything up to
+                                 * the end of the input. Pointing b at c instead would return
+                                 * an empty value without consuming anything, and the next
+                                 * call would re-read the value as an attribute name. */
+                                b = strpbrk(c, WHITESPACE ">");
+                                if (!b)
+                                        b = c + strlen(c);
+
+                                ret = strndup(c, b - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = b;
+                                *state = INT_TO_PTR(STATE_TAG);
+                                return XML_ATTRIBUTE_VALUE;
+                        }
+
+                        /* Attribute without a value, look for the next one */
+                        t = STATE_TAG;
+                        continue;
+
+                default:
+                        /* Without this an invalid state would make us loop forever */
+                        assert_not_reached("Bad state");
+                }
+
+        }
+
+        assert_not_reached("Bad state");
+}
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644 (file)
index 0000000..18ebbd9
--- /dev/null
@@ -0,0 +1,34 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+enum { /* Token types returned by xml_tokenize(); failures are returned as negative error codes instead */
+        XML_END, /* End of the input was reached; no name is returned */
+        XML_TEXT, /* Text outside of tags; name is the text */
+        XML_TAG_OPEN, /* Start of an opening tag; name is the tag name */
+        XML_TAG_CLOSE, /* Start of a closing tag; name is the tag name */
+        XML_TAG_CLOSE_EMPTY, /* The "/>" that ends an empty tag; name is set to NULL */
+        XML_ATTRIBUTE_NAME, /* Name of an attribute inside a tag */
+        XML_ATTRIBUTE_VALUE /* Value of the preceding attribute, without the surrounding quotes, if any */
+};
+
+int xml_tokenize(const char **p, char **name, void **state); /* Reads the next token at *p; returns an XML_* token type or a negative error code, see xml.c */
diff --git a/src/test/test-xml.c b/src/test/test-xml.c
new file mode 100644 (file)
index 0000000..7a34f14
--- /dev/null
@@ -0,0 +1,83 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdarg.h>
+
+#include "xml.h"
+#include "util.h"
+
+/* Tokenizes "data" and compares the result with the expectation passed
+ * in the variable arguments: a list of (token type, string) pairs,
+ * closed by a lone XML_END. The string of a pair may be NULL. */
+static void test_one(const char *data, ...) {
+        void *tokenizer_state = NULL;
+        va_list expected;
+        int got;
+
+        va_start(expected, data);
+
+        do {
+                /* Released at the end of every iteration */
+                _cleanup_free_ char *got_name = NULL;
+                int want;
+
+                got = xml_tokenize(&data, &got_name, &tokenizer_state);
+                assert_se(got >= 0);
+
+                want = va_arg(expected, int);
+                assert_se(want >= 0);
+
+                assert_se(got == want);
+
+                /* XML_END is the only entry that comes without a string */
+                if (got != XML_END) {
+                        const char *want_name;
+
+                        want_name = va_arg(expected, const char *);
+                        assert_se(streq_ptr(want_name, got_name));
+                }
+        } while (got != XML_END);
+
+        va_end(expected);
+}
+
+/* Runs the tokenizer over a few small documents and checks the token
+ * stream of each of them. */
+int main(int argc, char *argv[]) {
+        static const char empty_doc[] = "";
+        static const char pair_doc[] = "<foo></foo>";
+        static const char attributes_doc[] = "<foo waldo=piep meh=\"huhu\"/>";
+        static const char skipped_doc[] =
+                "xxxx\n"
+                "<foo><?xml foo?>     <!-- zzzz -->  </foo>";
+
+        /* Nothing to read at all */
+        test_one(empty_doc, XML_END);
+
+        /* An opening tag followed by its closing tag */
+        test_one(pair_doc,
+                 XML_TAG_OPEN, "foo",
+                 XML_TAG_CLOSE, "foo",
+                 XML_END);
+
+        /* An empty tag with an unquoted and a quoted attribute value */
+        test_one(attributes_doc,
+                 XML_TAG_OPEN, "foo",
+                 XML_ATTRIBUTE_NAME, "waldo",
+                 XML_ATTRIBUTE_VALUE, "piep",
+                 XML_ATTRIBUTE_NAME, "meh",
+                 XML_ATTRIBUTE_VALUE, "huhu",
+                 XML_TAG_CLOSE_EMPTY, NULL,
+                 XML_END);
+
+        /* Processing instruction and comment produce no tokens, the
+         * text around them does */
+        test_one(skipped_doc,
+                 XML_TEXT, "xxxx\n",
+                 XML_TAG_OPEN, "foo",
+                 XML_TEXT, "     ",
+                 XML_TEXT, "  ",
+                 XML_TAG_CLOSE, "foo",
+                 XML_END);
+
+        return 0;
+}