chiark / gitweb /
shared: add simplistic XML parser for usage in the D-Bus policy language compat parser
[elogind.git] / src / shared / xml.c
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644 (file)
index 0000000..be56b08
--- /dev/null
@@ -0,0 +1,216 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <string.h>
+
+#include "util.h"
+#include "xml.h"
+
+/* Position of the tokenizer within the input. xml_tokenize() stores
+ * one of these values in its opaque state pointer between calls. */
+enum {
+        STATE_TEXT      = 0, /* between tags: character data, or the start of a tag */
+        STATE_TAG       = 1, /* inside a tag: after the tag name or after an attribute value */
+        STATE_ATTRIBUTE = 2, /* inside a tag: right after an attribute name */
+};
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict than XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications */
+
+/* Extracts the next token from the NUL-terminated input at *p.
+ *
+ * p:     in/out read position; advanced past the token on every
+ *        return except XML_END and errors.
+ * name:  out; receives the token's text as a string allocated with
+ *        strndup(). This function keeps no reference to it, so
+ *        releasing it falls to the caller. Set to NULL for
+ *        XML_TAG_CLOSE_EMPTY and left untouched for XML_END and
+ *        errors.
+ * state: in/out opaque tokenizer state, decoded with PTR_TO_INT().
+ *        NOTE(review): a NULL pointer presumably decodes to
+ *        STATE_TEXT for the first call -- confirm against callers.
+ *
+ * Returns:
+ *   XML_END              the NUL terminator was reached
+ *   XML_TEXT             character data up to the next '<'
+ *   XML_TAG_OPEN         "<name", *name is the tag name
+ *   XML_TAG_CLOSE        "</name", *name is the tag name
+ *   XML_TAG_CLOSE_EMPTY  "/>" terminating a tag, *name is NULL
+ *   XML_ATTRIBUTE_NAME   an attribute name inside a tag
+ *   XML_ATTRIBUTE_VALUE  the value following "=", quotes stripped
+ *   -EINVAL              malformed or truncated input
+ *   -ENOMEM              strndup() failed
+ *
+ * Comments, processing instructions and "<!...>" declarations are
+ * skipped without producing a token. */
+int xml_tokenize(const char **p, char **name, void **state) {
+        const char *c, *e, *b;
+        char *ret;
+        int t;
+
+        assert(p);
+        assert(*p);
+        assert(name);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        for (;;) {
+                /* End of input is reported in every state, before any state-specific parsing */
+                if (*c == 0)
+                        return XML_END;
+
+                switch (t) {
+
+                case STATE_TEXT: {
+                        int x;
+
+                        e = strchrnul(c, '<');
+                        if (e > c) {
+                                /* More text... */
+                                ret = strndup(c, e - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TEXT;
+                        }
+
+                        /* e == c and *c != 0, hence we are sitting right on a '<' */
+                        assert(*e == '<');
+                        b = c + 1;
+
+                        if (startswith(b, "!--")) {
+                                /* A comment */
+                                e = strstr(b + 3, "-->");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 3;
+                                continue;
+                        }
+
+                        if (*b == '?') {
+                                /* Processing instruction */
+
+                                e = strstr(b + 1, "?>");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 2;
+                                continue;
+                        }
+
+                        if (*b == '!') {
+                                /* DTD; skipped up to the first '>' */
+
+                                e = strchr(b + 1, '>');
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 1;
+                                continue;
+                        }
+
+                        if (*b == '/') {
+                                /* A closing tag */
+                                x = XML_TAG_CLOSE;
+                                b++;
+                        } else
+                                x = XML_TAG_OPEN;
+
+                        /* The tag name runs up to the first whitespace, '/' or '>' */
+                        e = strpbrk(b, WHITESPACE "/>");
+                        if (!e)
+                                return -EINVAL;
+
+                        ret = strndup(b, e - b);
+                        if (!ret)
+                                return -ENOMEM;
+
+                        *name = ret;
+                        *p = e;
+                        *state = INT_TO_PTR(STATE_TAG);
+
+                        return x;
+                }
+
+                case STATE_TAG:
+
+                        b = c + strspn(c, WHITESPACE);
+                        if (*b == 0)
+                                return -EINVAL;
+
+                        e = b + strcspn(b, WHITESPACE "=/>");
+                        if (e > b) {
+                                /* An attribute */
+
+                                ret = strndup(b, e - b);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+                                return XML_ATTRIBUTE_NAME;
+                        }
+
+                        if (startswith(b, "/>")) {
+                                /* An empty tag */
+
+                                *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+                                *p = b + 2;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TAG_CLOSE_EMPTY;
+                        }
+
+                        if (*b != '>')
+                                return -EINVAL;
+
+                        /* End of the tag: no token for that, go on with the text that follows */
+                        c = b + 1;
+                        t = STATE_TEXT;
+                        continue;
+
+                case STATE_ATTRIBUTE:
+
+                        if (*c == '=') {
+                                c++;
+
+                                if (*c == '\'' || *c == '\"') {
+                                        /* Tag with a quoted value */
+
+                                        /* The value ends at the next occurrence of the same quote character */
+                                        e = strchr(c+1, *c);
+                                        if (!e)
+                                                return -EINVAL;
+
+                                        ret = strndup(c+1, e - c - 1);
+                                        if (!ret)
+                                                return -ENOMEM;
+
+                                        *name = ret;
+                                        *p = e + 1;
+                                        *state = INT_TO_PTR(STATE_TAG);
+
+                                        return XML_ATTRIBUTE_VALUE;
+
+                                }
+
+                                /* Tag with a value without quotes */
+
+                                b = strpbrk(c, WHITESPACE ">");
+                                if (!b)
+                                        /* No terminator found: return an empty value and stay right after the '=' */
+                                        b = c;
+
+                                ret = strndup(c, b - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = b;
+                                *state = INT_TO_PTR(STATE_TAG);
+                                return XML_ATTRIBUTE_VALUE;
+                        }
+
+                        /* Attribute without a value: continue parsing the tag */
+                        t = STATE_TAG;
+                        continue;
+
+                default:
+                        /* An unknown state value would otherwise make this loop spin forever */
+                        assert_not_reached("Bad state");
+                }
+
+        }
+}