#include "xml.h"
enum { /* tokenizer states; xml_tokenize() decodes the current one from *state with PTR_TO_INT() and stores it back with INT_TO_PTR() */
+ STATE_NULL, /* first enumerator (value 0): on seeing it xml_tokenize() sets *line to 1 (when line is non-NULL) and proceeds as STATE_TEXT; presumably what a fresh NULL *state decodes to - confirm against PTR_TO_INT */
STATE_TEXT, /* state xml_tokenize() switches to right after the STATE_NULL start-up step */
STATE_TAG,
STATE_ATTRIBUTE,
};
+static void inc_lines(unsigned *line, const char *s, size_t n) { /* adds to *line the number of '\n' bytes found in the n bytes starting at s */
+ const char *p = s;
+
+ if (!line) /* line is optional: with NULL there is no counter to update */
+ return;
+
+ for (;;) {
+ const char *f;
+
+ f = memchr(p, '\n', n); /* look only at the n bytes still unscanned */
+ if (!f) /* no further newline in the remaining range: done */
+ return;
+
+ n -= (f - p) + 1; /* drop the bytes up to and including the newline from the remaining length */
+ p = f + 1; /* resume scanning just past the newline */
+ (*line)++; /* one more line break seen */
+ }
+}
+
/* We don't actually do real XML here. We only read a simplistic
* subset, that is a bit less strict than XML and lacks all the more
* complex features, like entities, or namespaces. However, we do
* support some HTML5-like simplifications */
-int xml_tokenize(const char **p, char **name, void **state) {
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
const char *c, *e, *b;
char *ret;
int t;
t = PTR_TO_INT(*state);
c = *p;
+ if (t == STATE_NULL) {
+ if (line)
+ *line = 1;
+ t = STATE_TEXT;
+ }
+
for (;;) {
if (*c == 0)
return XML_END;
if (!ret)
return -ENOMEM;
+ inc_lines(line, c, e - c);
+
*name = ret;
*p = e;
*state = INT_TO_PTR(STATE_TEXT);
if (!e)
return -EINVAL;
+ inc_lines(line, b, e + 3 - b);
+
c = e + 3;
continue;
}
if (!e)
return -EINVAL;
+ inc_lines(line, b, e + 2 - b);
+
c = e + 2;
continue;
}
if (!e)
return -EINVAL;
+ inc_lines(line, b, e + 1 - b);
+
c = e + 1;
continue;
}
if (*b == 0)
return -EINVAL;
+ inc_lines(line, c, b - c);
+
e = b + strcspn(b, WHITESPACE "=/>");
if (e > b) {
/* An attribute */
if (!e)
return -EINVAL;
+ inc_lines(line, c, e - c);
+
ret = strndup(c+1, e - c - 1);
if (!ret)
return -ENOMEM;