From: Lennart Poettering
Date: Mon, 15 Dec 2014 21:26:56 +0000 (+0100)
Subject: shared: add minimal JSON tokenizer
X-Git-Tag: v219~979
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=e7eebcfc42f00aa481ef31abc8e7e243c16f5b2c

shared: add minimal JSON tokenizer
---

diff --git a/.gitignore b/.gitignore
index dbc56bca1..bd9125d79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -199,6 +199,7 @@
 /test-journal-stream
 /test-journal-syslog
 /test-journal-verify
+/test-json
 /test-libsystemd-sym*
 /test-libudev
 /test-libudev-sym*
diff --git a/Makefile.am b/Makefile.am
index 84b587ddb..ab07d3bee 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -868,6 +868,8 @@ libsystemd_shared_la_SOURCES = \
 	src/shared/audit.h \
 	src/shared/xml.c \
 	src/shared/xml.h \
+	src/shared/json.c \
+	src/shared/json.h \
 	src/shared/bus-label.c \
 	src/shared/bus-label.h \
 	src/shared/gpt.h \
@@ -1366,6 +1368,7 @@ tests += \
 	test-tables \
 	test-device-nodes \
 	test-xml \
+	test-json \
 	test-architecture \
 	test-socket-util \
 	test-fdset \
@@ -1686,6 +1689,13 @@ test_xml_SOURCES = \
 test_xml_LDADD = \
 	libsystemd-shared.la
 
+test_json_SOURCES = \
+	src/test/test-json.c
+
+test_json_LDADD = \
+	libsystemd-shared.la \
+	-lm
+
 test_list_SOURCES = \
 	src/test/test-list.c
 
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 000000000..f1495e99c
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,433 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <math.h>
+
+#include "macro.h"
+#include "log.h"
+#include "util.h"
+#include "utf8.h"
+#include "json.h"
+
+/* Tokenizer states, carried between json_tokenize() calls in the caller's opaque state pointer */
+enum {
+        STATE_NULL,       /* initial state (a NULL state pointer): nothing tokenized yet */
+        STATE_VALUE,      /* a value or a bracket is expected next */
+        STATE_VALUE_POST, /* a value or closing bracket was just read */
+};
+
+/* Adds the number of newlines found in the first n bytes of s to *line; a NULL line is ignored */
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+        const char *p = s;
+
+        if (!line)
+                return;
+
+        for (;;) {
+                const char *f;
+
+                f = memchr(p, '\n', n);
+                if (!f)
+                        return;
+
+                n -= (f - p) + 1;
+                p = f + 1;
+                (*line)++;
+        }
+}
+
+/* Parses the double-quoted JSON string at *p. On success stores a newly allocated, unescaped copy in
+ * *ret (to be freed by the caller), advances *p past the closing quote and returns JSON_STRING.
+ * Returns -EINVAL on malformed input, -ENOMEM if allocation fails, or the negative value returned by
+ * utf8_encoded_valid_unichar() for a byte sequence it rejects. */
+static int json_parse_string(const char **p, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        size_t n = 0, allocated = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c != '"')
+                return -EINVAL;
+
+        c++;
+
+        for (;;) {
+                int len;
+
+                /* Check for EOF */
+                if (*c == 0)
+                        return -EINVAL;
+
+                /* Check for control characters 0x00..0x1f */
+                if (*c > 0 && *c < ' ')
+                        return -EINVAL;
+
+                /* Check for control character 0x7f */
+                if (*c == 0x7f)
+                        return -EINVAL;
+
+                if (*c == '"') {
+                        if (!s) {
+                                s = strdup("");
+                                if (!s)
+                                        return -ENOMEM;
+                        } else
+                                s[n] = 0;
+
+                        *p = c + 1;
+
+                        /* Hand ownership to the caller, so that _cleanup_free_ does not free it */
+                        *ret = s;
+                        s = NULL;
+                        return JSON_STRING;
+                }
+
+                if (*c == '\\') {
+                        char ch = 0;
+                        c++;
+
+                        if (*c == 0)
+                                return -EINVAL;
+
+                        if (IN_SET(*c, '"', '\\', '/'))
+                                ch = *c;
+                        else if (*c == 'b')
+                                ch = '\b';
+                        else if (*c == 'f')
+                                ch = '\f';
+                        else if (*c == 'n')
+                                ch = '\n';
+                        else if (*c == 'r')
+                                ch = '\r';
+                        else if (*c == 't')
+                                ch = '\t';
+                        else if (*c == 'u') {
+                                int aa, bb, cc, dd;
+                                uint16_t x;
+
+                                aa = unhexchar(c[1]);
+                                if (aa < 0)
+                                        return -EINVAL;
+
+                                bb = unhexchar(c[2]);
+                                if (bb < 0)
+                                        return -EINVAL;
+
+                                cc = unhexchar(c[3]);
+                                if (cc < 0)
+                                        return -EINVAL;
+
+                                dd = unhexchar(c[4]);
+                                if (dd < 0)
+                                        return -EINVAL;
+
+                                x = ((uint16_t) aa << 12) |
+                                    ((uint16_t) bb << 8) |
+                                    ((uint16_t) cc << 4) |
+                                    ((uint16_t) dd);
+
+                                /* "\u0000" would put a NUL into the middle of the returned C string */
+                                if (x == 0)
+                                        return -EINVAL;
+
+                                /* Room for up to 3 encoded bytes plus the trailing NUL */
+                                if (!GREEDY_REALLOC(s, allocated, n + 4))
+                                        return -ENOMEM;
+
+                                n += utf8_encode_unichar(x, s + n);
+                                c += 5;
+                                continue;
+                        } else
+                                return -EINVAL;
+
+                        if (!GREEDY_REALLOC(s, allocated, n + 2))
+                                return -ENOMEM;
+
+                        s[n++] = ch;
+                        c++;
+                        continue;
+                }
+
+                len = utf8_encoded_valid_unichar(c);
+                if (len < 0)
+                        return len;
+
+                if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+                        return -ENOMEM;
+
+                memcpy(s + n, c, len);
+                n += len;
+                c += len;
+        }
+}
+
+/* Parses the JSON number at *p. On success advances *p past the number and returns JSON_INTEGER (value
+ * in ret->integer) or JSON_REAL (value in ret->real). Numbers with a fraction or an exponent, and
+ * integers too large for intmax_t, are returned as JSON_REAL. Returns -EINVAL on malformed input. */
+static int json_parse_number(const char **p, union json_value *ret) {
+        bool negative = false, exponent_negative = false, is_double = false;
+        double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+        intmax_t i = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c == '-') {
+                negative = true;
+                c++;
+        }
+
+        if (*c == '0')
+                c++;
+        else {
+                if (!strchr("123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                do {
+                        if (!is_double) {
+                                int digit = *c - '0';
+
+                                /* Test before multiplying, signed overflow is undefined behaviour.
+                                 * On overflow fall back to the double accumulated in x. */
+                                if (i > (INTMAX_MAX - digit) / 10)
+                                        is_double = true;
+                                else
+                                        i = 10 * i + digit;
+                        }
+
+                        x = 10.0 * x + (*c - '0');
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        if (*c == '.') {
+                is_double = true;
+                c++;
+
+                if (!strchr("0123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                do {
+                        y = 10.0 * y + (*c - '0');
+                        shift = 10.0 * shift;
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        if (*c == 'e' || *c == 'E') {
+                is_double = true;
+                c++;
+
+                if (*c == '-') {
+                        exponent_negative = true;
+                        c++;
+                } else if (*c == '+')
+                        c++;
+
+                if (!strchr("0123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                do {
+                        exponent = 10.0 * exponent + (*c - '0');
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        /* The number ends at the first character that cannot belong to it. Whatever follows (",", "]",
+         * "}", whitespace, ...) is left to the tokenizer, so that numbers may appear inside arrays and
+         * objects and not only at the very end of the text. */
+        *p = c;
+
+        if (is_double) {
+                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
+                return JSON_REAL;
+        } else {
+                ret->integer = negative ? -i : i;
+                return JSON_INTEGER;
+        }
+}
+
+/* Reads the next token from the JSON text at *p and returns its type (one of the JSON_xxx constants),
+ * JSON_END when only whitespace is left, or a negative errno-style error on malformed input.
+ *
+ * On success *p is advanced past the token. For JSON_STRING the unescaped string is returned in
+ * *ret_string and must be freed by the caller; for JSON_INTEGER, JSON_REAL and JSON_BOOLEAN the value
+ * is returned in *ret_value. *state carries the tokenizer state between calls and must be NULL on the
+ * first call. If line is non-NULL it is set to 1 on the first call and incremented for every newline
+ * that is skipped between tokens. */
+int json_tokenize(
+                const char **p,
+                char **ret_string,
+                union json_value *ret_value,
+                void **state,
+                unsigned *line) {
+
+        const char *c;
+        int t;
+        int r;
+
+        assert(p);
+        assert(*p);
+        assert(ret_string);
+        assert(ret_value);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        if (t == STATE_NULL) {
+                if (line)
+                        *line = 1;
+                t = STATE_VALUE;
+        }
+
+        for (;;) {
+                const char *b;
+
+                b = c + strspn(c, WHITESPACE);
+                if (*b == 0)
+                        return JSON_END;
+
+                inc_lines(line, c, b - c);
+                c = b;
+
+                switch (t) {
+
+                case STATE_VALUE:
+
+                        if (*c == '{') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE);
+                                return JSON_OBJECT_OPEN;
+
+                        } else if (*c == '}') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_OBJECT_CLOSE;
+
+                        } else if (*c == '[') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE);
+                                return JSON_ARRAY_OPEN;
+
+                        } else if (*c == ']') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_ARRAY_CLOSE;
+
+                        } else if (*c == '"') {
+                                r = json_parse_string(&c, ret_string);
+                                if (r < 0)
+                                        return r;
+
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return r;
+
+                        } else if (strchr("-0123456789", *c)) {
+                                r = json_parse_number(&c, ret_value);
+                                if (r < 0)
+                                        return r;
+
+                                *ret_string = NULL;
+                                *p = c;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return r;
+
+                        } else if (startswith(c, "true")) {
+                                *ret_string = NULL;
+                                ret_value->boolean = true;
+                                *p = c + 4;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_BOOLEAN;
+
+                        } else if (startswith(c, "false")) {
+                                *ret_string = NULL;
+                                ret_value->boolean = false;
+                                *p = c + 5;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_BOOLEAN;
+
+                        } else if (startswith(c, "null")) {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 4;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_NULL;
+
+                        } else
+                                return -EINVAL;
+
+                case STATE_VALUE_POST:
+
+                        if (*c == ':') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE);
+                                return JSON_COLON;
+                        } else if (*c == ',') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE);
+                                return JSON_COMMA;
+                        } else if (*c == '}') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_OBJECT_CLOSE;
+                        } else if (*c == ']') {
+                                *ret_string = NULL;
+                                *ret_value = JSON_VALUE_NULL;
+                                *p = c + 1;
+                                *state = INT_TO_PTR(STATE_VALUE_POST);
+                                return JSON_ARRAY_CLOSE;
+                        } else
+                                return -EINVAL;
+
+                default:
+                        /* Unknown state: fail instead of spinning forever in the loop */
+                        return -EINVAL;
+                }
+
+        }
+}
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 000000000..a8457132e
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,53 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+/* Token types returned by json_tokenize() */
+enum {
+        JSON_END,
+        JSON_COLON,
+        JSON_COMMA,
+        JSON_OBJECT_OPEN,
+        JSON_OBJECT_CLOSE,
+        JSON_ARRAY_OPEN,
+        JSON_ARRAY_CLOSE,
+        JSON_STRING,
+        JSON_REAL,
+        JSON_INTEGER,
+        JSON_BOOLEAN,
+        JSON_NULL,
+};
+
+/* Value of a JSON_BOOLEAN, JSON_REAL or JSON_INTEGER token; which member is valid depends on the token type */
+union json_value {
+        bool boolean;
+        double real;
+        intmax_t integer;
+};
+
+#define JSON_VALUE_NULL ((union json_value) {})
+
+/* Reads the next token from *p; see json.c for the full contract */
+int json_tokenize(const char **p, char **ret_string, union json_value *ret_value, void **state, unsigned *line);
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 4469a7375..67f6285ee 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -263,39 +263,38 @@ char *ascii_is_valid(const char *str) {
         return (char*) str;
 }
 
+/* Encodes the 16-bit code point c as UTF-8 into p, which must have room for at least 3 bytes. Returns
+ * the number of bytes written (1..3); no trailing NUL is written. Surrogate values are not special-cased. */
+int utf8_encode_unichar(uint16_t c, char *p) {
+        uint8_t *t = (uint8_t*) p;
+
+        if (c < 0x80) {
+                t[0] = (uint8_t) c;
+                return 1;
+        } else if (c < 0x800) {
+                t[0] = (uint8_t) (0xc0 | (c >> 6));
+                t[1] = (uint8_t) (0x80 | (c & 0x3f));
+                return 2;
+        } else {
+                t[0] = (uint8_t) (0xe0 | (c >> 12));
+                t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                t[2] = (uint8_t) (0x80 | (c & 0x3f));
+                return 3;
+        }
+}
+
 char *utf16_to_utf8(const void *s, size_t length) {
-        char *r;
         const uint8_t *f;
-        uint8_t *t;
+        char *r, *t;
 
         r = new(char, (length*3+1)/2 + 1);
         if (!r)
                 return NULL;
 
-        t = (uint8_t*) r;
-
-        for (f = s; f < (const uint8_t*) s + length; f += 2) {
-                uint16_t c;
-
-                c = (f[1] << 8) | f[0];
-
-                if (c == 0) {
-                        *t = 0;
-                        return r;
-                } else if (c < 0x80) {
-                        *(t++) = (uint8_t) c;
-                } else if (c < 0x800) {
-                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                } else {
-                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
-                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                }
-        }
+        for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
+                t += utf8_encode_unichar((f[1] << 8) | f[0], t);
 
         *t = 0;
-
         return r;
 }
 
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 59abee50a..dcf8588d3 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -36,6 +36,7 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pu
 char *utf8_escape_invalid(const char *s);
 char *utf8_escape_non_printable(const char *str);
 
+int utf8_encode_unichar(uint16_t c, char *p);
 char *utf16_to_utf8(const void *s, size_t length);
 
 int utf8_encoded_valid_unichar(const char *str);
diff --git a/src/shared/xml.h b/src/shared/xml.h
index af71709c3..b256b0ba1 100644
--- a/src/shared/xml.h
+++ b/src/shared/xml.h
@@ -28,7 +28,7 @@ enum {
         XML_TAG_CLOSE,
         XML_TAG_CLOSE_EMPTY,
         XML_ATTRIBUTE_NAME,
-        XML_ATTRIBUTE_VALUE
+        XML_ATTRIBUTE_VALUE,
 };
 
 int xml_tokenize(const char **p, char **name, void **state, unsigned *line);
diff --git a/src/test/test-json.c b/src/test/test-json.c
new file mode 100644
index 000000000..8777cf7a4
--- /dev/null
+++ b/src/test/test-json.c
@@ -0,0 +1,109 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <math.h>
+
+#include "log.h"
+#include "util.h"
+#include "json.h"
+
+/* Tokenizes data and compares the token stream against the variadic arguments: for every token its
+ * expected type (int), followed by the expected value for JSON_STRING (const char*), JSON_REAL (double),
+ * JSON_INTEGER (intmax_t) and JSON_BOOLEAN (int). The list ends with JSON_END or a negative error. */
+static void test_one(const char *data, ...) {
+        void *state = NULL;
+        va_list ap;
+
+        va_start(ap, data);
+
+        for (;;) {
+                _cleanup_free_ char *str = NULL;
+                union json_value v = {};
+                int t, tt;
+
+                t = json_tokenize(&data, &str, &v, &state, NULL);
+                tt = va_arg(ap, int);
+
+                assert_se(t == tt);
+
+                if (t == JSON_END || t < 0)
+                        break;
+
+                else if (t == JSON_STRING) {
+                        const char *nn;
+
+                        nn = va_arg(ap, const char *);
+                        assert_se(streq_ptr(nn, str));
+
+                } else if (t == JSON_REAL) {
+                        double d;
+
+                        d = va_arg(ap, double);
+                        /* fabs(), not abs(): abs() takes an int and would truncate the difference */
+                        assert_se(fabs(d - v.real) < 0.001);
+
+                } else if (t == JSON_INTEGER) {
+                        intmax_t i;
+
+                        i = va_arg(ap, intmax_t);
+                        assert_se(i == v.integer);
+
+                } else if (t == JSON_BOOLEAN) {
+                        bool b;
+
+                        b = va_arg(ap, int);
+                        assert_se(b == v.boolean);
+                }
+        }
+
+        va_end(ap);
+}
+
+int main(int argc, char *argv[]) {
+
+        test_one("x", -EINVAL);
+        test_one("", JSON_END);
+        test_one(" ", JSON_END);
+        test_one("0", JSON_INTEGER, (intmax_t) 0, JSON_END);
+        test_one("1234", JSON_INTEGER, (intmax_t) 1234, JSON_END);
+        test_one("3.141", JSON_REAL, 3.141, JSON_END);
+        test_one("0.0", JSON_REAL, 0.0, JSON_END);
+        test_one("7e3", JSON_REAL, 7e3, JSON_END);
+        test_one("-7e-3", JSON_REAL, -7e-3, JSON_END);
+        test_one("true", JSON_BOOLEAN, true, JSON_END);
+        test_one("false", JSON_BOOLEAN, false, JSON_END);
+        test_one("null", JSON_NULL, JSON_END);
+        test_one("{}", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("\t {\n} \n", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("[]", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+        test_one("\t [] \n\n", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+        test_one("\"\"", JSON_STRING, "", JSON_END);
+        test_one("\"foo\"", JSON_STRING, "foo", JSON_END);
+        test_one("\"foo\\nfoo\"", JSON_STRING, "foo\nfoo", JSON_END);
+        test_one("{\"foo\" : \"bar\"}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_STRING, "bar", JSON_OBJECT_CLOSE, JSON_END);
+        test_one("{\"foo\" : [true, false]}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_ARRAY_OPEN, JSON_BOOLEAN, true, JSON_COMMA, JSON_BOOLEAN, false, JSON_ARRAY_CLOSE, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("[1, 2]", JSON_ARRAY_OPEN, JSON_INTEGER, (intmax_t) 1, JSON_COMMA, JSON_INTEGER, (intmax_t) 2, JSON_ARRAY_CLOSE, JSON_END);
+        test_one("{\"foo\" : -7.5}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_REAL, -7.5, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("\"\xef\xbf\xbd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+        test_one("\"\\ufffd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+
+        return 0;
+}