chiark / gitweb /
shared: utf8 - support ucs4 -> utf8
[elogind.git] / src / shared / json.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2014 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/types.h>
23 #include <math.h>
24
25 #include "macro.h"
26 #include "log.h"
27 #include "util.h"
28 #include "utf8.h"
29 #include "json.h"
30
/* Tokenizer states, stored by json_tokenize() in the caller's opaque "state" pointer between calls. */
enum {
        /* No token read yet; json_tokenize() resets the line counter and continues as STATE_VALUE */
        STATE_NULL = 0,

        /* A value, or an opening/closing bracket, is expected next */
        STATE_VALUE = 1,

        /* A value was just completed; ':' ',' '}' or ']' is expected next */
        STATE_VALUE_POST = 2,
};
36
/* Adds to *line the number of '\n' bytes found within the first n bytes of s.
 * A NULL line pointer disables counting and makes this a no-op. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *end, *nl;

        if (!line)
                return;

        end = s + n;

        /* Hop from newline to newline until the remaining range holds none */
        while ((nl = memchr(s, '\n', (size_t) (end - s)))) {
                (*line)++;
                s = nl + 1;
        }
}
55
56 static int json_parse_string(const char **p, char **ret) {
57         _cleanup_free_ char *s = NULL;
58         size_t n = 0, allocated = 0;
59         const char *c;
60
61         assert(p);
62         assert(*p);
63         assert(ret);
64
65         c = *p;
66
67         if (*c != '"')
68                 return -EINVAL;
69
70         c++;
71
72         for (;;) {
73                 int len;
74
75                 /* Check for EOF */
76                 if (*c == 0)
77                         return -EINVAL;
78
79                 /* Check for control characters 0x00..0x1f */
80                 if (*c > 0 && *c < ' ')
81                         return -EINVAL;
82
83                 /* Check for control character 0x7f */
84                 if (*c == 0x7f)
85                         return -EINVAL;
86
87                 if (*c == '"') {
88                         if (!s) {
89                                 s = strdup("");
90                                 if (!s)
91                                         return -ENOMEM;
92                         } else
93                                 s[n] = 0;
94
95                         *p = c + 1;
96
97                         *ret = s;
98                         s = NULL;
99                         return JSON_STRING;
100                 }
101
102                 if (*c == '\\') {
103                         char ch = 0;
104                         c++;
105
106                         if (*c == 0)
107                                 return -EINVAL;
108
109                         if (IN_SET(*c, '"', '\\', '/'))
110                                 ch = *c;
111                         else if (*c == 'b')
112                                 ch = '\b';
113                         else if (*c == 'f')
114                                 ch = '\f';
115                         else if (*c == 'n')
116                                 ch = '\n';
117                         else if (*c == 'r')
118                                 ch = '\r';
119                         else if (*c == 't')
120                                 ch = '\t';
121                         else if (*c == 'u') {
122                                 int aa, bb, cc, dd;
123                                 uint16_t x;
124
125                                 aa = unhexchar(c[1]);
126                                 if (aa < 0)
127                                         return -EINVAL;
128
129                                 bb = unhexchar(c[2]);
130                                 if (bb < 0)
131                                         return -EINVAL;
132
133                                 cc = unhexchar(c[3]);
134                                 if (cc < 0)
135                                         return -EINVAL;
136
137                                 dd = unhexchar(c[4]);
138                                 if (dd < 0)
139                                         return -EINVAL;
140
141
142                                 x =     ((uint16_t) aa << 12) |
143                                         ((uint16_t) bb << 8) |
144                                         ((uint16_t) cc << 4) |
145                                         ((uint16_t) dd);
146
147                                 if (x <= 0)
148                                         return -EINVAL;
149
150                                 if (!GREEDY_REALLOC(s, allocated, n + 4))
151                                         return -ENOMEM;
152
153                                 n += utf8_encode_unichar(s + n, x);
154                                 c += 5;
155                                 continue;
156                         } else
157                                 return -EINVAL;
158
159                         if (!GREEDY_REALLOC(s, allocated, n + 2))
160                                 return -ENOMEM;
161
162                         s[n++] = ch;
163                         c ++;
164                         continue;
165                 }
166
167                 len = utf8_encoded_valid_unichar(c);
168                 if (len < 0)
169                         return len;
170
171                 if (!GREEDY_REALLOC(s, allocated, n + len + 1))
172                         return -ENOMEM;
173
174                 memcpy(s + n, c, len);
175                 n += len;
176                 c += len;
177         }
178 }
179
180 static int json_parse_number(const char **p, union json_value *ret) {
181         bool negative = false, exponent_negative = false, is_double = false;
182         double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
183         intmax_t i = 0;
184         const char *c;
185
186         assert(p);
187         assert(*p);
188         assert(ret);
189
190         c = *p;
191
192         if (*c == '-') {
193                 negative = true;
194                 c++;
195         }
196
197         if (*c == '0')
198                 c++;
199         else {
200                 if (!strchr("123456789", *c) || *c == 0)
201                         return -EINVAL;
202
203                 do {
204                         if (!is_double) {
205                                 int64_t t;
206
207                                 t = 10 * i + (*c - '0');
208                                 if (t < i) /* overflow */
209                                         is_double = false;
210                                 else
211                                         i = t;
212                         }
213
214                         x = 10.0 * x + (*c - '0');
215                         c++;
216                 } while (strchr("0123456789", *c) && *c != 0);
217         }
218
219         if (*c == '.') {
220                 is_double = true;
221                 c++;
222
223                 if (!strchr("0123456789", *c) || *c == 0)
224                         return -EINVAL;
225
226                 do {
227                         y = 10.0 * y + (*c - '0');
228                         shift = 10.0 * shift;
229                         c++;
230                 } while (strchr("0123456789", *c) && *c != 0);
231         }
232
233         if (*c == 'e' || *c == 'E') {
234                 is_double = true;
235                 c++;
236
237                 if (*c == '-') {
238                         exponent_negative = true;
239                         c++;
240                 } else if (*c == '+')
241                         c++;
242
243                 if (!strchr("0123456789", *c) || *c == 0)
244                         return -EINVAL;
245
246                 do {
247                         exponent = 10.0 * exponent + (*c - '0');
248                         c++;
249                 } while (strchr("0123456789", *c) && *c != 0);
250         }
251
252         if (*c != 0)
253                 return -EINVAL;
254
255         *p = c;
256
257         if (is_double) {
258                 ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
259                 return JSON_REAL;
260         } else {
261                 ret->integer = negative ? -i : i;
262                 return JSON_INTEGER;
263         }
264 }
265
266 int json_tokenize(
267                 const char **p,
268                 char **ret_string,
269                 union json_value *ret_value,
270                 void **state,
271                 unsigned *line) {
272
273         const char *c;
274         int t;
275         int r;
276
277         assert(p);
278         assert(*p);
279         assert(ret_string);
280         assert(ret_value);
281         assert(state);
282
283         t = PTR_TO_INT(*state);
284         c = *p;
285
286         if (t == STATE_NULL) {
287                 if (line)
288                         *line = 1;
289                 t = STATE_VALUE;
290         }
291
292         for (;;) {
293                 const char *b;
294
295                 b = c + strspn(c, WHITESPACE);
296                 if (*b == 0)
297                         return JSON_END;
298
299                 inc_lines(line, c, b - c);
300                 c = b;
301
302                 switch (t) {
303
304                 case STATE_VALUE:
305
306                         if (*c == '{') {
307                                 *ret_string = NULL;
308                                 *ret_value = JSON_VALUE_NULL;
309                                 *p = c + 1;
310                                 *state = INT_TO_PTR(STATE_VALUE);
311                                 return JSON_OBJECT_OPEN;
312
313                         } else if (*c == '}') {
314                                 *ret_string = NULL;
315                                 *ret_value = JSON_VALUE_NULL;
316                                 *p = c + 1;
317                                 *state = INT_TO_PTR(STATE_VALUE_POST);
318                                 return JSON_OBJECT_CLOSE;
319
320                         } else if (*c == '[') {
321                                 *ret_string = NULL;
322                                 *ret_value = JSON_VALUE_NULL;
323                                 *p = c + 1;
324                                 *state = INT_TO_PTR(STATE_VALUE);
325                                 return JSON_ARRAY_OPEN;
326
327                         } else if (*c == ']') {
328                                 *ret_string = NULL;
329                                 *ret_value = JSON_VALUE_NULL;
330                                 *p = c + 1;
331                                 *state = INT_TO_PTR(STATE_VALUE_POST);
332                                 return JSON_ARRAY_CLOSE;
333
334                         } else if (*c == '"') {
335                                 r = json_parse_string(&c, ret_string);
336                                 if (r < 0)
337                                         return r;
338
339                                 *ret_value = JSON_VALUE_NULL;
340                                 *p = c;
341                                 *state = INT_TO_PTR(STATE_VALUE_POST);
342                                 return r;
343
344                         } else if (strchr("-0123456789", *c)) {
345                                 r = json_parse_number(&c, ret_value);
346                                 if (r < 0)
347                                         return r;
348
349                                 *ret_string = NULL;
350                                 *p = c;
351                                 *state = INT_TO_PTR(STATE_VALUE_POST);
352                                 return r;
353
354                         } else if (startswith(c, "true")) {
355                                 *ret_string = NULL;
356                                 ret_value->boolean = true;
357                                 *p = c + 4;
358                                 *state = INT_TO_PTR(STATE_VALUE_POST);
359                                 return JSON_BOOLEAN;
360
361                         } else if (startswith(c, "false")) {
362                                 *ret_string = NULL;
363                                 ret_value->boolean = false;
364                                 *p = c + 5;
365                                 *state = INT_TO_PTR(STATE_VALUE_POST);
366                                 return JSON_BOOLEAN;
367
368                         } else if (startswith(c, "null")) {
369                                 *ret_string = NULL;
370                                 *ret_value = JSON_VALUE_NULL;
371                                 *p = c + 4;
372                                 *state = INT_TO_PTR(STATE_VALUE_POST);
373                                 return JSON_NULL;
374
375                         } else
376                                 return -EINVAL;
377
378                 case STATE_VALUE_POST:
379
380                         if (*c == ':') {
381                                 *ret_string = NULL;
382                                 *ret_value = JSON_VALUE_NULL;
383                                 *p = c + 1;
384                                 *state = INT_TO_PTR(STATE_VALUE);
385                                 return JSON_COLON;
386                         } else if (*c == ',') {
387                                 *ret_string = NULL;
388                                 *ret_value = JSON_VALUE_NULL;
389                                 *p = c + 1;
390                                 *state = INT_TO_PTR(STATE_VALUE);
391                                 return JSON_COMMA;
392                         } else if (*c == '}') {
393                                 *ret_string = NULL;
394                                 *ret_value = JSON_VALUE_NULL;
395                                 *p = c + 1;
396                                 *state = INT_TO_PTR(STATE_VALUE_POST);
397                                 return JSON_OBJECT_CLOSE;
398                         } else if (*c == ']') {
399                                 *ret_string = NULL;
400                                 *ret_value = JSON_VALUE_NULL;
401                                 *p = c + 1;
402                                 *state = INT_TO_PTR(STATE_VALUE_POST);
403                                 return JSON_ARRAY_CLOSE;
404                         } else
405                                 return -EINVAL;
406                 }
407
408         }
409 }