chiark / gitweb /
basic: modernize conf-files.c a bit
[elogind.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "extract-word.h"
25 #include "string-util.h"
26 #include "utf8.h"
27 #include "util.h"
28
/*
 * Extracts the next word from the string *p and hands it back in *ret.
 *
 * p:          in/out cursor. On success it is advanced behind the consumed
 *             word and the separator(s) following it, or set to NULL once the
 *             end of the input has been reached. If *p is already NULL on
 *             entry there is nothing left to parse and 0 is returned.
 * ret:        receives the buffer holding the word (return value 1) or NULL
 *             (return value 0). It is not written on error.
 * separators: characters that delimit words; NULL selects WHITESPACE.
 * flags:      EXTRACT_QUOTES
 *                     ' and " open a quoted section; the quote characters
 *                     themselves are not copied to the output.
 *             EXTRACT_RETAIN_ESCAPE
 *                     a backslash is not treated as an escape introducer.
 *             EXTRACT_CUNESCAPE
 *                     what follows a backslash is decoded via cunescape_one();
 *                     without this flag the character after the backslash is
 *                     copied as is.
 *             EXTRACT_CUNESCAPE_RELAX
 *                     escape sequences cunescape_one() rejects are copied
 *                     verbatim (backslash included) instead of failing.
 *             EXTRACT_RELAX
 *                     input that ends inside quotes or right after a backslash
 *                     is accepted instead of failing.
 *             EXTRACT_DONT_COALESCE_SEPARATORS
 *                     every single separator ends a word, so empty words are
 *                     returned.
 *
 * Returns 1 if a word was stored in *ret, 0 if the input is exhausted,
 * -ENOMEM if GREEDY_REALLOC() fails and -EINVAL for unbalanced quotes, a
 * trailing backslash or an undecodable escape sequence (subject to the flags
 * above).
 */
29 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30         _cleanup_free_ char *s = NULL;
31         size_t allocated = 0, sz = 0;
32         char c;
33         int r;
34
35         char quote = 0;                 /* 0 or ' or " */
36         bool backslash = false;         /* whether we've just seen a backslash */
37
38         assert(p);
39         assert(ret);
40
41         /* Bail early if called after last value or with no input */
42         if (!*p)
43                 goto finish_force_terminate;
44         c = **p;
45
46         if (!separators)
47                 separators = WHITESPACE;
48
49         /* Parses the first word of a string, and returns it in
50          * *ret. Removes all quotes in the process. When parsing fails
51          * (because of an uneven number of quotes or similar), leaves
52          * the pointer *p at the first invalid character. */
53
           /* In non-coalescing mode a word is always returned from here on
            * (possibly an empty one), so make sure the buffer exists before
            * any of the exit labels below is reached. */
54         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55                 if (!GREEDY_REALLOC(s, allocated, sz+1))
56                         return -ENOMEM;
57
           /* Phase 1: skip leading separators. c always mirrors **p; the loop
            * header advances both together. */
58         for (;; (*p) ++, c = **p) {
59                 if (c == 0)
60                         goto finish_force_terminate;
61                 else if (strchr(separators, c)) {
62                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
                                   /* A leading separator ends an empty word;
                                    * step over it and return that word. */
63                                 (*p) ++;
64                                 goto finish_force_next;
65                         }
66                 } else {
67                         /* We found a non-blank character, so we will always
68                          * want to return a string (even if it is empty),
69                          * allocate it here. */
70                         if (!GREEDY_REALLOC(s, allocated, sz+1))
71                                 return -ENOMEM;
72                         break;
73                 }
74         }
75
           /* Phase 2: state machine driven by 'backslash' and 'quote'. The
            * inner loops 'break' on a quote or backslash so that the header of
            * this outer loop consumes that character before the next state is
            * entered. */
76         for (;; (*p) ++, c = **p) {
77                 if (backslash) {
                           /* NOTE(review): sz+7 presumably covers the longest
                            * output of one escape (backslash + character, or
                            * whatever utf8_encode_unichar() writes) plus the
                            * terminating NUL - confirm against utf8.h. */
78                         if (!GREEDY_REALLOC(s, allocated, sz+7))
79                                 return -ENOMEM;
80
81                         if (c == 0) {
82                                 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
83                                     (!quote || flags & EXTRACT_RELAX)) {
84                                         /* If we find an unquoted trailing backslash and we're in
85                                          * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
86                                          * output.
87                                          *
88                                          * Unbalanced quotes will only be allowed in EXTRACT_RELAX
89                                          * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
90                                          */
91                                         s[sz++] = '\\';
92                                         goto finish_force_terminate;
93                                 }
94                                 if (flags & EXTRACT_RELAX)
95                                         goto finish_force_terminate;
96                                 return -EINVAL;
97                         }
98
99                         if (flags & EXTRACT_CUNESCAPE) {
100                                 uint32_t u;
101
102                                 r = cunescape_one(*p, (size_t) -1, &c, &u);
103                                 if (r < 0) {
104                                         if (flags & EXTRACT_CUNESCAPE_RELAX) {
105                                                 s[sz++] = '\\';
106                                                 s[sz++] = c;
107                                         } else
108                                                 return -EINVAL;
109                                 } else {
                                            /* NOTE(review): r is presumably the number of
                                             * input characters the escape occupied; all but
                                             * one are skipped here, the loop header skips
                                             * the last - confirm against escape.h. */
110                                         (*p) += r - 1;
111
112                                         if (c != 0)
113                                                 s[sz++] = c; /* normal explicit char */
114                                         else
115                                                 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
116                                 }
117                         } else
118                                 s[sz++] = c;
119
120                         backslash = false;
121
122                 } else if (quote) {     /* inside either single or double quotes */
123                         for (;; (*p) ++, c = **p) {
124                                 if (c == 0) {
125                                         if (flags & EXTRACT_RELAX)
126                                                 goto finish_force_terminate;
127                                         return -EINVAL;
128                                 } else if (c == quote) {        /* found the end quote */
129                                         quote = 0;
130                                         break;
131                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
132                                         backslash = true;
133                                         break;
134                                 } else {
                                            /* sz+2: this character plus the NUL that
                                             * finish_force_next appends. */
135                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
136                                                 return -ENOMEM;
137
138                                         s[sz++] = c;
139                                 }
140                         }
141
142                 } else {
                            /* Outside of quotes: separators end the word here. */
143                         for (;; (*p) ++, c = **p) {
144                                 if (c == 0)
145                                         goto finish_force_terminate;
146                                 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
147                                         quote = c;
148                                         break;
149                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
150                                         backslash = true;
151                                         break;
152                                 } else if (strchr(separators, c)) {
153                                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
154                                                 (*p) ++;
155                                                 goto finish_force_next;
156                                         }
157                                         /* Skip additional coalesced separators. */
158                                         for (;; (*p) ++, c = **p) {
159                                                 if (c == 0)
160                                                         goto finish_force_terminate;
161                                                 if (!strchr(separators, c))
162                                                         break;
163                                         }
164                                         goto finish;
165
166                                 } else {
                                            /* sz+2: this character plus the NUL that
                                             * finish_force_next appends. */
167                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
168                                                 return -ENOMEM;
169
170                                         s[sz++] = c;
171                                 }
172                         }
173                 }
174         }
175
/* End of input reached: report to the caller that nothing is left to parse. */
176 finish_force_terminate:
177         *p = NULL;
178 finish:
            /* No buffer was ever allocated, i.e. no word was seen at all. */
179         if (!s) {
180                 *p = NULL;
181                 *ret = NULL;
182                 return 0;
183         }
184
/* Terminate the word and pass the buffer on to the caller. s is reset to NULL
 * afterwards, presumably so that _cleanup_free_ does not release the buffer
 * that *ret now refers to. */
185 finish_force_next:
186         s[sz] = 0;
187         *ret = s;
188         s = NULL;
189
190         return 1;
191 }
192
193 #if 0 /// UNNEEDED by elogind
194 int extract_first_word_and_warn(
195                 const char **p,
196                 char **ret,
197                 const char *separators,
198                 ExtractFlags flags,
199                 const char *unit,
200                 const char *filename,
201                 unsigned line,
202                 const char *rvalue) {
203
204         /* Try to unquote it, if it fails, warn about it and try again
205          * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
206          * backslashes verbatim in invalid escape sequences. */
207
208         const char *save;
209         int r;
210
211         save = *p;
212         r = extract_first_word(p, ret, separators, flags);
213         if (r >= 0)
214                 return r;
215
216         if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
217
218                 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
219                 *p = save;
220                 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
221                 if (r >= 0) {
222                         /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
223                         log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
224                         return r;
225                 }
226
227                 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
228                 if (r == -EINVAL)
229                         return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
230         }
231
232         /* Can be any error, report it */
233         return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
234 }
235
236 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
237         va_list ap;
238         char **l;
239         int n = 0, i, c, r;
240
241         /* Parses a number of words from a string, stripping any
242          * quotes if necessary. */
243
244         assert(p);
245
246         /* Count how many words are expected */
247         va_start(ap, flags);
248         for (;;) {
249                 if (!va_arg(ap, char **))
250                         break;
251                 n++;
252         }
253         va_end(ap);
254
255         if (n <= 0)
256                 return 0;
257
258         /* Read all words into a temporary array */
259         l = newa0(char*, n);
260         for (c = 0; c < n; c++) {
261
262                 r = extract_first_word(p, &l[c], separators, flags);
263                 if (r < 0) {
264                         int j;
265
266                         for (j = 0; j < c; j++)
267                                 free(l[j]);
268
269                         return r;
270                 }
271
272                 if (r == 0)
273                         break;
274         }
275
276         /* If we managed to parse all words, return them in the passed
277          * in parameters */
278         va_start(ap, flags);
279         for (i = 0; i < n; i++) {
280                 char **v;
281
282                 v = va_arg(ap, char **);
283                 assert(v);
284
285                 *v = l[i];
286         }
287         va_end(ap);
288
289         return c;
290 }
291 #endif // 0