chiark / gitweb /
Prep v228: Add remaining updates from upstream (1/3)
[elogind.git] / src / basic / extract-word.c
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "extract-word.h"
25 #include "string-util.h"
26 #include "utf8.h"
27 #include "util.h"
28
29 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30         _cleanup_free_ char *s = NULL;
31         size_t allocated = 0, sz = 0;
32         char c;
33         int r;
34
35         char quote = 0;                 /* 0 or ' or " */
36         bool backslash = false;         /* whether we've just seen a backslash */
37
38         assert(p);
39         assert(ret);
40
41         /* Bail early if called after last value or with no input */
42         if (!*p)
43                 goto finish_force_terminate;
44         c = **p;
45
46         if (!separators)
47                 separators = WHITESPACE;
48
49         /* Parses the first word of a string, and returns it in
50          * *ret. Removes all quotes in the process. When parsing fails
51          * (because of an uneven number of quotes or similar), leaves
52          * the pointer *p at the first invalid character. */
53
54         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55                 if (!GREEDY_REALLOC(s, allocated, sz+1))
56                         return -ENOMEM;
57
58         for (;; (*p) ++, c = **p) {
59                 if (c == 0)
60                         goto finish_force_terminate;
61                 else if (strchr(separators, c)) {
62                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
63                                 (*p) ++;
64                                 goto finish_force_next;
65                         }
66                 } else {
67                         /* We found a non-blank character, so we will always
68                          * want to return a string (even if it is empty),
69                          * allocate it here. */
70                         if (!GREEDY_REALLOC(s, allocated, sz+1))
71                                 return -ENOMEM;
72                         break;
73                 }
74         }
75
76         for (;; (*p) ++, c = **p) {
77                 if (backslash) {
78                         if (!GREEDY_REALLOC(s, allocated, sz+7))
79                                 return -ENOMEM;
80
81                         if (c == 0) {
82                                 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
83                                     (!quote || flags & EXTRACT_RELAX)) {
84                                         /* If we find an unquoted trailing backslash and we're in
85                                          * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
86                                          * output.
87                                          *
88                                          * Unbalanced quotes will only be allowed in EXTRACT_RELAX
89                                          * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
90                                          */
91                                         s[sz++] = '\\';
92                                         goto finish_force_terminate;
93                                 }
94                                 if (flags & EXTRACT_RELAX)
95                                         goto finish_force_terminate;
96                                 return -EINVAL;
97                         }
98
99                         if (flags & EXTRACT_CUNESCAPE) {
100                                 uint32_t u;
101
102                                 r = cunescape_one(*p, (size_t) -1, &c, &u);
103                                 if (r < 0) {
104                                         if (flags & EXTRACT_CUNESCAPE_RELAX) {
105                                                 s[sz++] = '\\';
106                                                 s[sz++] = c;
107                                         } else
108                                                 return -EINVAL;
109                                 } else {
110                                         (*p) += r - 1;
111
112                                         if (c != 0)
113                                                 s[sz++] = c; /* normal explicit char */
114                                         else
115                                                 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
116                                 }
117                         } else
118                                 s[sz++] = c;
119
120                         backslash = false;
121
122                 } else if (quote) {     /* inside either single or double quotes */
123                         for (;; (*p) ++, c = **p) {
124                                 if (c == 0) {
125                                         if (flags & EXTRACT_RELAX)
126                                                 goto finish_force_terminate;
127                                         return -EINVAL;
128                                 } else if (c == quote) {        /* found the end quote */
129                                         quote = 0;
130                                         break;
131                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
132                                         backslash = true;
133                                         break;
134                                 } else {
135                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
136                                                 return -ENOMEM;
137
138                                         s[sz++] = c;
139                                 }
140                         }
141
142                 } else {
143                         for (;; (*p) ++, c = **p) {
144                                 if (c == 0)
145                                         goto finish_force_terminate;
146                                 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
147                                         quote = c;
148                                         break;
149                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
150                                         backslash = true;
151                                         break;
152                                 } else if (strchr(separators, c)) {
153                                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
154                                                 (*p) ++;
155                                                 goto finish_force_next;
156                                         }
157                                         /* Skip additional coalesced separators. */
158                                         for (;; (*p) ++, c = **p) {
159                                                 if (c == 0)
160                                                         goto finish_force_terminate;
161                                                 if (!strchr(separators, c))
162                                                         break;
163                                         }
164                                         goto finish;
165
166                                 } else {
167                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
168                                                 return -ENOMEM;
169
170                                         s[sz++] = c;
171                                 }
172                         }
173                 }
174         }
175
176 finish_force_terminate:
177         *p = NULL;
178 finish:
179         if (!s) {
180                 *p = NULL;
181                 *ret = NULL;
182                 return 0;
183         }
184
185 finish_force_next:
186         s[sz] = 0;
187         *ret = s;
188         s = NULL;
189
190         return 1;
191 }
192
/// UNNEEDED by elogind
194 #if 0
195 int extract_first_word_and_warn(
196                 const char **p,
197                 char **ret,
198                 const char *separators,
199                 ExtractFlags flags,
200                 const char *unit,
201                 const char *filename,
202                 unsigned line,
203                 const char *rvalue) {
204
205         /* Try to unquote it, if it fails, warn about it and try again
206          * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
207          * backslashes verbatim in invalid escape sequences. */
208
209         const char *save;
210         int r;
211
212         save = *p;
213         r = extract_first_word(p, ret, separators, flags);
214         if (r >= 0)
215                 return r;
216
217         if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
218
219                 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
220                 *p = save;
221                 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
222                 if (r >= 0) {
223                         /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
224                         log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
225                         return r;
226                 }
227
228                 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
229                 if (r == -EINVAL)
230                         return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
231         }
232
233         /* Can be any error, report it */
234         return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
235 }
236
237 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
238         va_list ap;
239         char **l;
240         int n = 0, i, c, r;
241
242         /* Parses a number of words from a string, stripping any
243          * quotes if necessary. */
244
245         assert(p);
246
247         /* Count how many words are expected */
248         va_start(ap, flags);
249         for (;;) {
250                 if (!va_arg(ap, char **))
251                         break;
252                 n++;
253         }
254         va_end(ap);
255
256         if (n <= 0)
257                 return 0;
258
259         /* Read all words into a temporary array */
260         l = newa0(char*, n);
261         for (c = 0; c < n; c++) {
262
263                 r = extract_first_word(p, &l[c], separators, flags);
264                 if (r < 0) {
265                         int j;
266
267                         for (j = 0; j < c; j++)
268                                 free(l[j]);
269
270                         return r;
271                 }
272
273                 if (r == 0)
274                         break;
275         }
276
277         /* If we managed to parse all words, return them in the passed
278          * in parameters */
279         va_start(ap, flags);
280         for (i = 0; i < n; i++) {
281                 char **v;
282
283                 v = va_arg(ap, char **);
284                 assert(v);
285
286                 *v = l[i];
287         }
288         va_end(ap);
289
290         return c;
291 }
292 #endif // 0