chiark / gitweb /
de56168909c9405d4e66c14965527ffad91befd3
[elogind.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "extract-word.h"
25 #include "string-util.h"
26 #include "utf8.h"
27 #include "util.h"
28
29 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30         _cleanup_free_ char *s = NULL;
31         size_t allocated = 0, sz = 0;
32         char c;
33         int r;
34
35         char quote = 0;                 /* 0 or ' or " */
36         bool backslash = false;         /* whether we've just seen a backslash */
37
38         assert(p);
39         assert(ret);
40
41         /* Bail early if called after last value or with no input */
42         if (!*p)
43                 goto finish_force_terminate;
44         c = **p;
45
46         if (!separators)
47                 separators = WHITESPACE;
48
49         /* Parses the first word of a string, and returns it in
50          * *ret. Removes all quotes in the process. When parsing fails
51          * (because of an uneven number of quotes or similar), leaves
52          * the pointer *p at the first invalid character. */
53
54         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55                 if (!GREEDY_REALLOC(s, allocated, sz+1))
56                         return -ENOMEM;
57
58         for (;; (*p) ++, c = **p) {
59                 if (c == 0)
60                         goto finish_force_terminate;
61                 else if (strchr(separators, c)) {
62                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
63                                 (*p) ++;
64                                 goto finish_force_next;
65                         }
66                 } else {
67                         /* We found a non-blank character, so we will always
68                          * want to return a string (even if it is empty),
69                          * allocate it here. */
70                         if (!GREEDY_REALLOC(s, allocated, sz+1))
71                                 return -ENOMEM;
72                         break;
73                 }
74         }
75
76         for (;; (*p) ++, c = **p) {
77                 if (backslash) {
78                         if (!GREEDY_REALLOC(s, allocated, sz+7))
79                                 return -ENOMEM;
80
81                         if (c == 0) {
82                                 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
83                                     (!quote || flags & EXTRACT_RELAX)) {
84                                         /* If we find an unquoted trailing backslash and we're in
85                                          * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
86                                          * output.
87                                          *
88                                          * Unbalanced quotes will only be allowed in EXTRACT_RELAX
89                                          * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
90                                          */
91                                         s[sz++] = '\\';
92                                         goto finish_force_terminate;
93                                 }
94                                 if (flags & EXTRACT_RELAX)
95                                         goto finish_force_terminate;
96                                 return -EINVAL;
97                         }
98
99                         if (flags & EXTRACT_CUNESCAPE) {
100                                 uint32_t u;
101                                 bool eight_bit = false;
102
103                                 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
104                                 if (r < 0) {
105                                         if (flags & EXTRACT_CUNESCAPE_RELAX) {
106                                                 s[sz++] = '\\';
107                                                 s[sz++] = c;
108                                         } else
109                                                 return -EINVAL;
110                                 } else {
111                                         (*p) += r - 1;
112
113                                         if (eight_bit)
114                                                 s[sz++] = u;
115                                         else
116                                                 sz += utf8_encode_unichar(s + sz, u);
117                                 }
118                         } else
119                                 s[sz++] = c;
120
121                         backslash = false;
122
123                 } else if (quote) {     /* inside either single or double quotes */
124                         for (;; (*p) ++, c = **p) {
125                                 if (c == 0) {
126                                         if (flags & EXTRACT_RELAX)
127                                                 goto finish_force_terminate;
128                                         return -EINVAL;
129                                 } else if (c == quote) {        /* found the end quote */
130                                         quote = 0;
131                                         break;
132                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
133                                         backslash = true;
134                                         break;
135                                 } else {
136                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
137                                                 return -ENOMEM;
138
139                                         s[sz++] = c;
140                                 }
141                         }
142
143                 } else {
144                         for (;; (*p) ++, c = **p) {
145                                 if (c == 0)
146                                         goto finish_force_terminate;
147                                 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
148                                         quote = c;
149                                         break;
150                                 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
151                                         backslash = true;
152                                         break;
153                                 } else if (strchr(separators, c)) {
154                                         if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
155                                                 (*p) ++;
156                                                 goto finish_force_next;
157                                         }
158                                         /* Skip additional coalesced separators. */
159                                         for (;; (*p) ++, c = **p) {
160                                                 if (c == 0)
161                                                         goto finish_force_terminate;
162                                                 if (!strchr(separators, c))
163                                                         break;
164                                         }
165                                         goto finish;
166
167                                 } else {
168                                         if (!GREEDY_REALLOC(s, allocated, sz+2))
169                                                 return -ENOMEM;
170
171                                         s[sz++] = c;
172                                 }
173                         }
174                 }
175         }
176
177 finish_force_terminate:
178         *p = NULL;
179 finish:
180         if (!s) {
181                 *p = NULL;
182                 *ret = NULL;
183                 return 0;
184         }
185
186 finish_force_next:
187         s[sz] = 0;
188         *ret = s;
189         s = NULL;
190
191         return 1;
192 }
193
194 #if 0 /// UNNEEDED by elogind
195 int extract_first_word_and_warn(
196                 const char **p,
197                 char **ret,
198                 const char *separators,
199                 ExtractFlags flags,
200                 const char *unit,
201                 const char *filename,
202                 unsigned line,
203                 const char *rvalue) {
204
205         /* Try to unquote it, if it fails, warn about it and try again
206          * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
207          * backslashes verbatim in invalid escape sequences. */
208
209         const char *save;
210         int r;
211
212         save = *p;
213         r = extract_first_word(p, ret, separators, flags);
214         if (r >= 0)
215                 return r;
216
217         if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
218
219                 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
220                 *p = save;
221                 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
222                 if (r >= 0) {
223                         /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
224                         log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
225                         return r;
226                 }
227
228                 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
229                 if (r == -EINVAL)
230                         return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
231         }
232
233         /* Can be any error, report it */
234         return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
235 }
236
237 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
238         va_list ap;
239         char **l;
240         int n = 0, i, c, r;
241
242         /* Parses a number of words from a string, stripping any
243          * quotes if necessary. */
244
245         assert(p);
246
247         /* Count how many words are expected */
248         va_start(ap, flags);
249         for (;;) {
250                 if (!va_arg(ap, char **))
251                         break;
252                 n++;
253         }
254         va_end(ap);
255
256         if (n <= 0)
257                 return 0;
258
259         /* Read all words into a temporary array */
260         l = newa0(char*, n);
261         for (c = 0; c < n; c++) {
262
263                 r = extract_first_word(p, &l[c], separators, flags);
264                 if (r < 0) {
265                         int j;
266
267                         for (j = 0; j < c; j++)
268                                 free(l[j]);
269
270                         return r;
271                 }
272
273                 if (r == 0)
274                         break;
275         }
276
277         /* If we managed to parse all words, return them in the passed
278          * in parameters */
279         va_start(ap, flags);
280         for (i = 0; i < n; i++) {
281                 char **v;
282
283                 v = va_arg(ap, char **);
284                 assert(v);
285
286                 *v = l[i];
287         }
288         va_end(ap);
289
290         return c;
291 }
292 #endif // 0