chiark / gitweb /
New quoted string handling and simple pattern matching.
[mLib] / str.c
CommitLineData
081e6815 1/* -*-c-*-
2 *
efae42a6 3 * $Id: str.c,v 1.4 2000/10/08 09:43:34 mdw Exp $
081e6815 4 *
5 * Functions for hacking with strings
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of the mLib utilities library.
13 *
14 * mLib is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * mLib is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with mLib; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: str.c,v $
efae42a6 33 * Revision 1.4 2000/10/08 09:43:34 mdw
34 * New quoted string handling and simple pattern matching.
35 *
48d198f1 36 * Revision 1.3 1999/12/22 15:41:14 mdw
37 * Skip past trailing whitespace in str_getword.
38 *
f3a542e8 39 * Revision 1.2 1999/05/26 20:52:57 mdw
40 * Add new `rest' argument for `str_split'.
41 *
081e6815 42 * Revision 1.1 1999/05/17 20:37:01 mdw
43 * Some trivial string hacks.
44 *
45 */
46
47/*----- Header files ------------------------------------------------------*/
48
49#include <ctype.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53
54#include "str.h"
55
56/*----- Main code ---------------------------------------------------------*/
57
/* --- @str_qword@ --- *
 *
 * Arguments:	@char **pp@ = address of pointer into string
 *		@unsigned f@ = various flags
 *
 * Returns:	Pointer to the next whitespace-delimited word, or null if
 *		@*pp@ is null or only whitespace remains.
 *
 * Use:		Extracts the next word from a string, editing the string in
 *		place: the word is compacted towards its own start and
 *		terminated with a zero byte.  Leading whitespace is skipped
 *		first, and whitespace following the word is skipped
 *		afterwards; @*pp@ is then left pointing at whatever follows,
 *		or is set null if the string has been used up.
 *
 *		If @STRF_QUOTE@ is set in @f@, a backslash makes the next
 *		character literal (both inside and outside quotes), and a
 *		single or double quote protects everything up to the matching
 *		quote, including whitespace.  The quote and backslash
 *		characters themselves are dropped from the result.  An
 *		unterminated quote or a trailing backslash simply ends at
 *		the end of the string.
 */

#define STRF_QUOTE 1u

char *str_qword(char **pp, unsigned f)
{
  char *start = *pp;
  char *in, *out;
  int state = 0;			/* 0, a quote character, or '\\' */
  int resume = 0;			/* State to return to after escape */

  /* --- Find the start of the word --- */

  if (start == 0)
    return (0);
  while (isspace((unsigned char)*start))
    start++;
  if (*start == 0) {
    *pp = 0;
    return (0);
  }

  /* --- Copy the word down over itself, interpreting as we go --- */

  in = out = start;
  while (*in) {
    char ch = *in;

    if (state == '\\') {

      /* The previous character was a backslash: take this one as is */

      *out++ = ch;
      state = resume;

    } else if (state == '\'' || state == '\"') {

      /* Inside a quoted section: only the closing quote and backslash
       * are special here.
       */

      if (ch == state)
        state = resume = 0;
      else if (ch == '\\')
        state = '\\';
      else
        *out++ = ch;

    } else if (isspace((unsigned char)ch)) {

      /* Unprotected whitespace ends the word; swallow the whole run so
       * that the caller's pointer lands on the next word (or the end).
       */

      do
        in++;
      while (*in && isspace((unsigned char)*in));
      break;

    } else if ((f & STRF_QUOTE) && ch == '\\') {
      state = '\\';
    } else if ((f & STRF_QUOTE) && (ch == '\'' || ch == '\"')) {
      state = resume = ch;
    } else {
      *out++ = ch;
    }

    in++;
  }

  /* --- Report where we stopped, then tie off the word --- */

  *pp = *in ? in : 0;
  *out = 0;
  return (start);
}
135
/* --- @str_qsplit@ --- *
 *
 * Arguments:	@char *p@ = pointer to string
 *		@char *v[]@ = pointer to array to fill in
 *		@size_t c@ = count of strings to fill in
 *		@char **rest@ = where to store the remainder of the string
 *		@unsigned f@ = flags for @str_qword@
 *
 * Returns:	Number of strings filled in.
 *
 * Use:		Breaks a string into words by calling @str_qword@
 *		repeatedly, storing a pointer to each word in successive
 *		elements of @v@.  The string is modified in place.  At most
 *		@c@ words are extracted, and the number actually found is
 *		returned.  Any slots of @v@ left over are set to null
 *		pointers.  If @rest@ is non-null, it receives the position
 *		at which scanning stopped, as reported by @str_qword@; this
 *		is null once the string has been exhausted.
 */

size_t str_qsplit(char *p, char *v[], size_t c, char **rest, unsigned f)
{
  size_t found, i;
  char *word;

  /* --- Collect words until the array is full or the string runs out --- */

  for (found = 0; found < c; found++) {
    word = str_qword(&p, f);
    if (!word)
      break;
    v[found] = word;
  }

  /* --- Blank out whatever slots remain --- */

  for (i = found; i < c; i++)
    v[i] = 0;

  /* --- Hand back the unparsed tail if the caller wants it --- */

  if (rest)
    *rest = p;
  return (found);
}
175
/* --- @str_getword@ --- *
 *
 * Arguments:	@char **pp@ = address of pointer into string
 *
 * Returns:	Pointer to the next space-separated word from the string,
 *		or null.
 *
 * Use:		Fetches the next whitespace-delimited word with no quote or
 *		escape processing.  This is simply @str_qword@ called with
 *		no flags set, kept for compatibility.
 */

char *str_getword(char **pp)
{
  char *word = str_qword(pp, 0);

  return (word);
}
191
/* --- @str_split@ --- *
 *
 * Arguments:	@char *p@ = pointer to string
 *		@char *v[]@ = pointer to array to fill in
 *		@size_t c@ = count of strings to fill in
 *		@char **rest@ = where to store the remainder of the string
 *
 * Returns:	Number of strings filled in.
 *
 * Use:		Splits a string into whitespace-delimited words with no
 *		quote or escape processing.  This is simply @str_qsplit@
 *		called with no flags set, kept for compatibility.
 */

size_t str_split(char *p, char *v[], size_t c, char **rest)
{
  size_t found = str_qsplit(p, v, c, rest, 0);

  return (found);
}
209
/* --- @str_match@ --- *
 *
 * Arguments:	@const char *p@ = pointer to pattern string
 *		@const char *s@ = string to compare with
 *
 * Returns:	Nonzero if the pattern matches the whole string.
 *
 * Use:		Does simple wildcard matching.  The metacharacters are:
 *
 *		  * `?' matches any single character;
 *
 *		  * `*' matches any run of characters, including none;
 *
 *		  * `[...]' matches one character from a set, which may
 *		    contain ranges written `a-z'.  A leading `^' or `!'
 *		    inverts the set, and a `]' placed first (after any
 *		    inversion mark) is an ordinary member.  A set with no
 *		    closing `]' never matches anything;
 *
 *		  * `\' makes the following pattern character literal.
 *
 *		Star matching is done by recursion and backtracking, so
 *		patterns with several stars can be slow.
 */

int str_match(const char *p, const char *s)
{
  for (;;) {
    char pch = *p++, pche, sch;
    int sense, first, hit;

    switch (pch) {

      case '?':
        if (!*s)
          return (0);
        s++;
        break;

      case '*':

        /* A run of stars means the same as one star; collapsing it
         * avoids pointless nested backtracking.
         */

        while (*p == '*')
          p++;
        if (!*p)
          return (1);

        /* Try the rest of the pattern at every position, including the
         * empty tail at the very end of the string.
         */

        for (;;) {
          if (str_match(p, s))
            return (1);
          if (!*s)
            return (0);
          s++;
        }

      case '[':
        if (!*s)
          return (0);
        sch = *s++;

        sense = 1;
        if (*p == '^' || *p == '!') {
          sense = 0;
          p++;
        }

        /* Walk every member up to the closing bracket, noting whether
         * any of them matches.  Running off the end of the pattern
         * means the class was never closed, so fail rather than read
         * past the terminator.
         */

        first = 1;
        hit = 0;
        for (;;) {
          pch = *p++;
          if (!pch)
            return (0);
          if (pch == ']' && !first)
            break;
          first = 0;
          if (*p == '-' && p[1] && p[1] != ']') {
            pche = p[1];
            p += 2;
            if (pch <= sch && sch <= pche)
              hit = 1;
          } else if (pch == sch)
            hit = 1;
        }
        if (hit != sense)
          return (0);
        break;

      case '\\':
        pch = *p++;
        /* fall through: compare the escaped character literally */

      default:
        if (pch != *s)
          return (0);
        if (!pch)
          return (1);
        s++;
        break;
    }
  }
}
303
/* --- @str_sanitize@ --- *
 *
 * Arguments:	@char *d@ = destination buffer
 *		@const char *p@ = pointer to source string
 *		@size_t sz@ = size of destination buffer
 *
 * Returns:	---
 *
 * Use:		Copies a string into a fixed-size buffer, truncating it if
 *		necessary so that the result, including its terminating
 *		zero byte, fits in @sz@ bytes.  Every character for which
 *		@isgraph@ is false (which includes spaces) is replaced by
 *		an underscore.  If @sz@ is zero, nothing is written at all.
 */

void str_sanitize(char *d, const char *p, size_t sz)
{
  size_t room, i;

  if (sz == 0)
    return;

  /* --- Keep one byte back for the terminator --- */

  room = sz - 1;
  for (i = 0; p[i] != 0 && i < room; i++)
    d[i] = isgraph((unsigned char)p[i]) ? p[i] : '_';
  d[i] = 0;
}
331
332/*----- That's all, folks -------------------------------------------------*/