chiark / gitweb /
Actually implement the right transformation!
[mLib] / str.c
CommitLineData
/* -*-c-*-
 *
 * $Id: str.c,v 1.6 2004/04/08 01:36:13 mdw Exp $
 *
 * Functions for hacking with strings
 *
 * (c) 1999 Straylight/Edgeware
 */
9
/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */
29
081e6815 30/*----- Header files ------------------------------------------------------*/
31
32#include <ctype.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36
37#include "str.h"
38
39/*----- Main code ---------------------------------------------------------*/
40
efae42a6 41/* --- @str_qword@ --- *
081e6815 42 *
43 * Arguments: @char **pp@ = address of pointer into string
efae42a6 44 * @unsigned f@ = various flags
081e6815 45 *
efae42a6 46 * Returns: Pointer to the next space-separated possibly-quoted word from
47 * the string, or null.
081e6815 48 *
efae42a6 49 * Use: Fetches the next word from a string. If the flag
50 * @STRF_QUOTE@ is set, the `\' character acts as an escape, and
51 * single and double quotes protect whitespace.
081e6815 52 */
53
efae42a6 54char *str_qword(char **pp, unsigned f)
081e6815 55{
efae42a6 56 char *p = *pp, *q, *qq;
57 int st = 0, pst = 0;
58
59 /* --- Preliminaries --- */
081e6815 60
61 if (!p)
62 return (0);
081e6815 63 while (isspace((unsigned char)*p))
64 p++;
efae42a6 65 if (!*p) {
66 *pp = 0;
67 return (0);
68 }
69
70 /* --- Main work --- */
081e6815 71
efae42a6 72 for (q = qq = p; *q; q++) {
73 switch (st) {
74 case '\\':
75 *qq++ = *q;
76 st = pst;
77 break;
78 case '\'':
79 case '\"':
80 if (*q == st)
81 st = pst = 0;
82 else if (*q == '\\')
83 st = '\\';
84 else
85 *qq++ = *q;
86 break;
87 default:
88 if (isspace((unsigned char)*q)) {
89 do q++; while (*q && isspace((unsigned char)*q));
90 goto done;
91 } else if (!(f & STRF_QUOTE))
92 goto stdchar;
93 switch (*q) {
94 case '\\':
95 st = '\\';
96 break;
97 case '\'':
98 case '\"':
99 st = pst = *q;
100 break;
101 default:
102 stdchar:
103 *qq++ = *q;
104 break;
105 }
081e6815 106 }
107 }
108
efae42a6 109 /* --- Finished --- */
110
111done:
112 *pp = *q ? q : 0;
113 *qq++ = 0;
081e6815 114 return (p);
115}
116
/* --- @str_qsplit@ --- *
 *
 * Arguments:   @char *p@ = pointer to string
 *              @char *v[]@ = pointer to array to fill in
 *              @size_t c@ = count of strings to fill in
 *              @char **rest@ = where to store the remainder of the string
 *              @unsigned f@ = flags for @str_qword@
 *
 * Returns:     Number of strings filled in.
 *
 * Use:         Fills an array with pointers to the individual words of a
 *              string.  The string is modified in place to contain zero
 *              bytes at the word boundaries, and the words have leading
 *              and trailing space stripped off.  No more than @c@ words
 *              are read; the actual number is returned as the value of
 *              the function.  Unused slots in the array are populated
 *              with null pointers.  If @rest@ is non-null, it receives
 *              whatever @str_qword@ left as the continuation pointer:
 *              the address of the unread remainder, or a null pointer
 *              once the string has been used up.
 */

size_t str_qsplit(char *p, char *v[], size_t c, char **rest, unsigned f)
{
  size_t n = 0;
  char *w;

  /* --- Collect words until the array is full or the string runs out --- */

  for (; c; c--, n++) {
    w = str_qword(&p, f);
    if (!w)
      break;
    *v++ = w;
  }

  /* --- Blank out whatever slots are left over --- */

  for (; c; c--)
    *v++ = 0;

  if (rest)
    *rest = p;
  return (n);
}
156
/* --- @str_getword@ --- *
 *
 * Arguments:   @char **pp@ = address of pointer into string
 *
 * Returns:     Pointer to the next space-separated word from the string,
 *              or null.
 *
 * Use:         Parses off space-separated words from a string.  This is a
 *              compatibility veneer over @str_qword@, called with no
 *              flags set, so quotes and backslashes are ordinary
 *              characters.
 */

char *str_getword(char **pp)
{
  return (str_qword(pp, 0));
}
172
/* --- @str_split@ --- *
 *
 * Arguments:   @char *p@ = pointer to string
 *              @char *v[]@ = pointer to array to fill in
 *              @size_t c@ = count of strings to fill in
 *              @char **rest@ = where to store the remainder of the string
 *
 * Returns:     Number of strings filled in.
 *
 * Use:         Fills an array with pointers to the individual words of a
 *              string.  This is a compatibility veneer over
 *              @str_qsplit@, called with no flags set.
 */

size_t str_split(char *p, char *v[], size_t c, char **rest)
{
  return (str_qsplit(p, v, c, rest, 0));
}
190
/* --- @str_match@ --- *
 *
 * Arguments:   @const char *p@ = pointer to pattern string
 *              @const char *s@ = string to compare with
 *
 * Returns:     Nonzero if the pattern matches the string.
 *
 * Use:         Does simple wildcard matching.  This is quite nasty and
 *              more than a little slow.  Supports metacharacters `*'
 *              (any run of characters, possibly empty), `?' (any single
 *              character) and `[' (character class, with ranges written
 *              `a-z' and negation by a leading `^' or `!'; a `]'
 *              immediately after the opening is a literal member).  A
 *              `\' makes the following pattern character literal.  A
 *              class with no closing `]' never matches.
 */

int str_match(const char *p, const char *s)
{
  for (;;) {
    char pch = *p++, pche, sch;
    int sense;

    switch (pch) {

      case '?':
        if (!*s)
          return (0);
        s++;
        break;

      case '*':

        /* A trailing star swallows everything that's left */
        if (!*p)
          return (1);

        /* Try the rest of the pattern against every suffix of the
         * string, including the empty one: the remaining pattern may
         * itself be able to match nothing (e.g., further stars).
         */
        for (;;) {
          if (str_match(p, s))
            return (1);
          if (!*s)
            return (0);
          s++;
        }

      case '[':
        if (!*s)
          return (0);
        sch = *s++;
        pch = *p++;
        sense = 1;
        if (pch == '^' || pch == '!') {
          sense = !sense;
          pch = *p++;
        }

        /* A `]' in first position is a member, not the terminator */
        if (pch == ']') {
          if (*p == '-' && p[1] && p[1] != ']') {
            pche = p[1];
            p += 2;
            if (pch <= sch && sch <= pche)
              goto class_match;
          } else if (pch == sch)
            goto class_match;
          pch = *p++;
        }

        for (;; pch = *p++) {

          /* Pattern ran out inside the class: it's malformed, and @p@
           * is already past the terminator, so stop right here rather
           * than carrying on reading.
           */
          if (!pch)
            return (0);
          if (pch == ']')
            goto class_nomatch;
          if (*p == '-' && p[1] && p[1] != ']') {
            pche = p[1];
            p += 2;
            if (pch <= sch && sch <= pche)
              goto class_match;
          } else if (pch == sch)
            goto class_match;
        }

      class_match:
        if (!sense)
          return (0);

        /* Skip the remainder of the class up to its closing bracket */
        for (;;) {
          pch = *p++;
          if (!pch)
            return (0);
          if (pch == ']')
            break;
          if (*p == '-' && p[1] && p[1] != ']')
            p += 2;
        }
        break;

      class_nomatch:
        if (sense)
          return (0);
        break;

      case '\\':
        pch = *p++;
        /* fall through */

      default:
        if (pch != *s)
          return (0);
        if (!pch)
          return (1);
        s++;
        break;
    }
  }
}
284
/* --- @str_sanitize@ --- *
 *
 * Arguments:   @char *d@ = destination buffer
 *              @const char *p@ = pointer to source string
 *              @size_t sz@ = size of destination buffer
 *
 * Returns:     ---
 *
 * Use:         Writes a string into a buffer, being careful not to
 *              overflow the buffer, to null terminate the result, and to
 *              prevent nasty nonprintable characters ending up in the
 *              buffer.  Every character for which @isgraph@ is false
 *              (spaces included) is replaced by `_'.  At most @sz - 1@
 *              characters are copied; if @sz@ is zero, the buffer isn't
 *              touched at all.
 */

void str_sanitize(char *d, const char *p, size_t sz)
{
  size_t room;

  if (!sz)
    return;

  /* --- Keep one byte back for the terminator --- */

  for (room = sz - 1; room && *p; room--, p++) {
    unsigned char ch = (unsigned char)*p;
    *d++ = isgraph(ch) ? *p : '_';
  }
  *d = 0;
}
312
313/*----- That's all, folks -------------------------------------------------*/