b6b9d458 |
1 | .\" -*-nroff-*- |
2 | .de VS |
3 | .sp 1 |
4 | .in +5n |
5 | .ft B |
6 | .nf |
7 | .. |
8 | .de VE |
9 | .ft R |
10 | .in -5n |
11 | .sp 1 |
12 | .fi |
13 | .. |
fbf20b5b |
14 | .TH str 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library" |
b6b9d458 |
15 | .SH NAME |
16 | str \- small string utilities |
efae42a6 |
17 | .\" @str_qword |
18 | .\" @str_qsplit |
08da152e |
19 | .\" @str_getword |
20 | .\" @str_split |
efae42a6 |
21 | .\" @str_match |
08da152e |
22 | .\" @str_sanitize |
b6b9d458 |
23 | .SH SYNOPSIS |
24 | .nf |
25 | .B "#include <mLib/str.h>" |
26 | |
efae42a6 |
27 | .BI "char *str_qword(char **" pp ", unsigned " f ); |
28 | .BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c , |
29 | .BI " char **" rest ", unsigned " f ); |
b6b9d458 |
30 | .BI "char *str_getword(char **" pp ); |
31 | .BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest ); |
efae42a6 |
32 | .BI "int str_match(const char *" p ", const char *" s ); |
b6b9d458 |
33 | .BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz ); |
34 | .fi |
35 | .SH DESCRIPTION |
36 | The header file |
37 | .B <mLib/str.h> |
38 | contains a few small utility functions for manipulating null-terminated |
39 | strings. |
40 | .PP |
41 | The function |
efae42a6 |
42 | .B str_qword |
b6b9d458 |
43 | extracts the next whitespace-delimited word from a string. The |
44 | function's argument, |
45 | .IR pp , |
46 | is the address of a pointer into the string: this pointer is updated by |
efae42a6 |
47 | .B str_qword |
b6b9d458 |
48 | so that it can extract the following word on the next call and so on. |
49 | The return value is the address of the next word, appropriately null |
50 | terminated. A null pointer is returned if the entire remainder of the |
51 | string is whitespace. Note that |
efae42a6 |
52 | .B str_qword |
b6b9d458 |
53 | modifies the string as it goes, to null-terminate the individual words. |
efae42a6 |
54 | If the flag |
55 | .B STRF_QUOTE |
56 | is passed, the single- and double-quote characters may be used to quote |
57 | whitespace within words, and the backslash can escape quote characters |
58 | and whitespace. |
b6b9d458 |
59 | .PP |
60 | The function |
efae42a6 |
61 | .B str_qsplit |
b6b9d458 |
62 | divides a string into whitespace-separated words. The arguments are as |
63 | follows: |
64 | .TP |
ff76c38f |
65 | .BI "char *" p |
b6b9d458 |
66 | The address of the string to split. The string is modified by having |
67 | null terminators written after each word extracted. |
68 | .TP |
ff76c38f |
69 | .BI "char *" v [] |
b6b9d458 |
70 | The address of an array of pointers to characters. This array will be |
71 | filled in by |
72 | .BR str_qsplit : |
73 | the first entry will point to the first word extracted from the string, |
74 | and so on. If there aren't enough words in the string, the remaining |
75 | array elements are filled with null pointers. |
76 | .TP |
ff76c38f |
77 | .BI "size_t " c |
d2a91066 |
78 | The maximum number of words to extract; also, the number of elements in |
b6b9d458 |
79 | the array |
80 | .IR v . |
81 | .TP |
ff76c38f |
82 | .BI "char **" rest |
b6b9d458 |
83 | The address of a pointer in which to store the address of the remainder |
84 | of the string. Leading whitespace is removed from the remainder before |
85 | storing. If the remainder string is empty, a null pointer is stored |
86 | instead. If |
87 | .I rest |
88 | is null, the remainder pointer is discarded. |
efae42a6 |
89 | .TP |
90 | .BI "unsigned " f |
91 | Flags, as for |
92 | .BR str_qword . |
b6b9d458 |
93 | .PP |
94 | The return value of |
efae42a6 |
95 | .B str_qsplit |
b6b9d458 |
96 | is the number of words extracted from the input string. |
97 | .PP |
efae42a6 |
98 | The functions |
99 | .B str_getword |
100 | and |
101 | .B str_split |
102 | are veneers over |
103 | .B str_qword |
104 | and |
105 | .B str_qsplit |
106 | respectively; they are equivalent to calls to the latter functions with |
107 | flags words of zero. |
108 | .PP |
109 | The |
110 | .B str_match |
111 | function does simple wildcard matching. The first argument is a |
112 | pattern, which may contain metacharacters: |
113 | .RB ` * ' |
114 | matches zero or more arbitrary characters; |
115 | .RB ` ? ' |
116 | matches exactly one arbitrary character; and |
117 | .RB ` [ ... ] ' |
118 | matches one of the characters listed. The backslash |
119 | .RB ` \e ' |
120 | escapes the following character. Within square brackets, the |
121 | hyphen |
122 | .RB ` \- ' |
123 | may be used to designate ranges of characters. If the initial character |
124 | within the brackets is |
125 | .RB ` ! ' |
126 | or |
127 | .RB ` ^ ' |
128 | then the sense of the match is reversed. To literally match a |
129 | .RB ` ] ' |
130 | character, list it first; to literally match a |
131 | .RB ` \- ' |
132 | character, list it immediately after a range, or at the beginning or end |
133 | of the set. The return value is nonzero if the pattern |
134 | .I p |
135 | matches the given string |
136 | .IR s , |
137 | or zero if the pattern doesn't match. |
138 | .PP |
b6b9d458 |
139 | The function |
140 | .B str_sanitize |
141 | copies at most |
142 | .I sz \- 1 |
143 | characters from the string |
144 | .I p |
145 | to |
146 | .IR d . |
147 | The result string is null terminated. Any nonprinting characters in |
148 | .I p |
149 | are replaced by an underscore |
150 | .RB ` _ ' |
151 | when written to |
152 | .IR d . |
153 | .SH EXAMPLES |
154 | Given the code |
155 | .VS |
156 | char p[] = " alpha beta gamma delta "; |
157 | char *v[3]; |
158 | size_t n; |
159 | char *q; |
160 | |
161 | n = str_split(p, v, 3, &q); |
162 | .VE |
163 | following the call to |
164 | .BR str_split , |
165 | .B n |
166 | will have the value 3, |
167 | .B v[0] |
168 | will point to |
169 | .RB ` alpha ', |
170 | .B v[1] |
171 | will point to |
172 | .RB ` beta ', |
173 | .B v[2] |
174 | will point to |
175 | .RB ` gamma ' |
176 | and |
177 | .B q |
178 | will point to |
179 | .RB ` delta\ ' |
180 | (note the trailing space). |
181 | .PP |
182 | Similarly, given the string |
183 | .B """\ alpha\ \ beta\ """ |
184 | instead, |
185 | .B n |
186 | will be assigned the value 2, |
187 | .B v[0] |
188 | and |
189 | .B v[1] |
190 | will have the same values as last time, and |
191 | .B v[2] |
192 | and |
193 | .B q |
194 | will be null. |
08da152e |
195 | .SH "SEE ALSO" |
196 | .BR mLib (3). |
b6b9d458 |
197 | .SH AUTHOR |
198 | Mark Wooding, <mdw@nsict.org> |