Commit | Line | Data |
---|---|---|
b6b9d458 | 1 | .\" -*-nroff-*- |
2 | .de VS | |
3 | .sp 1 | |
4 | .in +5n | |
5 | .ft B | |
6 | .nf | |
7 | .. | |
8 | .de VE | |
9 | .ft R | |
10 | .in -5n | |
11 | .sp 1 | |
12 | .fi | |
13 | .. | |
fbf20b5b | 14 | .TH str 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library" |
b6b9d458 | 15 | .SH NAME |
16 | str \- small string utilities | |
efae42a6 | 17 | .\" @str_qword |
18 | .\" @str_qsplit | |
08da152e | 19 | .\" @str_getword |
20 | .\" @str_split | |
26f325c0 | 21 | .\" @str_matchx |
efae42a6 | 22 | .\" @str_match |
08da152e | 23 | .\" @str_sanitize |
b6b9d458 | 24 | .SH SYNOPSIS |
25 | .nf | |
26 | .B "#include <mLib/str.h>" | |
27 | ||
efae42a6 | 28 | .BI "char *str_qword(char **" pp ", unsigned " f ); |
29 | .BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c , | |
30 | .BI " char **" rest ", unsigned " f ); | |
b6b9d458 | 31 | .BI "char *str_getword(char **" pp ); |
32 | .BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest ); | |
26f325c0 | 33 | .BI "int str_matchx(const char *" p ", const char *" s ", unsigned " f ); |
efae42a6 | 34 | .BI "int str_match(const char *" p ", const char *" s ); |
b6b9d458 | 35 | .BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz ); |
36 | .fi | |
37 | .SH DESCRIPTION | |
38 | The header file | |
39 | .B <mLib/str.h> | |
40 | contains a few small utility functions for manipulating null-terminated | |
d4efbcd9 | 41 | strings. |
b6b9d458 | 42 | .PP |
43 | The function | |
efae42a6 | 44 | .B str_qword |
b6b9d458 | 45 | extracts the next whitespace-delimited word from a string. The |
46 | function's argument, | |
47 | .IR pp , | |
48 | is the address of a pointer into the string: this pointer is updated by | |
efae42a6 | 49 | .B str_qword |
b6b9d458 | 50 | so that it can extract the following word on the next call and so on. |
51 | The return value is the address of the next word, appropriately null | |
52 | terminated. A null pointer is returned if the entire remainder of the | |
53 | string is whitespace. Note that | |
efae42a6 | 54 | .B str_qword |
b6b9d458 | 55 | modifies the string as it goes, to null-terminate the individual words. |
efae42a6 | 56 | If the flag |
57 | .B STRF_QUOTE | |
58 | is passed, the single- and double-quote characters may be used to quote | |
59 | whitespace within words, and the backslash can escape quote characters | |
60 | and whitespace. | |
b6b9d458 | 61 | .PP |
62 | The function | |
efae42a6 | 63 | .B str_qsplit |
b6b9d458 | 64 | divides a string into whitespace-separated words. The arguments are as |
65 | follows: | |
66 | .TP | |
ff76c38f | 67 | .BI "char *" p |
b6b9d458 | 68 | The address of the string to split. The string is modified by having |
69 | null terminators written after each word extracted. | |
70 | .TP | |
ff76c38f | 71 | .BI "char *" v [] |
b6b9d458 | 72 | The address of an array of pointers to characters. This array will be |
73 | filled in by | |
74 | .BR str_qsplit : | |
75 | the first entry will point to the first word extracted from the string, | |
76 | and so on. If there aren't enough words in the string, the remaining | |
77 | array elements are filled with null pointers. | |
78 | .TP | |
ff76c38f | 79 | .BI "size_t " c |
d2a91066 | 80 | The maximum number of words to extract; also, the number of elements in |
b6b9d458 | 81 | the array |
82 | .IR v . | |
83 | .TP | |
ff76c38f | 84 | .BI "char **" rest |
b6b9d458 | 85 | The address of a pointer in which to store the address of the remainder |
86 | of the string. Leading whitespace is removed from the remainder before | |
87 | storing. If the remainder string is empty, a null pointer is stored | |
88 | instead. If | |
89 | .I rest | |
90 | is null, the remainder pointer is discarded. | |
efae42a6 | 91 | .TP |
92 | .BI "unsigned " f | |
93 | Flags, as for | |
94 | .BR str_qword . | |
b6b9d458 | 95 | .PP |
96 | The return value of | |
efae42a6 | 97 | .B str_qsplit |
b6b9d458 | 98 | is the number of words extracted from the input string. |
99 | .PP | |
efae42a6 | 100 | The functions |
101 | .B str_getword | |
102 | and | |
103 | .B str_split | |
104 | are veneers over | |
105 | .B str_qword | |
106 | and | |
107 | .B str_qsplit | |
108 | respectively; they are equivalent to calls to the latter functions with | |
109 | flags words of zero. | |
110 | .PP | |
111 | The | |
26f325c0 | 112 | .B str_matchx |
efae42a6 | 113 | function does simple wildcard matching. The first argument is a |
114 | pattern, which may contain metacharacters: | |
115 | .RB ` * ' | |
116 | matches zero or more arbitrary characters; | |
117 | .RB ` ? ' | |
118 | matches exactly one arbitrary character; and | |
119 | .RB ` [ ... ] ' | |
120 | matches one of the characters listed. The backslash | |
121 | .RB ` \e ' | |
122 | escapes the following character. Within square brackets, the | |
123 | hyphen | |
124 | .RB ` \- ' | |
125 | may be used to designate ranges of characters. If the initial character | |
126 | is | |
127 | .RB ` ! ' | |
128 | or | |
129 | .RB ` ^ ' | |
130 | then the sense of the match is reversed. To literally match a | |
131 | .RB ` ] ' | |
132 | character, list it first; to literally match a | |
133 | .RB ` \- ' | |
134 | character, list it immediately after a range, or at the beginning or end | |
135 | of the set. The return value is nonzero if the pattern | |
136 | .I p | |
137 | matches the given string | |
138 | .IR s , | |
26f325c0 | 139 | or zero if the pattern doesn't match. If the flag |
140 | .B STRF_PREFIX | |
141 | is passed, | |
142 | .B str_matchx | |
143 | returns true if it reaches the end of the target string before finding a | |
144 | mismatch \(en i.e., if the target string is a prefix of a string which | |
145 | might match the pattern. The function | |
146 | .B str_match | |
147 | is a convenient wrapper for | |
148 | .B str_matchx | |
149 | with a zero flags word, which is the normal case. | |
efae42a6 | 150 | .PP |
b6b9d458 | 151 | The function |
152 | .B str_sanitize | |
153 | copies at most | |
154 | .I sz \- 1 | |
155 | characters from the string | |
156 | .I p | |
157 | to | |
158 | .IR d . | |
159 | The result string is null terminated. Any nonprinting characters in | |
160 | .I p | |
161 | are replaced by an underscore | |
162 | .RB ` _ ' | |
163 | when written to | |
164 | .IR d . | |
165 | .SH EXAMPLES | |
166 | Given the code | |
167 | .VS | |
168 | char p[] = " alpha beta gamma delta "; | |
169 | char *v[3]; | |
170 | size_t n; | |
171 | char *q; | |
172 | ||
173 | n = str_split(p, v, 3, &q); | |
174 | .VE | |
175 | following the call to | |
176 | .BR str_split , | |
177 | .B n | |
178 | will have the value 3, | |
179 | .B v[0] | |
180 | will point to | |
181 | .RB ` alpha ', | |
182 | .B v[1] | |
183 | will point to | |
184 | .RB ` beta ', | |
185 | .B v[2] | |
186 | will point to | |
187 | .RB ` gamma ' | |
188 | and | |
189 | .B q | |
190 | will point to | |
191 | .RB ` delta\ ' | |
192 | (note the trailing space). | |
193 | .PP | |
194 | Similarly, given the string | |
195 | .B """\ alpha\ \ beta\ """ | |
196 | instead, | |
197 | .B n | |
198 | will be assigned the value 2, | |
199 | .B v[0] | |
200 | and | |
201 | .B v[1] | |
202 | will have the same values as last time, and | |
203 | .B v[2] | |
204 | and | |
205 | .B q | |
206 | will be null. | |
08da152e | 207 | .SH "SEE ALSO" |
208 | .BR mLib (3). | |
b6b9d458 | 209 | .SH AUTHOR |
9b5ac6ff | 210 | Mark Wooding, <mdw@distorted.org.uk> |