chiark / gitweb /
alsa: set volume via Master rather than PCM.
[disorder] / lib / strptime.c
CommitLineData
477f956c
RK
1/* strptime.c - partial strptime() reimplementation
2 *
3 * (c) 2008 Richard Kettlewell.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
1a164e63
RK
29/** @file lib/strptime.c
30 * @brief strptime() reimplementation
31 *
32 * strptime() is here reimplemented because the FreeBSD (and older MacOS) one
477f956c 33 * is broken and does not report errors properly. See TODO remarks below for
1a164e63
RK
34 * some missing bits.
35 */
477f956c
RK
36
37#include <ctype.h>
38#include <limits.h>
39#include <string.h>
40#include <langinfo.h>
41#include "strptime.h"
42
/** @brief One row of a locale-string lookup table
 *
 * Tables of these are scanned in order; a row whose @ref value is -1 marks
 * the end of the table.
 */
struct locale_item_match {
  /** @brief nl_langinfo() item whose string is compared with the input */
  nl_item key;

  /** @brief Result reported when the string for @ref key equals the input */
  int value;
};
51
52static const struct locale_item_match days[] = {
53 { DAY_1, 0 },
54 { DAY_2, 1 },
55 { DAY_3, 2 },
56 { DAY_4, 3 },
57 { DAY_5, 4 },
58 { DAY_6, 5 },
59 { DAY_7, 6 },
60 { ABDAY_1, 0 },
61 { ABDAY_2, 1 },
62 { ABDAY_3, 2 },
63 { ABDAY_4, 3 },
64 { ABDAY_5, 4 },
65 { ABDAY_6, 5 },
66 { ABDAY_7, 6 },
67 { -1, -1 }
68};
69
70static const struct locale_item_match months[] = {
71 { MON_1, 1 },
72 { MON_2, 2 },
73 { MON_3, 3 },
74 { MON_4, 4 },
75 { MON_5, 5 },
76 { MON_6, 6 },
77 { MON_7, 7 },
78 { MON_8, 8 },
79 { MON_9, 9 },
80 { MON_10, 10 },
81 { MON_11, 11 },
82 { MON_12, 12 },
83 { ABMON_1, 1 },
84 { ABMON_2, 2 },
85 { ABMON_3, 3 },
86 { ABMON_4, 4 },
87 { ABMON_5, 5 },
88 { ABMON_6, 6 },
89 { ABMON_7, 7 },
90 { ABMON_8, 8 },
91 { ABMON_9, 9 },
92 { ABMON_10, 10 },
93 { ABMON_11, 11 },
94 { ABMON_12, 12 },
95 { -1, -1 },
96};
97
/** @brief Compare a bounded subject with a NUL-terminated string
 * @param buf First byte of the subject
 * @param limit One past the last byte of the subject
 * @param match NUL-terminated string to compare against
 * @return 1 if [buf,limit) and @p match are equal ignoring case, else 0
 *
 * Case is folded a byte at a time with tolower(), so the comparison is
 * case-independent at least for ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); using compatibility
   * matching will ensure missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  for(; buf < limit && *match; ++buf, ++match) {
    const int subject_ch = tolower((unsigned char)*buf);
    const int pattern_ch = tolower((unsigned char)*match);

    if(subject_ch != pattern_ch)
      return 0;
  }
  /* Equal only if both the subject and the pattern ran out together */
  return buf == limit && !*match;
}
126
127/** @brief Match from table of locale-specific strings
128 * @param buf Start of subject
129 * @param limit End of subject
130 * @param lim Table of locale lookups
131 * @return Looked up value or -1
132 *
133 * The match is case-independent.
134 */
135static int try_locale_match(const char *buf,
136 const char *limit,
137 const struct locale_item_match *lim) {
138 /* This is not very efficient! A (correct) built-in implementation will
139 * presumably have more direct access to locale information. */
140 while(lim->value != -1) {
141 if(try_match(buf, limit, nl_langinfo(lim->key)))
142 return lim->value;
143 ++lim;
144 }
145 return -1;
146}
147
/** @brief Parse a bounded subject as an unsigned decimal number
 * @param buf First byte of the subject
 * @param limit One past the last byte of the subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return The parsed value, or -1 on error
 *
 * The whole of [buf,limit) must consist of ASCII digits and must contain at
 * least one of them.  Values up to and including INT_MAX can be represented;
 * anything larger, or outside [low,high], is an error.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  const unsigned max = (unsigned)INT_MAX;
  unsigned n = 0;

  /* An empty subject is not a number (it would otherwise parse as 0) */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    int ch = (unsigned char)*buf++;

    if(ch < '0' || ch > '9')
      return -1;
    /* Refuse any digit that would take us past INT_MAX, but allow INT_MAX
     * itself. */
    if(n > max / 10
       || (n == max / 10 && (unsigned)(ch - '0') > max % 10))
      return -1;                        /* overflow */
    n = 10 * n + (unsigned)(ch - '0');
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
168
169static const char *my_strptime_guts(const char *buf,
170 const char *format,
171 struct tm *tm) {
172 int fc, mod, spec, next, value;
173 const char *limit;
174 /* nl_langinfo() is allowed to trash its last return value so we copy.
175 * (We're relying on it being usable at all in multithreaded environments
176 * though.) */
177#define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
178 const char *s; \
179 char subformat[128]; \
180 \
181 if(mod == 'E') { \
182 s = nl_langinfo(EITEM); \
183 if(!s || !*s) \
184 s = nl_langinfo(ITEM); \
185 } else \
186 s = nl_langinfo(ITEM); \
187 if(!s || !*s) \
188 s = DEF; \
189 if(strlen(s) >= sizeof subformat) \
190 s = DEF; \
191 strcpy(subformat, s); \
192 if(!(buf = my_strptime_guts(buf, subformat, tm))) \
193 return NULL; \
194} while(0)
195
196 while(*format) {
197 fc = (unsigned char)*format++;
198 if(fc == '%') {
199 /* Get the character defining the converstion specification */
200 spec = (unsigned char)*format++;
201 if(spec == 'E' || spec == 'O') {
202 /* Oops, there's a modifier first */
203 mod = spec;
204 spec = (unsigned char)*format++;
205 } else
206 mod = 0;
207 if(!spec)
208 return NULL; /* format string broken! */
209 /* See what the next directive is. The specification is written in terms
210 * of stopping the match at a character that matches the next directive.
211 * This implementation mirrors this aspect of the specification
212 * directly. */
213 next = (unsigned char)*format;
214 if(next) {
215 limit = buf;
216 if(isspace(next)) {
217 /* Next directive is whitespace, so bound the input string (at least)
218 * by that */
219 while(*limit && !isspace((unsigned char)*limit))
220 ++limit;
221 } else if(next == '%') {
222 /* Prohibited: "The application shall ensure that there is
223 * white-space or other non-alphanumeric characters between any two
224 * conversion specifications". In fact we let alphanumerics
225 * through.
226 *
227 * Forbidding even %% seems a bit harsh but is consistent with the
228 * specification as written.
229 */
230 return NULL;
231 } else {
232 /* Next directive is a specific character, so bound the input string
233 * (at least) by that. This will work badly in the face of multibyte
234 * characters, but then the spec is vague about what kind of string
235 * we're dealing with anyway so you probably couldn't safely use them
236 * in the format string at least in any case. */
237 while(*limit && *limit != next)
238 ++limit;
239 }
240 } else
241 limit = buf + strlen(buf);
242 switch(spec) {
243 case 'A': case 'a': /* day name (abbrev or full) */
244 if((value = try_locale_match(buf, limit, days)) == -1)
245 return NULL;
246 tm->tm_wday = value;
247 break;
248 case 'B': case 'b': case 'h': /* month name (abbrev or full) */
249 if((value = try_locale_match(buf, limit, months)) == -1)
250 return NULL;
251 tm->tm_mon = value - 1;
252 break;
253 case 'c': /* locale date+time */
254 USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
255 break;
256 case 'C': /* century number 0-99 */
257 /* TODO */
258 return NULL;
259 case 'd': case 'e': /* day of month 1-31 */
260 if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
261 return NULL;
262 tm->tm_mday = value;
263 break;
264 case 'D': /* == "%m / %d / %y" */
265 if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
266 return NULL;
267 break;
268 case 'H': /* hour 0-23 */
269 if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
270 return NULL;
271 tm->tm_hour = value;
272 break;
273 case 'I': /* hour 1-12 */
274 /* TODO */
275 return NULL;
276 case 'j': /* day 1-366 */
277 if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
278 return NULL;
279 tm->tm_yday = value - 1;
280 return NULL;
281 case 'm': /* month 1-12 */
282 if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
283 return NULL;
284 tm->tm_mon = value - 1;
285 break;
286 case 'M': /* minute 0-59 */
287 if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
288 return NULL;
289 tm->tm_min = value;
290 break;
291 case 'n': case 't': /* any whitespace */
292 goto matchwhitespace;
293 case 'p': /* locale am/pm */
294 /* TODO */
295 return NULL;
296 case 'r': /* == "%I : %M : %S %p" */
297 /* TODO actually this is locale-dependent; and we don't implement %I
298 * anyway, so it's not going to work even as it stands. */
299 if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
300 return NULL;
301 break;
302 case 'R': /* == "%H : %M" */
303 if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
304 return NULL;
305 break;
306 case 'S': /* seconds 0-60 */
307 if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
308 return NULL;
309 tm->tm_sec = value;
310 break;
311 case 'U': /* week number from Sunday 0-53 */
312 /* TODO */
313 return NULL;
314 case 'w': /* day number 0-6 from Sunday */
315 if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
316 return NULL;
317 tm->tm_wday = value;
318 break;
319 case 'W': /* week number from Monday 0-53 */
320 /* TODO */
321 return NULL;
322 case 'x': /* locale date format */
323 USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
324 break;
325 case 'X': /* locale time format */
326 USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
327 break;
328 case 'y': /* year mod 100 */
329 if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
330 return NULL;
331 if(value >= 0 && value <= 68)
332 value = 2000 + value;
333 else if(value >= 69 && value <= 99)
334 value = 1900 + value;
335 tm->tm_year = value - 1900;
336 break;
337 case 'Y': /* year */
338 if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
339 return NULL;
340 tm->tm_year = value - 1900;
341 break;
342 case '%':
343 goto matchself;
344 default:
345 /* The spec is a bit vague about what to do with invalid format
346 * strings. We return NULL immediately and hope someone will
347 * notice. */
348 return NULL;
349 }
350 buf = limit;
351 } else if(isspace(fc)) {
352 matchwhitespace:
353 /* Any format whitespace matches any number of input whitespace
354 * characters. The directive can formally contain more than one
355 * whitespace character; for the second and subsequent ones we'll match 0
356 * characters from the input. */
357 while(isspace((unsigned char)*buf))
358 ++buf;
359 } else {
360 matchself:
361 /* Non-% non-whitespace characters must match themselves exactly */
362 if(fc != (unsigned char)*buf++)
363 return NULL;
364 }
365 }
366 /* When we run out of format string we return a pointer to the rest of the
367 * input. */
368 return buf;
369}
370
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* The specification (rather bizarrely) leaves it open whether *tm is
   * overwritten or merely updated.  We update: fields not mentioned by the
   * format keep whatever the caller put there, and xgetdate() depends on
   * this behavior. */
  const char *const rest = my_strptime_guts(buf, format, tm);

  /* TODO various things we could/should do after a successful parse:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  return rest ? (char *)rest : NULL;
}
397
398/*
399Local Variables:
400c-basic-offset:2
401comment-column:40
402fill-column:79
403indent-tabs-mode:nil
404End:
405*/