chiark / gitweb /
doxygen: add some missing docstrings.
[disorder] / lib / strptime.c
CommitLineData
477f956c
RK
1/* strptime.c - partial strptime() reimplementation
2 *
3 * (c) 2008 Richard Kettlewell.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/* strptime() is here reimplemented because the FreeBSD (and older MacOS) one
31 * is broken and does not report errors properly. See TODO remarks below for
32 * some missing bits. */
33
34#include <ctype.h>
35#include <limits.h>
36#include <string.h>
37#include <langinfo.h>
38#include "strptime.h"
39
/** @brief One row of a locale-string lookup table
 *
 * Tables of these are scanned by try_locale_match(); a row whose @ref value
 * is -1 terminates the table.
 */
struct locale_item_match {
  /** @brief nl_langinfo() item whose string is compared with the subject */
  nl_item key;

  /** @brief Result to report when the string for @ref key matches */
  int value;
};
48
49static const struct locale_item_match days[] = {
50 { DAY_1, 0 },
51 { DAY_2, 1 },
52 { DAY_3, 2 },
53 { DAY_4, 3 },
54 { DAY_5, 4 },
55 { DAY_6, 5 },
56 { DAY_7, 6 },
57 { ABDAY_1, 0 },
58 { ABDAY_2, 1 },
59 { ABDAY_3, 2 },
60 { ABDAY_4, 3 },
61 { ABDAY_5, 4 },
62 { ABDAY_6, 5 },
63 { ABDAY_7, 6 },
64 { -1, -1 }
65};
66
67static const struct locale_item_match months[] = {
68 { MON_1, 1 },
69 { MON_2, 2 },
70 { MON_3, 3 },
71 { MON_4, 4 },
72 { MON_5, 5 },
73 { MON_6, 6 },
74 { MON_7, 7 },
75 { MON_8, 8 },
76 { MON_9, 9 },
77 { MON_10, 10 },
78 { MON_11, 11 },
79 { MON_12, 12 },
80 { ABMON_1, 1 },
81 { ABMON_2, 2 },
82 { ABMON_3, 3 },
83 { ABMON_4, 4 },
84 { ABMON_5, 5 },
85 { ABMON_6, 6 },
86 { ABMON_7, 7 },
87 { ABMON_8, 8 },
88 { ABMON_9, 9 },
89 { ABMON_10, 10 },
90 { ABMON_11, 11 },
91 { ABMON_12, 12 },
92 { -1, -1 },
93};
94
/** @brief Compare a bounded subject against a NUL-terminated string
 * @param buf First character of subject
 * @param limit One past the last character of subject
 * @param match NUL-terminated string to compare with
 * @return 1 if [buf,limit) equals @p match ignoring case, otherwise 0
 *
 * Case is folded one byte at a time with tolower(), so the comparison is
 * case-independent at least in ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); using compatibility
   * matching will ensure missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  const char *s = buf;
  const char *m = match;

  for(; s < limit && *m; ++s, ++m) {
    const int a = tolower((unsigned char)*s);
    const int b = tolower((unsigned char)*m);

    if(a != b)
      return 0;
  }
  /* Both strings must be exhausted at the same point */
  return s == limit && !*m;
}
123
124/** @brief Match from table of locale-specific strings
125 * @param buf Start of subject
126 * @param limit End of subject
127 * @param lim Table of locale lookups
128 * @return Looked up value or -1
129 *
130 * The match is case-independent.
131 */
132static int try_locale_match(const char *buf,
133 const char *limit,
134 const struct locale_item_match *lim) {
135 /* This is not very efficient! A (correct) built-in implementation will
136 * presumably have more direct access to locale information. */
137 while(lim->value != -1) {
138 if(try_match(buf, limit, nl_langinfo(lim->key)))
139 return lim->value;
140 ++lim;
141 }
142 return -1;
143}
144
/** @brief Parse a bounded string as an unsigned decimal number
 * @param buf First character of subject
 * @param limit One past the last character of subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return Parsed value, or -1 on error
 *
 * The whole of [buf,limit) must consist of decimal digits and there must be
 * at least one of them.  Values that would exceed INT_MAX, or that fall
 * outside [low,high], are rejected.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  unsigned n = 0;

  /* An empty field is not a number (and must not be read as 0) */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;                        /* not a digit */
    digit = (unsigned)(ch - '0');
    /* Reject only if 10 * n + digit would exceed INT_MAX; INT_MAX itself is
     * representable and therefore accepted */
    if(n > ((unsigned)INT_MAX - digit) / 10)
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
165
166static const char *my_strptime_guts(const char *buf,
167 const char *format,
168 struct tm *tm) {
169 int fc, mod, spec, next, value;
170 const char *limit;
171 /* nl_langinfo() is allowed to trash its last return value so we copy.
172 * (We're relying on it being usable at all in multithreaded environments
173 * though.) */
174#define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
175 const char *s; \
176 char subformat[128]; \
177 \
178 if(mod == 'E') { \
179 s = nl_langinfo(EITEM); \
180 if(!s || !*s) \
181 s = nl_langinfo(ITEM); \
182 } else \
183 s = nl_langinfo(ITEM); \
184 if(!s || !*s) \
185 s = DEF; \
186 if(strlen(s) >= sizeof subformat) \
187 s = DEF; \
188 strcpy(subformat, s); \
189 if(!(buf = my_strptime_guts(buf, subformat, tm))) \
190 return NULL; \
191} while(0)
192
193 while(*format) {
194 fc = (unsigned char)*format++;
195 if(fc == '%') {
196 /* Get the character defining the converstion specification */
197 spec = (unsigned char)*format++;
198 if(spec == 'E' || spec == 'O') {
199 /* Oops, there's a modifier first */
200 mod = spec;
201 spec = (unsigned char)*format++;
202 } else
203 mod = 0;
204 if(!spec)
205 return NULL; /* format string broken! */
206 /* See what the next directive is. The specification is written in terms
207 * of stopping the match at a character that matches the next directive.
208 * This implementation mirrors this aspect of the specification
209 * directly. */
210 next = (unsigned char)*format;
211 if(next) {
212 limit = buf;
213 if(isspace(next)) {
214 /* Next directive is whitespace, so bound the input string (at least)
215 * by that */
216 while(*limit && !isspace((unsigned char)*limit))
217 ++limit;
218 } else if(next == '%') {
219 /* Prohibited: "The application shall ensure that there is
220 * white-space or other non-alphanumeric characters between any two
221 * conversion specifications". In fact we let alphanumerics
222 * through.
223 *
224 * Forbidding even %% seems a bit harsh but is consistent with the
225 * specification as written.
226 */
227 return NULL;
228 } else {
229 /* Next directive is a specific character, so bound the input string
230 * (at least) by that. This will work badly in the face of multibyte
231 * characters, but then the spec is vague about what kind of string
232 * we're dealing with anyway so you probably couldn't safely use them
233 * in the format string at least in any case. */
234 while(*limit && *limit != next)
235 ++limit;
236 }
237 } else
238 limit = buf + strlen(buf);
239 switch(spec) {
240 case 'A': case 'a': /* day name (abbrev or full) */
241 if((value = try_locale_match(buf, limit, days)) == -1)
242 return NULL;
243 tm->tm_wday = value;
244 break;
245 case 'B': case 'b': case 'h': /* month name (abbrev or full) */
246 if((value = try_locale_match(buf, limit, months)) == -1)
247 return NULL;
248 tm->tm_mon = value - 1;
249 break;
250 case 'c': /* locale date+time */
251 USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
252 break;
253 case 'C': /* century number 0-99 */
254 /* TODO */
255 return NULL;
256 case 'd': case 'e': /* day of month 1-31 */
257 if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
258 return NULL;
259 tm->tm_mday = value;
260 break;
261 case 'D': /* == "%m / %d / %y" */
262 if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
263 return NULL;
264 break;
265 case 'H': /* hour 0-23 */
266 if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
267 return NULL;
268 tm->tm_hour = value;
269 break;
270 case 'I': /* hour 1-12 */
271 /* TODO */
272 return NULL;
273 case 'j': /* day 1-366 */
274 if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
275 return NULL;
276 tm->tm_yday = value - 1;
277 return NULL;
278 case 'm': /* month 1-12 */
279 if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
280 return NULL;
281 tm->tm_mon = value - 1;
282 break;
283 case 'M': /* minute 0-59 */
284 if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
285 return NULL;
286 tm->tm_min = value;
287 break;
288 case 'n': case 't': /* any whitespace */
289 goto matchwhitespace;
290 case 'p': /* locale am/pm */
291 /* TODO */
292 return NULL;
293 case 'r': /* == "%I : %M : %S %p" */
294 /* TODO actually this is locale-dependent; and we don't implement %I
295 * anyway, so it's not going to work even as it stands. */
296 if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
297 return NULL;
298 break;
299 case 'R': /* == "%H : %M" */
300 if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
301 return NULL;
302 break;
303 case 'S': /* seconds 0-60 */
304 if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
305 return NULL;
306 tm->tm_sec = value;
307 break;
308 case 'U': /* week number from Sunday 0-53 */
309 /* TODO */
310 return NULL;
311 case 'w': /* day number 0-6 from Sunday */
312 if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
313 return NULL;
314 tm->tm_wday = value;
315 break;
316 case 'W': /* week number from Monday 0-53 */
317 /* TODO */
318 return NULL;
319 case 'x': /* locale date format */
320 USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
321 break;
322 case 'X': /* locale time format */
323 USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
324 break;
325 case 'y': /* year mod 100 */
326 if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
327 return NULL;
328 if(value >= 0 && value <= 68)
329 value = 2000 + value;
330 else if(value >= 69 && value <= 99)
331 value = 1900 + value;
332 tm->tm_year = value - 1900;
333 break;
334 case 'Y': /* year */
335 if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
336 return NULL;
337 tm->tm_year = value - 1900;
338 break;
339 case '%':
340 goto matchself;
341 default:
342 /* The spec is a bit vague about what to do with invalid format
343 * strings. We return NULL immediately and hope someone will
344 * notice. */
345 return NULL;
346 }
347 buf = limit;
348 } else if(isspace(fc)) {
349 matchwhitespace:
350 /* Any format whitespace matches any number of input whitespace
351 * characters. The directive can formally contain more than one
352 * whitespace character; for the second and subsequent ones we'll match 0
353 * characters from the input. */
354 while(isspace((unsigned char)*buf))
355 ++buf;
356 } else {
357 matchself:
358 /* Non-% non-whitespace characters must match themselves exactly */
359 if(fc != (unsigned char)*buf++)
360 return NULL;
361 }
362 }
363 /* When we run out of format string we return a pointer to the rest of the
364 * input. */
365 return buf;
366}
367
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
   * implementation does not overwrite: fields of *tm that the format does not
   * set are left alone, as xgetdate() depends on this behavior. */
  const char *rest = my_strptime_guts(buf, format, tm);

  /* TODO various things we could/should do on success:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */

  /* rest is NULL on error, which is passed straight through; the cast only
   * drops const to fit the conventional strptime() return type */
  return (char *)rest;
}
394
395/*
396Local Variables:
397c-basic-offset:2
398comment-column:40
399fill-column:79
400indent-tabs-mode:nil
401End:
402*/