1 /* strptime.c - partial strptime() reimplementation
3 * Copyright (c) 2008, 2011, 2013 Richard Kettlewell.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 /** @file lib/strptime.c
30 * @brief strptime() reimplementation
32 * strptime() is here reimplemented because the FreeBSD (and older MacOS) one
33 * is broken and does not report errors properly. See TODO remarks below for
45 # include <langinfo.h>
50 /* Fake plastic langinfo. Primarily for Windows.
51 * TODO WIN32 can we get these values out of the win32 api instead? */
99 const char *nl_langinfo(nl_item item) {
101 case DAY_1: return "Sunday";
102 case DAY_2: return "Monday";
103 case DAY_3: return "Tuesday";
104 case DAY_4: return "Wednesday";
105 case DAY_5: return "Thursday";
106 case DAY_6: return "Friday";
107 case DAY_7: return "Saturday";
108 case ABDAY_1: return "Sun";
109 case ABDAY_2: return "Mon";
110 case ABDAY_3: return "Tue";
111 case ABDAY_4: return "Wed";
112 case ABDAY_5: return "Thu";
113 case ABDAY_6: return "Fri";
114 case ABDAY_7: return "Sat";
115 case MON_1: return "January";
116 case MON_2: return "February";
117 case MON_3: return "March";
118 case MON_4: return "April";
119 case MON_5: return "May";
120 case MON_6: return "June";
121 case MON_7: return "July";
122 case MON_8: return "August";
123 case MON_9: return "September";
124 case MON_10: return "October";
125 case MON_11: return "November";
126 case MON_12: return "December";
127 case ABMON_1: return "Jan";
128 case ABMON_2: return "Feb";
129 case ABMON_3: return "Mar";
130 case ABMON_4: return "Apr";
131 case ABMON_5: return "May";
132 case ABMON_6: return "Jun";
133 case ABMON_7: return "Jul";
134 case ABMON_8: return "Aug";
135 case ABMON_9: return "Sep";
136 case ABMON_10: return "Oct";
137 case ABMON_11: return "Nov";
138 case ABMON_12: return "Dec";
139 case D_FMT: return "%d/%m/%y";
140 case T_FMT: return "%H:%M:%S";
141 case D_T_FMT: return "%a %d %b %Y %H:%M:%S %Z";
142 case ERA_D_FMT: return "";
143 case ERA_T_FMT: return "";
144 case ERA_D_T_FMT: return "";
150 /** @brief Lookup table entry for locale-specific strings */
151 struct locale_item_match {
152 /** @brief Locale key to try */
155 /** @brief Value to return if value of @ref key matches subject string */
159 static const struct locale_item_match days[] = {
177 static const struct locale_item_match months[] = {
205 /** @brief Match a string
206 * @param buf Start of subject
207 * @param limit End of subject
208 * @param match String to match subject against
209 * @return True if match == [buf,limit) otherwise false
211 * The match is case-independent at least in ASCII.
213 static int try_match(const char *buf,
216 /* TODO this won't work well outside single-byte encodings. A good bet is
217 * probably to convert to Unicode and then use utf32_casefold_compat() (or
218 * utf8_casefold_compat()); using compatibility matching will ensure missing
219 * accents and so on aren't a problem.
221 * en_GB and en_US will probably be in any reasonable encoding for them.
223 while(buf < limit && *match) {
224 if(tolower((unsigned char)*buf) != tolower((unsigned char)*match))
229 if(buf != limit || *match)
234 /** @brief Match from table of locale-specific strings
235 * @param buf Start of subject
236 * @param limit End of subject
237 * @param lim Table of locale lookups
238 * @return Looked up value or -1
240 * The match is case-independent.
242 static int try_locale_match(const char *buf,
244 const struct locale_item_match *lim) {
245 /* This is not very efficient! A (correct) built-in implementation will
246 * presumably have more direct access to locale information. */
247 while(lim->value != -1) {
248 if(try_match(buf, limit, nl_langinfo(lim->key)))
255 static int try_numeric_match(const char *buf,
262 int ch = (unsigned char)*buf++;
263 if(ch >= '0' && ch <= '9') {
265 || (n == INT_MAX / 10 && ch >= INT_MAX % 10 + '0'))
266 return -1; /* overflow */
267 n = 10 * n + ch - '0';
271 if(n < low || n > high)
276 static const char *my_strptime_guts(const char *buf,
279 int fc, mod, spec, next, value;
281 /* nl_langinfo() is allowed to trash its last return value so we copy.
282 * (We're relying on it being usable at all in multithreaded environments
284 #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
286 char subformat[128]; \
289 s = nl_langinfo(EITEM); \
291 s = nl_langinfo(ITEM); \
293 s = nl_langinfo(ITEM); \
296 if(strlen(s) >= sizeof subformat) \
298 strcpy(subformat, s); \
299 if(!(buf = my_strptime_guts(buf, subformat, tm))) \
304 fc = (unsigned char)*format++;
306 /* Get the character defining the conversion specification */
307 spec = (unsigned char)*format++;
308 if(spec == 'E' || spec == 'O') {
309 /* Oops, there's a modifier first */
311 spec = (unsigned char)*format++;
315 return NULL; /* format string broken! */
316 /* See what the next directive is. The specification is written in terms
317 * of stopping the match at a character that matches the next directive.
318 * This implementation mirrors this aspect of the specification
320 next = (unsigned char)*format;
324 /* Next directive is whitespace, so bound the input string (at least)
326 while(*limit && !isspace((unsigned char)*limit))
328 } else if(next == '%') {
329 /* Prohibited: "The application shall ensure that there is
330 * white-space or other non-alphanumeric characters between any two
331 * conversion specifications". In fact we let alphanumerics
334 * Forbidding even %% seems a bit harsh but is consistent with the
335 * specification as written.
339 /* Next directive is a specific character, so bound the input string
340 * (at least) by that. This will work badly in the face of multibyte
341 * characters, but then the spec is vague about what kind of string
342 * we're dealing with anyway so you probably couldn't safely use them
343 * in the format string at least in any case. */
344 while(*limit && *limit != next)
348 limit = buf + strlen(buf);
350 case 'A': case 'a': /* day name (abbrev or full) */
351 if((value = try_locale_match(buf, limit, days)) == -1)
355 case 'B': case 'b': case 'h': /* month name (abbrev or full) */
356 if((value = try_locale_match(buf, limit, months)) == -1)
358 tm->tm_mon = value - 1;
360 case 'c': /* locale date+time */
361 USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
363 case 'C': /* century number 0-99 */
366 case 'd': case 'e': /* day of month 1-31 */
367 if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
371 case 'D': /* == "%m / %d / %y" */
372 if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
375 case 'H': /* hour 0-23 */
376 if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
380 case 'I': /* hour 1-12 */
383 case 'j': /* day 1-366 */
384 if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
386 tm->tm_yday = value - 1;
388 case 'm': /* month 1-12 */
389 if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
391 tm->tm_mon = value - 1;
393 case 'M': /* minute 0-59 */
394 if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
398 case 'n': case 't': /* any whitespace */
399 goto matchwhitespace;
400 case 'p': /* locale am/pm */
403 case 'r': /* == "%I : %M : %S %p" */
404 /* TODO actually this is locale-dependent; and we don't implement %I
405 * anyway, so it's not going to work even as it stands. */
406 if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
409 case 'R': /* == "%H : %M" */
410 if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
413 case 'S': /* seconds 0-60 */
414 if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
418 case 'U': /* week number from Sunday 0-53 */
421 case 'w': /* day number 0-6 from Sunday */
422 if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
426 case 'W': /* week number from Monday 0-53 */
429 case 'x': /* locale date format */
430 USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
432 case 'X': /* locale time format */
433 USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
435 case 'y': /* year mod 100 */
436 if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
438 if(value >= 0 && value <= 68)
439 value = 2000 + value;
440 else if(value >= 69 && value <= 99)
441 value = 1900 + value;
442 tm->tm_year = value - 1900;
445 if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
447 tm->tm_year = value - 1900;
452 /* The spec is a bit vague about what to do with invalid format
453 * strings. We return NULL immediately and hope someone will
458 } else if(isspace(fc)) {
460 /* Any format whitespace matches any number of input whitespace
461 * characters. The directive can formally contain more than one
462 * whitespace character; for the second and subsequent ones we'll match 0
463 * characters from the input. */
464 while(isspace((unsigned char)*buf))
468 /* Non-% non-whitespace characters must match themselves exactly */
469 if(fc != (unsigned char)*buf++)
473 /* When we run out of format string we return a pointer to the rest of the
478 /** @brief Reimplementation of strptime()
479 * @param buf Input buffer
480 * @param format Format string
481 * @param tm Where to put result
482 * @return Pointer to first unparsed input character, or NULL on error
485 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
487 char *my_strptime(const char *buf,
490 /* Whether to overwrite or update is unspecified (rather bizarrely). This
491 * implementation does not overwrite, as xgetdate() depends on this
494 if(!(buf = my_strptime_guts(buf, format, tm)))
496 /* TODO various things we could/should do:
497 * - infer day/month from %j+year
498 * - infer day/month from %U/%W+%w/%a+year
499 * - infer hour from %p+%I
500 * - fill wday/yday from other fields