1 /* strptime.c - partial strptime() reimplementation
3 * Copyright (c) 2008, 2011 Richard Kettlewell.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 /** @file lib/strptime.c
30 * @brief strptime() reimplementation
32 * strptime() is here reimplemented because the FreeBSD (and older MacOS) one
33 * is broken and does not report errors properly. See TODO remarks below for
43 /** @brief Lookup table entry for locale-specific strings */
44 struct locale_item_match {
45 /** @brief Locale key to try */
48 /** @brief Value to return if value of @ref key matches subject string */
52 static const struct locale_item_match days[] = {
70 static const struct locale_item_match months[] = {
98 /** @brief Match a string
99 * @param buf Start of subject
100 * @param limit End of subject
101 * @param match String to match subject against
102 * @return True if match == [buf,limit) otherwise false
104 * The match is case-independent at least in ASCII.
106 static int try_match(const char *buf,
109 /* TODO this won't work well outside single-byte encodings. A good bet is
110 * probably to convert to Unicode and then use utf32_casefold_compat() (or
111 * utf8_casefold_compat()); using compatibility matching will ensure missing
112 * accents and so on aren't a problem.
114 * en_GB and en_US will probably be in any reasonable encoding for them.
116 while(buf < limit && *match) {
117 if(tolower((unsigned char)*buf) != tolower((unsigned char)*match))
122 if(buf != limit || *match)
127 /** @brief Match from table of locale-specific strings
128 * @param buf Start of subject
129 * @param limit End of subject
130 * @param lim Table of locale lookups
131 * @return Looked up value or -1
133 * The match is case-independent.
135 static int try_locale_match(const char *buf,
137 const struct locale_item_match *lim) {
138 /* This is not very efficient! A (correct) built-in implementation will
139 * presumably have more direct access to locale information. */
140 while(lim->value != -1) {
141 if(try_match(buf, limit, nl_langinfo(lim->key)))
148 static int try_numeric_match(const char *buf,
155 int ch = (unsigned char)*buf++;
156 if(ch >= '0' && ch <= '9') {
158 || (n == INT_MAX / 10 && ch >= INT_MAX % 10 + '0'))
159 return -1; /* overflow */
160 n = 10 * n + ch - '0';
164 if(n < low || n > high)
169 static const char *my_strptime_guts(const char *buf,
172 int fc, mod, spec, next, value;
174 /* nl_langinfo() is allowed to trash its last return value so we copy.
175 * (We're relying on it being usable at all in multithreaded environments
177 #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
179 char subformat[128]; \
182 s = nl_langinfo(EITEM); \
184 s = nl_langinfo(ITEM); \
186 s = nl_langinfo(ITEM); \
189 if(strlen(s) >= sizeof subformat) \
191 strcpy(subformat, s); \
192 if(!(buf = my_strptime_guts(buf, subformat, tm))) \
197 fc = (unsigned char)*format++;
199 /* Get the character defining the conversion specification */
200 spec = (unsigned char)*format++;
201 if(spec == 'E' || spec == 'O') {
202 /* Oops, there's a modifier first */
204 spec = (unsigned char)*format++;
208 return NULL; /* format string broken! */
209 /* See what the next directive is. The specification is written in terms
210 * of stopping the match at a character that matches the next directive.
211 * This implementation mirrors this aspect of the specification
213 next = (unsigned char)*format;
217 /* Next directive is whitespace, so bound the input string (at least)
219 while(*limit && !isspace((unsigned char)*limit))
221 } else if(next == '%') {
222 /* Prohibited: "The application shall ensure that there is
223 * white-space or other non-alphanumeric characters between any two
224 * conversion specifications". In fact we let alphanumerics
227 * Forbidding even %% seems a bit harsh but is consistent with the
228 * specification as written.
232 /* Next directive is a specific character, so bound the input string
233 * (at least) by that. This will work badly in the face of multibyte
234 * characters, but then the spec is vague about what kind of string
235 * we're dealing with anyway so you probably couldn't safely use them
236 * in the format string at least in any case. */
237 while(*limit && *limit != next)
241 limit = buf + strlen(buf);
243 case 'A': case 'a': /* day name (abbrev or full) */
244 if((value = try_locale_match(buf, limit, days)) == -1)
248 case 'B': case 'b': case 'h': /* month name (abbrev or full) */
249 if((value = try_locale_match(buf, limit, months)) == -1)
251 tm->tm_mon = value - 1;
253 case 'c': /* locale date+time */
254 USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
256 case 'C': /* century number 0-99 */
259 case 'd': case 'e': /* day of month 1-31 */
260 if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
264 case 'D': /* == "%m / %d / %y" */
265 if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
268 case 'H': /* hour 0-23 */
269 if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
273 case 'I': /* hour 1-12 */
276 case 'j': /* day 1-366 */
277 if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
279 tm->tm_yday = value - 1;
281 case 'm': /* month 1-12 */
282 if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
284 tm->tm_mon = value - 1;
286 case 'M': /* minute 0-59 */
287 if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
291 case 'n': case 't': /* any whitespace */
292 goto matchwhitespace;
293 case 'p': /* locale am/pm */
296 case 'r': /* == "%I : %M : %S %p" */
297 /* TODO actually this is locale-dependent; and we don't implement %I
298 * anyway, so it's not going to work even as it stands. */
299 if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
302 case 'R': /* == "%H : %M" */
303 if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
306 case 'S': /* seconds 0-60 */
307 if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
311 case 'U': /* week number from Sunday 0-53 */
314 case 'w': /* day number 0-6 from Sunday */
315 if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
319 case 'W': /* week number from Monday 0-53 */
322 case 'x': /* locale date format */
323 USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
325 case 'X': /* locale time format */
326 USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
328 case 'y': /* year mod 100 */
329 if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
331 if(value >= 0 && value <= 68)
332 value = 2000 + value;
333 else if(value >= 69 && value <= 99)
334 value = 1900 + value;
335 tm->tm_year = value - 1900;
338 if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
340 tm->tm_year = value - 1900;
345 /* The spec is a bit vague about what to do with invalid format
346 * strings. We return NULL immediately and hope someone will
351 } else if(isspace(fc)) {
353 /* Any format whitespace matches any number of input whitespace
354 * characters. The directive can formally contain more than one
355 * whitespace character; for the second and subsequent ones we'll match 0
356 * characters from the input. */
357 while(isspace((unsigned char)*buf))
361 /* Non-% non-whitespace characters must match themselves exactly */
362 if(fc != (unsigned char)*buf++)
366 /* When we run out of format string we return a pointer to the rest of the
371 /** @brief Reimplementation of strptime()
372 * @param buf Input buffer
373 * @param format Format string
374 * @param tm Where to put result
375 * @return Pointer to first unparsed input character, or NULL on error
378 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
380 char *my_strptime(const char *buf,
383 /* Whether to overwrite or update is unspecified (rather bizarrely). This
384 * implementation does not overwrite, as xgetdate() depends on this
387 if(!(buf = my_strptime_guts(buf, format, tm)))
389 /* TODO various things we could/should do:
390 * - infer day/month from %j+year
391 * - infer day/month from %U/%W+%w/%a+year
392 * - infer hour from %p+%I
393 * - fill wday/yday from other fields