| 1 | /* strptime.c - partial strptime() reimplementation |
| 2 | * |
| 3 | * Copyright (c) 2008, 2011, 2013 Richard Kettlewell. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * 3. The name of the author may not be used to endorse or promote products |
| 15 | * derived from this software without specific prior written permission. |
| 16 | * |
| 17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 | * SUCH DAMAGE. |
| 28 | */ |
| 29 | /** @file lib/strptime.c |
| 30 | * @brief strptime() reimplementation |
| 31 | * |
| 32 | * strptime() is here reimplemented because the FreeBSD (and older MacOS) one |
| 33 | * is broken and does not report errors properly. See TODO remarks below for |
| 34 | * some missing bits. |
| 35 | */ |
| 36 | |
| 37 | #if HAVE_CONFIG_H |
| 38 | # include <config.h> |
| 39 | #endif |
| 40 | |
| 41 | #include <ctype.h> |
| 42 | #include <limits.h> |
| 43 | #include <string.h> |
| 44 | #if HAVE_LANGINFO_H |
| 45 | # include <langinfo.h> |
| 46 | #endif |
| 47 | #include "strptime.h" |
| 48 | |
| 49 | #if !HAVE_LANGINFO_H |
| 50 | /* Fake plastic langinfo. Primarily for Windows. |
| 51 | * TODO WIN32 can we get these values out of the win32 api instead? */ |
/* Stand-in for the POSIX nl_item type, compiled only when <langinfo.h> is
 * not available.  The enumerators carry no meaning beyond being distinct
 * keys for the fallback nl_langinfo(); their values are assigned implicitly
 * from zero in the order listed. */
typedef enum {
  /* Full weekday names */
  DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,

  /* Abbreviated weekday names */
  ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6, ABDAY_7,

  /* Full month names */
  MON_1, MON_2, MON_3, MON_4, MON_5, MON_6,
  MON_7, MON_8, MON_9, MON_10, MON_11, MON_12,

  /* Abbreviated month names */
  ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6,
  ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12,

  /* Date, time and combined date+time formats */
  D_FMT, T_FMT, D_T_FMT,

  /* Era-based variants of the above */
  ERA_D_FMT, ERA_T_FMT, ERA_D_T_FMT
} nl_item;
| 98 | |
| 99 | const char *nl_langinfo(nl_item item) { |
| 100 | switch(item) { |
| 101 | case DAY_1: return "Sunday"; |
| 102 | case DAY_2: return "Monday"; |
| 103 | case DAY_3: return "Tuesday"; |
| 104 | case DAY_4: return "Wednesday"; |
| 105 | case DAY_5: return "Thursday"; |
| 106 | case DAY_6: return "Friday"; |
| 107 | case DAY_7: return "Saturday"; |
| 108 | case ABDAY_1: return "Sun"; |
| 109 | case ABDAY_2: return "Mon"; |
| 110 | case ABDAY_3: return "Tue"; |
| 111 | case ABDAY_4: return "Wed"; |
| 112 | case ABDAY_5: return "Thu"; |
| 113 | case ABDAY_6: return "Fri"; |
| 114 | case ABDAY_7: return "Sat"; |
| 115 | case MON_1: return "January"; |
| 116 | case MON_2: return "February"; |
| 117 | case MON_3: return "March"; |
| 118 | case MON_4: return "April"; |
| 119 | case MON_5: return "May"; |
| 120 | case MON_6: return "June"; |
| 121 | case MON_7: return "July"; |
| 122 | case MON_8: return "August"; |
| 123 | case MON_9: return "September"; |
| 124 | case MON_10: return "October"; |
| 125 | case MON_11: return "November"; |
| 126 | case MON_12: return "December"; |
| 127 | case ABMON_1: return "Jan"; |
| 128 | case ABMON_2: return "Feb"; |
| 129 | case ABMON_3: return "Mar"; |
| 130 | case ABMON_4: return "Apr"; |
| 131 | case ABMON_5: return "May"; |
| 132 | case ABMON_6: return "Jun"; |
| 133 | case ABMON_7: return "Jul"; |
| 134 | case ABMON_8: return "Aug"; |
| 135 | case ABMON_9: return "Sep"; |
| 136 | case ABMON_10: return "Oct"; |
| 137 | case ABMON_11: return "Nov"; |
| 138 | case ABMON_12: return "Dec"; |
| 139 | case D_FMT: return "%d/%m/%y"; |
| 140 | case T_FMT: return "%H:%M:%S"; |
| 141 | case D_T_FMT: return "%a %d %b %Y %H:%M:%S %Z"; |
| 142 | case ERA_D_FMT: return ""; |
| 143 | case ERA_T_FMT: return ""; |
| 144 | case ERA_D_T_FMT: return ""; |
| 145 | default: return 0; |
| 146 | } |
| 147 | } |
| 148 | #endif |
| 149 | |
/** @brief One row of a locale-string lookup table
 *
 * Tables of these are scanned in order by try_locale_match() and are
 * terminated by a row whose @ref value is -1.
 */
struct locale_item_match {
  nl_item key;   /**< @brief nl_langinfo() item whose string is compared */
  int value;     /**< @brief Result reported when that string matches */
};
| 158 | |
| 159 | static const struct locale_item_match days[] = { |
| 160 | { DAY_1, 0 }, |
| 161 | { DAY_2, 1 }, |
| 162 | { DAY_3, 2 }, |
| 163 | { DAY_4, 3 }, |
| 164 | { DAY_5, 4 }, |
| 165 | { DAY_6, 5 }, |
| 166 | { DAY_7, 6 }, |
| 167 | { ABDAY_1, 0 }, |
| 168 | { ABDAY_2, 1 }, |
| 169 | { ABDAY_3, 2 }, |
| 170 | { ABDAY_4, 3 }, |
| 171 | { ABDAY_5, 4 }, |
| 172 | { ABDAY_6, 5 }, |
| 173 | { ABDAY_7, 6 }, |
| 174 | { -1, -1 } |
| 175 | }; |
| 176 | |
| 177 | static const struct locale_item_match months[] = { |
| 178 | { MON_1, 1 }, |
| 179 | { MON_2, 2 }, |
| 180 | { MON_3, 3 }, |
| 181 | { MON_4, 4 }, |
| 182 | { MON_5, 5 }, |
| 183 | { MON_6, 6 }, |
| 184 | { MON_7, 7 }, |
| 185 | { MON_8, 8 }, |
| 186 | { MON_9, 9 }, |
| 187 | { MON_10, 10 }, |
| 188 | { MON_11, 11 }, |
| 189 | { MON_12, 12 }, |
| 190 | { ABMON_1, 1 }, |
| 191 | { ABMON_2, 2 }, |
| 192 | { ABMON_3, 3 }, |
| 193 | { ABMON_4, 4 }, |
| 194 | { ABMON_5, 5 }, |
| 195 | { ABMON_6, 6 }, |
| 196 | { ABMON_7, 7 }, |
| 197 | { ABMON_8, 8 }, |
| 198 | { ABMON_9, 9 }, |
| 199 | { ABMON_10, 10 }, |
| 200 | { ABMON_11, 11 }, |
| 201 | { ABMON_12, 12 }, |
| 202 | { -1, -1 }, |
| 203 | }; |
| 204 | |
/** @brief Compare an input span against a candidate string
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param match NUL-terminated string to compare the subject with
 * @return 1 if [buf,limit) equals @p match ignoring case, otherwise 0
 *
 * Characters are compared one byte at a time after folding with tolower(),
 * so the comparison is case-independent at least for ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); compatibility
   * matching would ensure that missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be fine in any reasonable encoding for
   * them.
   */
  const char *subject = buf;
  const char *candidate = match;

  for(; subject < limit && *candidate; ++subject, ++candidate) {
    if(tolower((unsigned char)*subject) != tolower((unsigned char)*candidate))
      return 0;
  }
  /* Both strings must have been used up at the same moment */
  return subject == limit && *candidate == '\0';
}
| 233 | |
| 234 | /** @brief Match from table of locale-specific strings |
| 235 | * @param buf Start of subject |
| 236 | * @param limit End of subject |
| 237 | * @param lim Table of locale lookups |
| 238 | * @return Looked up value or -1 |
| 239 | * |
| 240 | * The match is case-independent. |
| 241 | */ |
| 242 | static int try_locale_match(const char *buf, |
| 243 | const char *limit, |
| 244 | const struct locale_item_match *lim) { |
| 245 | /* This is not very efficient! A (correct) built-in implementation will |
| 246 | * presumably have more direct access to locale information. */ |
| 247 | while(lim->value != -1) { |
| 248 | if(try_match(buf, limit, nl_langinfo(lim->key))) |
| 249 | return lim->value; |
| 250 | ++lim; |
| 251 | } |
| 252 | return -1; |
| 253 | } |
| 254 | |
/** @brief Parse an input span as an unsigned decimal number
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return The parsed value, or -1 on error
 *
 * The whole of [buf,limit) must consist of decimal digits, and there must be
 * at least one of them.  Leading zeroes are accepted.  Any value up to and
 * including INT_MAX can be represented; anything larger, and anything outside
 * [low,high], is rejected.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  unsigned n = 0;

  /* An empty span is not a number (it would otherwise be read as 0) */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;                        /* not a digit */
    digit = (unsigned)(ch - '0');
    /* Reject only if 10 * n + digit would exceed INT_MAX */
    if(n > ((unsigned)INT_MAX - digit) / 10)
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
| 275 | |
| 276 | static const char *my_strptime_guts(const char *buf, |
| 277 | const char *format, |
| 278 | struct tm *tm) { |
| 279 | int fc, mod, spec, next, value; |
| 280 | const char *limit; |
| 281 | /* nl_langinfo() is allowed to trash its last return value so we copy. |
| 282 | * (We're relying on it being usable at all in multithreaded environments |
| 283 | * though.) */ |
| 284 | #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \ |
| 285 | const char *s; \ |
| 286 | char subformat[128]; \ |
| 287 | \ |
| 288 | if(mod == 'E') { \ |
| 289 | s = nl_langinfo(EITEM); \ |
| 290 | if(!s || !*s) \ |
| 291 | s = nl_langinfo(ITEM); \ |
| 292 | } else \ |
| 293 | s = nl_langinfo(ITEM); \ |
| 294 | if(!s || !*s) \ |
| 295 | s = DEF; \ |
| 296 | if(strlen(s) >= sizeof subformat) \ |
| 297 | s = DEF; \ |
| 298 | strcpy(subformat, s); \ |
| 299 | if(!(buf = my_strptime_guts(buf, subformat, tm))) \ |
| 300 | return NULL; \ |
| 301 | } while(0) |
| 302 | |
| 303 | while(*format) { |
| 304 | fc = (unsigned char)*format++; |
| 305 | if(fc == '%') { |
| 306 | /* Get the character defining the converstion specification */ |
| 307 | spec = (unsigned char)*format++; |
| 308 | if(spec == 'E' || spec == 'O') { |
| 309 | /* Oops, there's a modifier first */ |
| 310 | mod = spec; |
| 311 | spec = (unsigned char)*format++; |
| 312 | } else |
| 313 | mod = 0; |
| 314 | if(!spec) |
| 315 | return NULL; /* format string broken! */ |
| 316 | /* See what the next directive is. The specification is written in terms |
| 317 | * of stopping the match at a character that matches the next directive. |
| 318 | * This implementation mirrors this aspect of the specification |
| 319 | * directly. */ |
| 320 | next = (unsigned char)*format; |
| 321 | if(next) { |
| 322 | limit = buf; |
| 323 | if(isspace(next)) { |
| 324 | /* Next directive is whitespace, so bound the input string (at least) |
| 325 | * by that */ |
| 326 | while(*limit && !isspace((unsigned char)*limit)) |
| 327 | ++limit; |
| 328 | } else if(next == '%') { |
| 329 | /* Prohibited: "The application shall ensure that there is |
| 330 | * white-space or other non-alphanumeric characters between any two |
| 331 | * conversion specifications". In fact we let alphanumerics |
| 332 | * through. |
| 333 | * |
| 334 | * Forbidding even %% seems a bit harsh but is consistent with the |
| 335 | * specification as written. |
| 336 | */ |
| 337 | return NULL; |
| 338 | } else { |
| 339 | /* Next directive is a specific character, so bound the input string |
| 340 | * (at least) by that. This will work badly in the face of multibyte |
| 341 | * characters, but then the spec is vague about what kind of string |
| 342 | * we're dealing with anyway so you probably couldn't safely use them |
| 343 | * in the format string at least in any case. */ |
| 344 | while(*limit && *limit != next) |
| 345 | ++limit; |
| 346 | } |
| 347 | } else |
| 348 | limit = buf + strlen(buf); |
| 349 | switch(spec) { |
| 350 | case 'A': case 'a': /* day name (abbrev or full) */ |
| 351 | if((value = try_locale_match(buf, limit, days)) == -1) |
| 352 | return NULL; |
| 353 | tm->tm_wday = value; |
| 354 | break; |
| 355 | case 'B': case 'b': case 'h': /* month name (abbrev or full) */ |
| 356 | if((value = try_locale_match(buf, limit, months)) == -1) |
| 357 | return NULL; |
| 358 | tm->tm_mon = value - 1; |
| 359 | break; |
| 360 | case 'c': /* locale date+time */ |
| 361 | USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y"); |
| 362 | break; |
| 363 | case 'C': /* century number 0-99 */ |
| 364 | /* TODO */ |
| 365 | return NULL; |
| 366 | case 'd': case 'e': /* day of month 1-31 */ |
| 367 | if((value = try_numeric_match(buf, limit, 1, 31)) == -1) |
| 368 | return NULL; |
| 369 | tm->tm_mday = value; |
| 370 | break; |
| 371 | case 'D': /* == "%m / %d / %y" */ |
| 372 | if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm))) |
| 373 | return NULL; |
| 374 | break; |
| 375 | case 'H': /* hour 0-23 */ |
| 376 | if((value = try_numeric_match(buf, limit, 0, 23)) == -1) |
| 377 | return NULL; |
| 378 | tm->tm_hour = value; |
| 379 | break; |
| 380 | case 'I': /* hour 1-12 */ |
| 381 | /* TODO */ |
| 382 | return NULL; |
| 383 | case 'j': /* day 1-366 */ |
| 384 | if((value = try_numeric_match(buf, limit, 1, 366)) == -1) |
| 385 | return NULL; |
| 386 | tm->tm_yday = value - 1; |
| 387 | return NULL; |
| 388 | case 'm': /* month 1-12 */ |
| 389 | if((value = try_numeric_match(buf, limit, 1, 12)) == -1) |
| 390 | return NULL; |
| 391 | tm->tm_mon = value - 1; |
| 392 | break; |
| 393 | case 'M': /* minute 0-59 */ |
| 394 | if((value = try_numeric_match(buf, limit, 0, 59)) == -1) |
| 395 | return NULL; |
| 396 | tm->tm_min = value; |
| 397 | break; |
| 398 | case 'n': case 't': /* any whitespace */ |
| 399 | goto matchwhitespace; |
| 400 | case 'p': /* locale am/pm */ |
| 401 | /* TODO */ |
| 402 | return NULL; |
| 403 | case 'r': /* == "%I : %M : %S %p" */ |
| 404 | /* TODO actually this is locale-dependent; and we don't implement %I |
| 405 | * anyway, so it's not going to work even as it stands. */ |
| 406 | if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm))) |
| 407 | return NULL; |
| 408 | break; |
| 409 | case 'R': /* == "%H : %M" */ |
| 410 | if(!(buf = my_strptime_guts(buf, "%H : %M", tm))) |
| 411 | return NULL; |
| 412 | break; |
| 413 | case 'S': /* seconds 0-60 */ |
| 414 | if((value = try_numeric_match(buf, limit, 0, 60)) == -1) |
| 415 | return NULL; |
| 416 | tm->tm_sec = value; |
| 417 | break; |
| 418 | case 'U': /* week number from Sunday 0-53 */ |
| 419 | /* TODO */ |
| 420 | return NULL; |
| 421 | case 'w': /* day number 0-6 from Sunday */ |
| 422 | if((value = try_numeric_match(buf, limit, 0, 6)) == -1) |
| 423 | return NULL; |
| 424 | tm->tm_wday = value; |
| 425 | break; |
| 426 | case 'W': /* week number from Monday 0-53 */ |
| 427 | /* TODO */ |
| 428 | return NULL; |
| 429 | case 'x': /* locale date format */ |
| 430 | USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y"); |
| 431 | break; |
| 432 | case 'X': /* locale time format */ |
| 433 | USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S"); |
| 434 | break; |
| 435 | case 'y': /* year mod 100 */ |
| 436 | if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1) |
| 437 | return NULL; |
| 438 | if(value >= 0 && value <= 68) |
| 439 | value = 2000 + value; |
| 440 | else if(value >= 69 && value <= 99) |
| 441 | value = 1900 + value; |
| 442 | tm->tm_year = value - 1900; |
| 443 | break; |
| 444 | case 'Y': /* year */ |
| 445 | if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1) |
| 446 | return NULL; |
| 447 | tm->tm_year = value - 1900; |
| 448 | break; |
| 449 | case '%': |
| 450 | goto matchself; |
| 451 | default: |
| 452 | /* The spec is a bit vague about what to do with invalid format |
| 453 | * strings. We return NULL immediately and hope someone will |
| 454 | * notice. */ |
| 455 | return NULL; |
| 456 | } |
| 457 | buf = limit; |
| 458 | } else if(isspace(fc)) { |
| 459 | matchwhitespace: |
| 460 | /* Any format whitespace matches any number of input whitespace |
| 461 | * characters. The directive can formally contain more than one |
| 462 | * whitespace character; for the second and subsequent ones we'll match 0 |
| 463 | * characters from the input. */ |
| 464 | while(isspace((unsigned char)*buf)) |
| 465 | ++buf; |
| 466 | } else { |
| 467 | matchself: |
| 468 | /* Non-% non-whitespace characters must match themselves exactly */ |
| 469 | if(fc != (unsigned char)*buf++) |
| 470 | return NULL; |
| 471 | } |
| 472 | } |
| 473 | /* When we run out of format string we return a pointer to the rest of the |
| 474 | * input. */ |
| 475 | return buf; |
| 476 | } |
| 477 | |
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Only the fields of @p tm that @p format refers to are written; all others
 * keep whatever values the caller put there.
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
   * implementation does not overwrite, as xgetdate() depends on this
   * behavior. */
  const char *rest = my_strptime_guts(buf, format, tm);

  if(rest == NULL)
    return NULL;
  /* TODO various things we could/should do:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  /* The const is dropped only to match the strptime() return type */
  return (char *)rest;
}
| 504 | |
| 505 | /* |
| 506 | Local Variables: |
| 507 | c-basic-offset:2 |
| 508 | comment-column:40 |
| 509 | fill-column:79 |
| 510 | indent-tabs-mode:nil |
| 511 | End: |
| 512 | */ |