| 1 | /* strptime.c - partial strptime() reimplementation |
| 2 | * |
| 3 | * (c) 2008 Richard Kettlewell. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * 3. The name of the author may not be used to endorse or promote products |
| 15 | * derived from this software without specific prior written permission. |
| 16 | * |
| 17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 | * SUCH DAMAGE. |
| 28 | */ |
| 29 | |
| 30 | /* strptime() is here reimplemented because the FreeBSD (and older MacOS) one |
| 31 | * is broken and does not report errors properly. See TODO remarks below for |
| 32 | * some missing bits. */ |
| 33 | |
| 34 | #include <ctype.h> |
| 35 | #include <limits.h> |
| 36 | #include <string.h> |
| 37 | #include <langinfo.h> |
| 38 | #include "strptime.h" |
| 39 | |
/** @brief One row of a locale string lookup table
 *
 * Pairs an nl_langinfo() item with the number to report when the input text
 * equals that item's string.  Tables of these are terminated by a row whose
 * @c value is -1.
 */
struct locale_item_match {
  /** @brief Item to pass to nl_langinfo() to obtain the comparison string */
  nl_item key;

  /** @brief Number returned on a match (-1 marks the end of a table) */
  int value;
};
| 44 | |
| 45 | static const struct locale_item_match days[] = { |
| 46 | { DAY_1, 0 }, |
| 47 | { DAY_2, 1 }, |
| 48 | { DAY_3, 2 }, |
| 49 | { DAY_4, 3 }, |
| 50 | { DAY_5, 4 }, |
| 51 | { DAY_6, 5 }, |
| 52 | { DAY_7, 6 }, |
| 53 | { ABDAY_1, 0 }, |
| 54 | { ABDAY_2, 1 }, |
| 55 | { ABDAY_3, 2 }, |
| 56 | { ABDAY_4, 3 }, |
| 57 | { ABDAY_5, 4 }, |
| 58 | { ABDAY_6, 5 }, |
| 59 | { ABDAY_7, 6 }, |
| 60 | { -1, -1 } |
| 61 | }; |
| 62 | |
| 63 | static const struct locale_item_match months[] = { |
| 64 | { MON_1, 1 }, |
| 65 | { MON_2, 2 }, |
| 66 | { MON_3, 3 }, |
| 67 | { MON_4, 4 }, |
| 68 | { MON_5, 5 }, |
| 69 | { MON_6, 6 }, |
| 70 | { MON_7, 7 }, |
| 71 | { MON_8, 8 }, |
| 72 | { MON_9, 9 }, |
| 73 | { MON_10, 10 }, |
| 74 | { MON_11, 11 }, |
| 75 | { MON_12, 12 }, |
| 76 | { ABMON_1, 1 }, |
| 77 | { ABMON_2, 2 }, |
| 78 | { ABMON_3, 3 }, |
| 79 | { ABMON_4, 4 }, |
| 80 | { ABMON_5, 5 }, |
| 81 | { ABMON_6, 6 }, |
| 82 | { ABMON_7, 7 }, |
| 83 | { ABMON_8, 8 }, |
| 84 | { ABMON_9, 9 }, |
| 85 | { ABMON_10, 10 }, |
| 86 | { ABMON_11, 11 }, |
| 87 | { ABMON_12, 12 }, |
| 88 | { -1, -1 }, |
| 89 | }; |
| 90 | |
/** @brief Compare a bounded subject against a 0-terminated string
 * @param buf First character of the subject
 * @param limit One past the last character of the subject
 * @param match 0-terminated string to compare with
 * @return 1 if [buf,limit) and @p match are equal ignoring case, else 0
 *
 * Both strings must be exhausted at the same point: a subject that is only a
 * prefix of @p match (or vice versa) does not count as a match.  Case is
 * folded one byte at a time with tolower().
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); using compatibility
   * matching will ensure missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  const char *subject = buf;
  const char *pattern = match;

  for(; subject < limit && *pattern != '\0'; ++subject, ++pattern) {
    const int a = tolower((unsigned char)*subject);
    const int b = tolower((unsigned char)*pattern);

    if(a != b)
      return 0;
  }
  /* Only a match if neither side has anything left over */
  return subject == limit && *pattern == '\0';
}
| 119 | |
| 120 | /** @brief Match from table of locale-specific strings |
| 121 | * @param buf Start of subject |
| 122 | * @param limit End of subject |
| 123 | * @param lim Table of locale lookups |
| 124 | * @return Looked up value or -1 |
| 125 | * |
| 126 | * The match is case-independent. |
| 127 | */ |
| 128 | static int try_locale_match(const char *buf, |
| 129 | const char *limit, |
| 130 | const struct locale_item_match *lim) { |
| 131 | /* This is not very efficient! A (correct) built-in implementation will |
| 132 | * presumably have more direct access to locale information. */ |
| 133 | while(lim->value != -1) { |
| 134 | if(try_match(buf, limit, nl_langinfo(lim->key))) |
| 135 | return lim->value; |
| 136 | ++lim; |
| 137 | } |
| 138 | return -1; |
| 139 | } |
| 140 | |
/** @brief Match an unsigned decimal number
 * @param buf First character of the subject
 * @param limit One past the last character of the subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return The parsed value, or -1 if the subject is not acceptable
 *
 * The whole of [buf,limit) must consist of ASCII decimal digits (leading
 * zeroes are allowed) and must contain at least one of them; signs,
 * whitespace and any other characters cause failure.  Values that would not
 * fit in an @c int, or that lie outside [low,high], are rejected.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  const unsigned max = INT_MAX;         /* largest value we can return */
  unsigned n = 0;

  /* An empty field is not a number; without this check it would be accepted
   * as 0 whenever low is 0. */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    const int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;                        /* not a digit */
    digit = (unsigned)(ch - '0');
    /* Refuse to go past INT_MAX (INT_MAX itself is representable) */
    if(n > max / 10 || (n == max / 10 && digit > max % 10))
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
| 161 | |
| 162 | static const char *my_strptime_guts(const char *buf, |
| 163 | const char *format, |
| 164 | struct tm *tm) { |
| 165 | int fc, mod, spec, next, value; |
| 166 | const char *limit; |
| 167 | /* nl_langinfo() is allowed to trash its last return value so we copy. |
| 168 | * (We're relying on it being usable at all in multithreaded environments |
| 169 | * though.) */ |
| 170 | #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \ |
| 171 | const char *s; \ |
| 172 | char subformat[128]; \ |
| 173 | \ |
| 174 | if(mod == 'E') { \ |
| 175 | s = nl_langinfo(EITEM); \ |
| 176 | if(!s || !*s) \ |
| 177 | s = nl_langinfo(ITEM); \ |
| 178 | } else \ |
| 179 | s = nl_langinfo(ITEM); \ |
| 180 | if(!s || !*s) \ |
| 181 | s = DEF; \ |
| 182 | if(strlen(s) >= sizeof subformat) \ |
| 183 | s = DEF; \ |
| 184 | strcpy(subformat, s); \ |
| 185 | if(!(buf = my_strptime_guts(buf, subformat, tm))) \ |
| 186 | return NULL; \ |
| 187 | } while(0) |
| 188 | |
| 189 | while(*format) { |
| 190 | fc = (unsigned char)*format++; |
| 191 | if(fc == '%') { |
| 192 | /* Get the character defining the converstion specification */ |
| 193 | spec = (unsigned char)*format++; |
| 194 | if(spec == 'E' || spec == 'O') { |
| 195 | /* Oops, there's a modifier first */ |
| 196 | mod = spec; |
| 197 | spec = (unsigned char)*format++; |
| 198 | } else |
| 199 | mod = 0; |
| 200 | if(!spec) |
| 201 | return NULL; /* format string broken! */ |
| 202 | /* See what the next directive is. The specification is written in terms |
| 203 | * of stopping the match at a character that matches the next directive. |
| 204 | * This implementation mirrors this aspect of the specification |
| 205 | * directly. */ |
| 206 | next = (unsigned char)*format; |
| 207 | if(next) { |
| 208 | limit = buf; |
| 209 | if(isspace(next)) { |
| 210 | /* Next directive is whitespace, so bound the input string (at least) |
| 211 | * by that */ |
| 212 | while(*limit && !isspace((unsigned char)*limit)) |
| 213 | ++limit; |
| 214 | } else if(next == '%') { |
| 215 | /* Prohibited: "The application shall ensure that there is |
| 216 | * white-space or other non-alphanumeric characters between any two |
| 217 | * conversion specifications". In fact we let alphanumerics |
| 218 | * through. |
| 219 | * |
| 220 | * Forbidding even %% seems a bit harsh but is consistent with the |
| 221 | * specification as written. |
| 222 | */ |
| 223 | return NULL; |
| 224 | } else { |
| 225 | /* Next directive is a specific character, so bound the input string |
| 226 | * (at least) by that. This will work badly in the face of multibyte |
| 227 | * characters, but then the spec is vague about what kind of string |
| 228 | * we're dealing with anyway so you probably couldn't safely use them |
| 229 | * in the format string at least in any case. */ |
| 230 | while(*limit && *limit != next) |
| 231 | ++limit; |
| 232 | } |
| 233 | } else |
| 234 | limit = buf + strlen(buf); |
| 235 | switch(spec) { |
| 236 | case 'A': case 'a': /* day name (abbrev or full) */ |
| 237 | if((value = try_locale_match(buf, limit, days)) == -1) |
| 238 | return NULL; |
| 239 | tm->tm_wday = value; |
| 240 | break; |
| 241 | case 'B': case 'b': case 'h': /* month name (abbrev or full) */ |
| 242 | if((value = try_locale_match(buf, limit, months)) == -1) |
| 243 | return NULL; |
| 244 | tm->tm_mon = value - 1; |
| 245 | break; |
| 246 | case 'c': /* locale date+time */ |
| 247 | USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y"); |
| 248 | break; |
| 249 | case 'C': /* century number 0-99 */ |
| 250 | /* TODO */ |
| 251 | return NULL; |
| 252 | case 'd': case 'e': /* day of month 1-31 */ |
| 253 | if((value = try_numeric_match(buf, limit, 1, 31)) == -1) |
| 254 | return NULL; |
| 255 | tm->tm_mday = value; |
| 256 | break; |
| 257 | case 'D': /* == "%m / %d / %y" */ |
| 258 | if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm))) |
| 259 | return NULL; |
| 260 | break; |
| 261 | case 'H': /* hour 0-23 */ |
| 262 | if((value = try_numeric_match(buf, limit, 0, 23)) == -1) |
| 263 | return NULL; |
| 264 | tm->tm_hour = value; |
| 265 | break; |
| 266 | case 'I': /* hour 1-12 */ |
| 267 | /* TODO */ |
| 268 | return NULL; |
| 269 | case 'j': /* day 1-366 */ |
| 270 | if((value = try_numeric_match(buf, limit, 1, 366)) == -1) |
| 271 | return NULL; |
| 272 | tm->tm_yday = value - 1; |
| 273 | return NULL; |
| 274 | case 'm': /* month 1-12 */ |
| 275 | if((value = try_numeric_match(buf, limit, 1, 12)) == -1) |
| 276 | return NULL; |
| 277 | tm->tm_mon = value - 1; |
| 278 | break; |
| 279 | case 'M': /* minute 0-59 */ |
| 280 | if((value = try_numeric_match(buf, limit, 0, 59)) == -1) |
| 281 | return NULL; |
| 282 | tm->tm_min = value; |
| 283 | break; |
| 284 | case 'n': case 't': /* any whitespace */ |
| 285 | goto matchwhitespace; |
| 286 | case 'p': /* locale am/pm */ |
| 287 | /* TODO */ |
| 288 | return NULL; |
| 289 | case 'r': /* == "%I : %M : %S %p" */ |
| 290 | /* TODO actually this is locale-dependent; and we don't implement %I |
| 291 | * anyway, so it's not going to work even as it stands. */ |
| 292 | if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm))) |
| 293 | return NULL; |
| 294 | break; |
| 295 | case 'R': /* == "%H : %M" */ |
| 296 | if(!(buf = my_strptime_guts(buf, "%H : %M", tm))) |
| 297 | return NULL; |
| 298 | break; |
| 299 | case 'S': /* seconds 0-60 */ |
| 300 | if((value = try_numeric_match(buf, limit, 0, 60)) == -1) |
| 301 | return NULL; |
| 302 | tm->tm_sec = value; |
| 303 | break; |
| 304 | case 'U': /* week number from Sunday 0-53 */ |
| 305 | /* TODO */ |
| 306 | return NULL; |
| 307 | case 'w': /* day number 0-6 from Sunday */ |
| 308 | if((value = try_numeric_match(buf, limit, 0, 6)) == -1) |
| 309 | return NULL; |
| 310 | tm->tm_wday = value; |
| 311 | break; |
| 312 | case 'W': /* week number from Monday 0-53 */ |
| 313 | /* TODO */ |
| 314 | return NULL; |
| 315 | case 'x': /* locale date format */ |
| 316 | USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y"); |
| 317 | break; |
| 318 | case 'X': /* locale time format */ |
| 319 | USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S"); |
| 320 | break; |
| 321 | case 'y': /* year mod 100 */ |
| 322 | if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1) |
| 323 | return NULL; |
| 324 | if(value >= 0 && value <= 68) |
| 325 | value = 2000 + value; |
| 326 | else if(value >= 69 && value <= 99) |
| 327 | value = 1900 + value; |
| 328 | tm->tm_year = value - 1900; |
| 329 | break; |
| 330 | case 'Y': /* year */ |
| 331 | if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1) |
| 332 | return NULL; |
| 333 | tm->tm_year = value - 1900; |
| 334 | break; |
| 335 | case '%': |
| 336 | goto matchself; |
| 337 | default: |
| 338 | /* The spec is a bit vague about what to do with invalid format |
| 339 | * strings. We return NULL immediately and hope someone will |
| 340 | * notice. */ |
| 341 | return NULL; |
| 342 | } |
| 343 | buf = limit; |
| 344 | } else if(isspace(fc)) { |
| 345 | matchwhitespace: |
| 346 | /* Any format whitespace matches any number of input whitespace |
| 347 | * characters. The directive can formally contain more than one |
| 348 | * whitespace character; for the second and subsequent ones we'll match 0 |
| 349 | * characters from the input. */ |
| 350 | while(isspace((unsigned char)*buf)) |
| 351 | ++buf; |
| 352 | } else { |
| 353 | matchself: |
| 354 | /* Non-% non-whitespace characters must match themselves exactly */ |
| 355 | if(fc != (unsigned char)*buf++) |
| 356 | return NULL; |
| 357 | } |
| 358 | } |
| 359 | /* When we run out of format string we return a pointer to the rest of the |
| 360 | * input. */ |
| 361 | return buf; |
| 362 | } |
| 363 | |
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * The standard leaves it unspecified (rather bizarrely) whether fields of
 * @p tm that the format does not mention are overwritten or left alone.  This
 * implementation does not overwrite them: it only updates the fields it
 * actually parses, and xgetdate() depends on this behavior.
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  const char *rest = my_strptime_guts(buf, format, tm);

  if(rest == NULL)
    return NULL;
  /* TODO various things we could/should do once parsing has succeeded:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  /* The strptime() interface returns a non-const pointer into the input */
  return (char *)rest;
}
| 390 | |
| 391 | /* |
| 392 | Local Variables: |
| 393 | c-basic-offset:2 |
| 394 | comment-column:40 |
| 395 | fill-column:79 |
| 396 | indent-tabs-mode:nil |
| 397 | End: |
| 398 | */ |