Commit | Line | Data |
---|---|---|
477f956c RK |
1 | /* strptime.c - partial strptime() reimplementation |
2 | * | |
3 | * (c) 2008 Richard Kettlewell. | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * 3. The name of the author may not be used to endorse or promote products | |
15 | * derived from this software without specific prior written permission. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | */ | |
29 | ||
30 | /* strptime() is here reimplemented because the FreeBSD (and older MacOS) one | |
31 | * is broken and does not report errors properly. See TODO remarks below for | |
32 | * some missing bits. */ | |
33 | ||
34 | #include <ctype.h> | |
35 | #include <limits.h> | |
36 | #include <string.h> | |
37 | #include <langinfo.h> | |
38 | #include "strptime.h" | |
39 | ||
/** @brief One row of a table mapping locale strings to integer results
 *
 * Tables of these rows are terminated by a row whose @ref value is -1.
 */
struct locale_item_match {
  /** @brief nl_langinfo() item whose string is compared with the subject */
  nl_item key;

  /** @brief Result reported when the string for @ref key equals the subject */
  int value;
};
48 | ||
49 | static const struct locale_item_match days[] = { | |
50 | { DAY_1, 0 }, | |
51 | { DAY_2, 1 }, | |
52 | { DAY_3, 2 }, | |
53 | { DAY_4, 3 }, | |
54 | { DAY_5, 4 }, | |
55 | { DAY_6, 5 }, | |
56 | { DAY_7, 6 }, | |
57 | { ABDAY_1, 0 }, | |
58 | { ABDAY_2, 1 }, | |
59 | { ABDAY_3, 2 }, | |
60 | { ABDAY_4, 3 }, | |
61 | { ABDAY_5, 4 }, | |
62 | { ABDAY_6, 5 }, | |
63 | { ABDAY_7, 6 }, | |
64 | { -1, -1 } | |
65 | }; | |
66 | ||
67 | static const struct locale_item_match months[] = { | |
68 | { MON_1, 1 }, | |
69 | { MON_2, 2 }, | |
70 | { MON_3, 3 }, | |
71 | { MON_4, 4 }, | |
72 | { MON_5, 5 }, | |
73 | { MON_6, 6 }, | |
74 | { MON_7, 7 }, | |
75 | { MON_8, 8 }, | |
76 | { MON_9, 9 }, | |
77 | { MON_10, 10 }, | |
78 | { MON_11, 11 }, | |
79 | { MON_12, 12 }, | |
80 | { ABMON_1, 1 }, | |
81 | { ABMON_2, 2 }, | |
82 | { ABMON_3, 3 }, | |
83 | { ABMON_4, 4 }, | |
84 | { ABMON_5, 5 }, | |
85 | { ABMON_6, 6 }, | |
86 | { ABMON_7, 7 }, | |
87 | { ABMON_8, 8 }, | |
88 | { ABMON_9, 9 }, | |
89 | { ABMON_10, 10 }, | |
90 | { ABMON_11, 11 }, | |
91 | { ABMON_12, 12 }, | |
92 | { -1, -1 }, | |
93 | }; | |
94 | ||
/** @brief Compare a bounded subject with a NUL-terminated string
 * @param buf First byte of subject
 * @param limit One past the last byte of subject
 * @param match NUL-terminated string to compare the subject with
 * @return 1 if [buf,limit) equals @p match ignoring case, otherwise 0
 *
 * Case is folded one byte at a time with tolower(), so the comparison is
 * case-independent at least in ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); using compatibility
   * matching will ensure missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  const char *subject = buf;
  const char *pattern = match;

  for(; subject < limit && *pattern; ++subject, ++pattern) {
    const int a = tolower((unsigned char)*subject);
    const int b = tolower((unsigned char)*pattern);

    if(a != b)
      return 0;
  }
  /* Equal only if both strings ran out at the same time */
  return subject == limit && !*pattern;
}
123 | ||
124 | /** @brief Match from table of locale-specific strings | |
125 | * @param buf Start of subject | |
126 | * @param limit End of subject | |
127 | * @param lim Table of locale lookups | |
128 | * @return Looked up value or -1 | |
129 | * | |
130 | * The match is case-independent. | |
131 | */ | |
132 | static int try_locale_match(const char *buf, | |
133 | const char *limit, | |
134 | const struct locale_item_match *lim) { | |
135 | /* This is not very efficient! A (correct) built-in implementation will | |
136 | * presumably have more direct access to locale information. */ | |
137 | while(lim->value != -1) { | |
138 | if(try_match(buf, limit, nl_langinfo(lim->key))) | |
139 | return lim->value; | |
140 | ++lim; | |
141 | } | |
142 | return -1; | |
143 | } | |
144 | ||
/** @brief Match an unsigned decimal number
 * @param buf Start of subject
 * @param limit End of subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return Parsed value, or -1 if [buf,limit) is not a number in range
 *
 * The whole of [buf,limit) must consist of ASCII decimal digits and there
 * must be at least one of them.  Values up to and including INT_MAX can be
 * represented; anything larger is rejected as overflow.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  unsigned n = 0;

  /* An empty subject is not a number.  Without this check it would be
   * accepted as 0 whenever low is 0. */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    int ch = (unsigned char)*buf++;

    if(ch < '0' || ch > '9')
      return -1;                        /* not a digit */
    /* Refuse to go past INT_MAX; INT_MAX itself is still acceptable */
    if(n > INT_MAX / 10
       || (n == INT_MAX / 10 && ch - '0' > INT_MAX % 10))
      return -1;                        /* overflow */
    n = 10 * n + (unsigned)(ch - '0');
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
165 | ||
166 | static const char *my_strptime_guts(const char *buf, | |
167 | const char *format, | |
168 | struct tm *tm) { | |
169 | int fc, mod, spec, next, value; | |
170 | const char *limit; | |
171 | /* nl_langinfo() is allowed to trash its last return value so we copy. | |
172 | * (We're relying on it being usable at all in multithreaded environments | |
173 | * though.) */ | |
174 | #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \ | |
175 | const char *s; \ | |
176 | char subformat[128]; \ | |
177 | \ | |
178 | if(mod == 'E') { \ | |
179 | s = nl_langinfo(EITEM); \ | |
180 | if(!s || !*s) \ | |
181 | s = nl_langinfo(ITEM); \ | |
182 | } else \ | |
183 | s = nl_langinfo(ITEM); \ | |
184 | if(!s || !*s) \ | |
185 | s = DEF; \ | |
186 | if(strlen(s) >= sizeof subformat) \ | |
187 | s = DEF; \ | |
188 | strcpy(subformat, s); \ | |
189 | if(!(buf = my_strptime_guts(buf, subformat, tm))) \ | |
190 | return NULL; \ | |
191 | } while(0) | |
192 | ||
193 | while(*format) { | |
194 | fc = (unsigned char)*format++; | |
195 | if(fc == '%') { | |
196 | /* Get the character defining the converstion specification */ | |
197 | spec = (unsigned char)*format++; | |
198 | if(spec == 'E' || spec == 'O') { | |
199 | /* Oops, there's a modifier first */ | |
200 | mod = spec; | |
201 | spec = (unsigned char)*format++; | |
202 | } else | |
203 | mod = 0; | |
204 | if(!spec) | |
205 | return NULL; /* format string broken! */ | |
206 | /* See what the next directive is. The specification is written in terms | |
207 | * of stopping the match at a character that matches the next directive. | |
208 | * This implementation mirrors this aspect of the specification | |
209 | * directly. */ | |
210 | next = (unsigned char)*format; | |
211 | if(next) { | |
212 | limit = buf; | |
213 | if(isspace(next)) { | |
214 | /* Next directive is whitespace, so bound the input string (at least) | |
215 | * by that */ | |
216 | while(*limit && !isspace((unsigned char)*limit)) | |
217 | ++limit; | |
218 | } else if(next == '%') { | |
219 | /* Prohibited: "The application shall ensure that there is | |
220 | * white-space or other non-alphanumeric characters between any two | |
221 | * conversion specifications". In fact we let alphanumerics | |
222 | * through. | |
223 | * | |
224 | * Forbidding even %% seems a bit harsh but is consistent with the | |
225 | * specification as written. | |
226 | */ | |
227 | return NULL; | |
228 | } else { | |
229 | /* Next directive is a specific character, so bound the input string | |
230 | * (at least) by that. This will work badly in the face of multibyte | |
231 | * characters, but then the spec is vague about what kind of string | |
232 | * we're dealing with anyway so you probably couldn't safely use them | |
233 | * in the format string at least in any case. */ | |
234 | while(*limit && *limit != next) | |
235 | ++limit; | |
236 | } | |
237 | } else | |
238 | limit = buf + strlen(buf); | |
239 | switch(spec) { | |
240 | case 'A': case 'a': /* day name (abbrev or full) */ | |
241 | if((value = try_locale_match(buf, limit, days)) == -1) | |
242 | return NULL; | |
243 | tm->tm_wday = value; | |
244 | break; | |
245 | case 'B': case 'b': case 'h': /* month name (abbrev or full) */ | |
246 | if((value = try_locale_match(buf, limit, months)) == -1) | |
247 | return NULL; | |
248 | tm->tm_mon = value - 1; | |
249 | break; | |
250 | case 'c': /* locale date+time */ | |
251 | USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y"); | |
252 | break; | |
253 | case 'C': /* century number 0-99 */ | |
254 | /* TODO */ | |
255 | return NULL; | |
256 | case 'd': case 'e': /* day of month 1-31 */ | |
257 | if((value = try_numeric_match(buf, limit, 1, 31)) == -1) | |
258 | return NULL; | |
259 | tm->tm_mday = value; | |
260 | break; | |
261 | case 'D': /* == "%m / %d / %y" */ | |
262 | if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm))) | |
263 | return NULL; | |
264 | break; | |
265 | case 'H': /* hour 0-23 */ | |
266 | if((value = try_numeric_match(buf, limit, 0, 23)) == -1) | |
267 | return NULL; | |
268 | tm->tm_hour = value; | |
269 | break; | |
270 | case 'I': /* hour 1-12 */ | |
271 | /* TODO */ | |
272 | return NULL; | |
273 | case 'j': /* day 1-366 */ | |
274 | if((value = try_numeric_match(buf, limit, 1, 366)) == -1) | |
275 | return NULL; | |
276 | tm->tm_yday = value - 1; | |
277 | return NULL; | |
278 | case 'm': /* month 1-12 */ | |
279 | if((value = try_numeric_match(buf, limit, 1, 12)) == -1) | |
280 | return NULL; | |
281 | tm->tm_mon = value - 1; | |
282 | break; | |
283 | case 'M': /* minute 0-59 */ | |
284 | if((value = try_numeric_match(buf, limit, 0, 59)) == -1) | |
285 | return NULL; | |
286 | tm->tm_min = value; | |
287 | break; | |
288 | case 'n': case 't': /* any whitespace */ | |
289 | goto matchwhitespace; | |
290 | case 'p': /* locale am/pm */ | |
291 | /* TODO */ | |
292 | return NULL; | |
293 | case 'r': /* == "%I : %M : %S %p" */ | |
294 | /* TODO actually this is locale-dependent; and we don't implement %I | |
295 | * anyway, so it's not going to work even as it stands. */ | |
296 | if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm))) | |
297 | return NULL; | |
298 | break; | |
299 | case 'R': /* == "%H : %M" */ | |
300 | if(!(buf = my_strptime_guts(buf, "%H : %M", tm))) | |
301 | return NULL; | |
302 | break; | |
303 | case 'S': /* seconds 0-60 */ | |
304 | if((value = try_numeric_match(buf, limit, 0, 60)) == -1) | |
305 | return NULL; | |
306 | tm->tm_sec = value; | |
307 | break; | |
308 | case 'U': /* week number from Sunday 0-53 */ | |
309 | /* TODO */ | |
310 | return NULL; | |
311 | case 'w': /* day number 0-6 from Sunday */ | |
312 | if((value = try_numeric_match(buf, limit, 0, 6)) == -1) | |
313 | return NULL; | |
314 | tm->tm_wday = value; | |
315 | break; | |
316 | case 'W': /* week number from Monday 0-53 */ | |
317 | /* TODO */ | |
318 | return NULL; | |
319 | case 'x': /* locale date format */ | |
320 | USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y"); | |
321 | break; | |
322 | case 'X': /* locale time format */ | |
323 | USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S"); | |
324 | break; | |
325 | case 'y': /* year mod 100 */ | |
326 | if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1) | |
327 | return NULL; | |
328 | if(value >= 0 && value <= 68) | |
329 | value = 2000 + value; | |
330 | else if(value >= 69 && value <= 99) | |
331 | value = 1900 + value; | |
332 | tm->tm_year = value - 1900; | |
333 | break; | |
334 | case 'Y': /* year */ | |
335 | if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1) | |
336 | return NULL; | |
337 | tm->tm_year = value - 1900; | |
338 | break; | |
339 | case '%': | |
340 | goto matchself; | |
341 | default: | |
342 | /* The spec is a bit vague about what to do with invalid format | |
343 | * strings. We return NULL immediately and hope someone will | |
344 | * notice. */ | |
345 | return NULL; | |
346 | } | |
347 | buf = limit; | |
348 | } else if(isspace(fc)) { | |
349 | matchwhitespace: | |
350 | /* Any format whitespace matches any number of input whitespace | |
351 | * characters. The directive can formally contain more than one | |
352 | * whitespace character; for the second and subsequent ones we'll match 0 | |
353 | * characters from the input. */ | |
354 | while(isspace((unsigned char)*buf)) | |
355 | ++buf; | |
356 | } else { | |
357 | matchself: | |
358 | /* Non-% non-whitespace characters must match themselves exactly */ | |
359 | if(fc != (unsigned char)*buf++) | |
360 | return NULL; | |
361 | } | |
362 | } | |
363 | /* When we run out of format string we return a pointer to the rest of the | |
364 | * input. */ | |
365 | return buf; | |
366 | } | |
367 | ||
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update @p tm is unspecified (rather bizarrely).
   * This implementation does not overwrite it: only the fields set by the
   * format are changed, as xgetdate() depends on this behavior. */
  const char *rest = my_strptime_guts(buf, format, tm);

  if(rest == NULL)
    return NULL;
  /* TODO various things we could/should do:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  return (char *)rest;
}
394 | ||
395 | /* | |
396 | Local Variables: | |
397 | c-basic-offset:2 | |
398 | comment-column:40 | |
399 | fill-column:79 | |
400 | indent-tabs-mode:nil | |
401 | End: | |
402 | */ |