chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/* strptime.c - partial strptime() reimplementation
	2	*
	3	* Copyright (c) 2008, 2011, 2013 Richard Kettlewell.
	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	* 1. Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* 2. Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* 3. The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	18	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	19	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	20	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	21	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	22	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	23	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	24	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	25	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	26	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	27	* SUCH DAMAGE.
	28	*/
	29	/** @file lib/strptime.c
	30	* @brief strptime() reimplementation
	31	*
	32	* strptime() is here reimplemented because the FreeBSD (and older MacOS) one
	33	* is broken and does not report errors properly. See TODO remarks below for
	34	* some missing bits.
	35	*/
	36
	37	#if HAVE_CONFIG_H
	38	# include <config.h>
	39	#endif
	40
	41	#include <ctype.h>
	42	#include <limits.h>
	43	#include <string.h>
	44	#if HAVE_LANGINFO_H
	45	# include <langinfo.h>
	46	#endif
	47	#include "strptime.h"
	48
	49	#if !HAVE_LANGINFO_H
	50	/* Fake plastic langinfo. Primarily for Windows.
	51	* TODO WIN32 can we get these values out of the win32 api instead? */
	52	typedef enum {
	53	DAY_1,
	54	DAY_2,
	55	DAY_3,
	56	DAY_4,
	57	DAY_5,
	58	DAY_6,
	59	DAY_7,
	60	ABDAY_1,
	61	ABDAY_2,
	62	ABDAY_3,
	63	ABDAY_4,
	64	ABDAY_5,
	65	ABDAY_6,
	66	ABDAY_7,
	67	MON_1,
	68	MON_2,
	69	MON_3,
	70	MON_4,
	71	MON_5,
	72	MON_6,
	73	MON_7,
	74	MON_8,
	75	MON_9,
	76	MON_10,
	77	MON_11,
	78	MON_12,
	79	ABMON_1,
	80	ABMON_2,
	81	ABMON_3,
	82	ABMON_4,
	83	ABMON_5,
	84	ABMON_6,
	85	ABMON_7,
	86	ABMON_8,
	87	ABMON_9,
	88	ABMON_10,
	89	ABMON_11,
	90	ABMON_12,
	91	D_FMT,
	92	T_FMT,
	93	D_T_FMT,
	94	ERA_D_FMT,
	95	ERA_T_FMT,
	96	ERA_D_T_FMT,
	97	} nl_item;
	98
	99	const char *nl_langinfo(nl_item item) {
	100	switch(item) {
	101	case DAY_1: return "Sunday";
	102	case DAY_2: return "Monday";
	103	case DAY_3: return "Tuesday";
	104	case DAY_4: return "Wednesday";
	105	case DAY_5: return "Thursday";
	106	case DAY_6: return "Friday";
	107	case DAY_7: return "Saturday";
	108	case ABDAY_1: return "Sun";
	109	case ABDAY_2: return "Mon";
	110	case ABDAY_3: return "Tue";
	111	case ABDAY_4: return "Wed";
	112	case ABDAY_5: return "Thu";
	113	case ABDAY_6: return "Fri";
	114	case ABDAY_7: return "Sat";
	115	case MON_1: return "January";
	116	case MON_2: return "February";
	117	case MON_3: return "March";
	118	case MON_4: return "April";
	119	case MON_5: return "May";
	120	case MON_6: return "June";
	121	case MON_7: return "July";
	122	case MON_8: return "August";
	123	case MON_9: return "September";
	124	case MON_10: return "October";
	125	case MON_11: return "November";
	126	case MON_12: return "December";
	127	case ABMON_1: return "Jan";
	128	case ABMON_2: return "Feb";
	129	case ABMON_3: return "Mar";
	130	case ABMON_4: return "Apr";
	131	case ABMON_5: return "May";
	132	case ABMON_6: return "Jun";
	133	case ABMON_7: return "Jul";
	134	case ABMON_8: return "Aug";
	135	case ABMON_9: return "Sep";
	136	case ABMON_10: return "Oct";
	137	case ABMON_11: return "Nov";
	138	case ABMON_12: return "Dec";
	139	case D_FMT: return "%d/%m/%y";
	140	case T_FMT: return "%H:%M:%S";
	141	case D_T_FMT: return "%a %d %b %Y %H:%M:%S %Z";
	142	case ERA_D_FMT: return "";
	143	case ERA_T_FMT: return "";
	144	case ERA_D_T_FMT: return "";
	145	default: return 0;
	146	}
	147	}
	148	#endif
	149
	150	/** @brief Lookup table entry for locale-specific strings */
	151	struct locale_item_match {
	152	/** @brief Locale key to try */
	153	nl_item key;
	154
	155	/** @brief Value to return if value of @ref key matches subject string */
	156	int value;
	157	};
	158
	159	static const struct locale_item_match days[] = {
	160	{ DAY_1, 0 },
	161	{ DAY_2, 1 },
	162	{ DAY_3, 2 },
	163	{ DAY_4, 3 },
	164	{ DAY_5, 4 },
	165	{ DAY_6, 5 },
	166	{ DAY_7, 6 },
	167	{ ABDAY_1, 0 },
	168	{ ABDAY_2, 1 },
	169	{ ABDAY_3, 2 },
	170	{ ABDAY_4, 3 },
	171	{ ABDAY_5, 4 },
	172	{ ABDAY_6, 5 },
	173	{ ABDAY_7, 6 },
	174	{ -1, -1 }
	175	};
	176
	177	static const struct locale_item_match months[] = {
	178	{ MON_1, 1 },
	179	{ MON_2, 2 },
	180	{ MON_3, 3 },
	181	{ MON_4, 4 },
	182	{ MON_5, 5 },
	183	{ MON_6, 6 },
	184	{ MON_7, 7 },
	185	{ MON_8, 8 },
	186	{ MON_9, 9 },
	187	{ MON_10, 10 },
	188	{ MON_11, 11 },
	189	{ MON_12, 12 },
	190	{ ABMON_1, 1 },
	191	{ ABMON_2, 2 },
	192	{ ABMON_3, 3 },
	193	{ ABMON_4, 4 },
	194	{ ABMON_5, 5 },
	195	{ ABMON_6, 6 },
	196	{ ABMON_7, 7 },
	197	{ ABMON_8, 8 },
	198	{ ABMON_9, 9 },
	199	{ ABMON_10, 10 },
	200	{ ABMON_11, 11 },
	201	{ ABMON_12, 12 },
	202	{ -1, -1 },
	203	};
	204
	205	/** @brief Match a string
	206	* @param buf Start of subject
	207	* @param limit End of subject
	208	* @param match String to match subject against
	209	* @return True if match == [buf,limit) otherwise false
	210	*
	211	* The match is case-independent at least in ASCII.
	212	*/
	213	static int try_match(const char *buf,
	214	const char *limit,
	215	const char *match) {
	216	/* TODO this won't work well outside single-byte encodings. A good bet is
	217	* probably to convert to Unicode and then use utf32_casefold_compat() (or
	218	* utf8_casefold_compat(); using compatibility matching will ensure missing
	219	* accents and so on aren't a problem.
	220	*
	221	* en_GB and en_US will probably be in any reasonable encoding for them.
	222	*/
	223	while(buf < limit && *match) {
	224	if(tolower((unsigned char)buf) != tolower((unsigned char)match))
	225	return 0;
	226	++buf;
	227	++match;
	228	}
	229	if(buf != limit \|\| *match)
	230	return 0;
	231	return 1;
	232	}
	233
	234	/** @brief Match from table of locale-specific strings
	235	* @param buf Start of subject
	236	* @param limit End of subject
	237	* @param lim Table of locale lookups
	238	* @return Looked up value or -1
	239	*
	240	* The match is case-independent.
	241	*/
	242	static int try_locale_match(const char *buf,
	243	const char *limit,
	244	const struct locale_item_match *lim) {
	245	/* This is not very efficient! A (correct) built-in implementation will
	246	* presumably have more direct access to locale information. */
	247	while(lim->value != -1) {
	248	if(try_match(buf, limit, nl_langinfo(lim->key)))
	249	return lim->value;
	250	++lim;
	251	}
	252	return -1;
	253	}
	254
	255	static int try_numeric_match(const char *buf,
	256	const char *limit,
	257	unsigned low,
	258	unsigned high) {
	259	unsigned n = 0;
	260
	261	while(buf < limit) {
	262	int ch = (unsigned char)*buf++;
	263	if(ch >= '0' && ch <= '9') {
	264	if(n > INT_MAX / 10
	265	\|\| (n == INT_MAX / 10 && ch >= INT_MAX % 10 + '0'))
	266	return -1; /* overflow */
	267	n = 10 * n + ch - '0';
	268	} else
	269	return -1;
	270	}
	271	if(n < low \|\| n > high)
	272	return -1;
	273	return (int)n;
	274	}
	275
	276	static const char my_strptime_guts(const char buf,
	277	const char *format,
	278	struct tm *tm) {
	279	int fc, mod, spec, next, value;
	280	const char *limit;
	281	/* nl_langinfo() is allowed to trash its last return value so we copy.
	282	* (We're relying on it being usable at all in multithreaded environments
	283	* though.) */
	284	#define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
	285	const char *s; \
	286	char subformat[128]; \
	287	\
	288	if(mod == 'E') { \
	289	s = nl_langinfo(EITEM); \
	290	if(!s \|\| !*s) \
	291	s = nl_langinfo(ITEM); \
	292	} else \
	293	s = nl_langinfo(ITEM); \
	294	if(!s \|\| !*s) \
	295	s = DEF; \
	296	if(strlen(s) >= sizeof subformat) \
	297	s = DEF; \
	298	strcpy(subformat, s); \
	299	if(!(buf = my_strptime_guts(buf, subformat, tm))) \
	300	return NULL; \
	301	} while(0)
	302
	303	while(*format) {
	304	fc = (unsigned char)*format++;
	305	if(fc == '%') {
	306	/* Get the character defining the converstion specification */
	307	spec = (unsigned char)*format++;
	308	if(spec == 'E' \|\| spec == 'O') {
	309	/* Oops, there's a modifier first */
	310	mod = spec;
	311	spec = (unsigned char)*format++;
	312	} else
	313	mod = 0;
	314	if(!spec)
	315	return NULL; /* format string broken! */
	316	/* See what the next directive is. The specification is written in terms
	317	* of stopping the match at a character that matches the next directive.
	318	* This implementation mirrors this aspect of the specification
	319	* directly. */
	320	next = (unsigned char)*format;
	321	if(next) {
	322	limit = buf;
	323	if(isspace(next)) {
	324	/* Next directive is whitespace, so bound the input string (at least)
	325	* by that */
	326	while(limit && !isspace((unsigned char)limit))
	327	++limit;
	328	} else if(next == '%') {
	329	/* Prohibited: "The application shall ensure that there is
	330	* white-space or other non-alphanumeric characters between any two
	331	* conversion specifications". In fact we let alphanumerics
	332	* through.
	333	*
	334	* Forbidding even %% seems a bit harsh but is consistent with the
	335	* specification as written.
	336	*/
	337	return NULL;
	338	} else {
	339	/* Next directive is a specific character, so bound the input string
	340	* (at least) by that. This will work badly in the face of multibyte
	341	* characters, but then the spec is vague about what kind of string
	342	* we're dealing with anyway so you probably couldn't safely use them
	343	* in the format string at least in any case. */
	344	while(limit && limit != next)
	345	++limit;
	346	}
	347	} else
	348	limit = buf + strlen(buf);
	349	switch(spec) {
	350	case 'A': case 'a': /* day name (abbrev or full) */
	351	if((value = try_locale_match(buf, limit, days)) == -1)
	352	return NULL;
	353	tm->tm_wday = value;
	354	break;
	355	case 'B': case 'b': case 'h': /* month name (abbrev or full) */
	356	if((value = try_locale_match(buf, limit, months)) == -1)
	357	return NULL;
	358	tm->tm_mon = value - 1;
	359	break;
	360	case 'c': /* locale date+time */
	361	USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
	362	break;
	363	case 'C': /* century number 0-99 */
	364	/* TODO */
	365	return NULL;
	366	case 'd': case 'e': /* day of month 1-31 */
	367	if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
	368	return NULL;
	369	tm->tm_mday = value;
	370	break;
	371	case 'D': /* == "%m / %d / %y" */
	372	if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
	373	return NULL;
	374	break;
	375	case 'H': /* hour 0-23 */
	376	if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
	377	return NULL;
	378	tm->tm_hour = value;
	379	break;
	380	case 'I': /* hour 1-12 */
	381	/* TODO */
	382	return NULL;
	383	case 'j': /* day 1-366 */
	384	if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
	385	return NULL;
	386	tm->tm_yday = value - 1;
	387	return NULL;
	388	case 'm': /* month 1-12 */
	389	if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
	390	return NULL;
	391	tm->tm_mon = value - 1;
	392	break;
	393	case 'M': /* minute 0-59 */
	394	if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
	395	return NULL;
	396	tm->tm_min = value;
	397	break;
	398	case 'n': case 't': /* any whitespace */
	399	goto matchwhitespace;
	400	case 'p': /* locale am/pm */
	401	/* TODO */
	402	return NULL;
	403	case 'r': /* == "%I : %M : %S %p" */
	404	/* TODO actually this is locale-dependent; and we don't implement %I
	405	* anyway, so it's not going to work even as it stands. */
	406	if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
	407	return NULL;
	408	break;
	409	case 'R': /* == "%H : %M" */
	410	if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
	411	return NULL;
	412	break;
	413	case 'S': /* seconds 0-60 */
	414	if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
	415	return NULL;
	416	tm->tm_sec = value;
	417	break;
	418	case 'U': /* week number from Sunday 0-53 */
	419	/* TODO */
	420	return NULL;
	421	case 'w': /* day number 0-6 from Sunday */
	422	if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
	423	return NULL;
	424	tm->tm_wday = value;
	425	break;
	426	case 'W': /* week number from Monday 0-53 */
	427	/* TODO */
	428	return NULL;
	429	case 'x': /* locale date format */
	430	USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
	431	break;
	432	case 'X': /* locale time format */
	433	USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
	434	break;
	435	case 'y': /* year mod 100 */
	436	if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
	437	return NULL;
	438	if(value >= 0 && value <= 68)
	439	value = 2000 + value;
	440	else if(value >= 69 && value <= 99)
	441	value = 1900 + value;
	442	tm->tm_year = value - 1900;
	443	break;
	444	case 'Y': /* year */
	445	if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
	446	return NULL;
	447	tm->tm_year = value - 1900;
	448	break;
	449	case '%':
	450	goto matchself;
	451	default:
	452	/* The spec is a bit vague about what to do with invalid format
	453	* strings. We return NULL immediately and hope someone will
	454	* notice. */
	455	return NULL;
	456	}
	457	buf = limit;
	458	} else if(isspace(fc)) {
	459	matchwhitespace:
	460	/* Any format whitespace matches any number of input whitespace
	461	* characters. The directive can formally contain more than one
	462	* whitespace character; for the second and subsequent ones we'll match 0
	463	* characters from the input. */
	464	while(isspace((unsigned char)*buf))
	465	++buf;
	466	} else {
	467	matchself:
	468	/* Non-% non-whitespace characters must match themselves exactly */
	469	if(fc != (unsigned char)*buf++)
	470	return NULL;
	471	}
	472	}
	473	/* When we run out of format string we return a pointer to the rest of the
	474	* input. */
	475	return buf;
	476	}
	477
	478	/** @brief Reimplementation of strptime()
	479	* @param buf Input buffer
	480	* @param format Format string
	481	* @param tm Where to put result
	482	* @return Pointer to first unparsed input character, or NULL on error
	483	*
	484	* Based on <a
	485	* href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
	486	*/
	487	char my_strptime(const char buf,
	488	const char *format,
	489	struct tm *tm) {
	490	/* Whether to overwrite or update is unspecified (rather bizarrely). This
	491	* implementation does not overwrites, as xgetdate() depends on this
	492	* behavior. */
	493
	494	if(!(buf = my_strptime_guts(buf, format, tm)))
	495	return NULL;
	496	/* TODO various things we could/should do:
	497	* - infer day/month from %j+year
	498	* - infer day/month from %U/%W+%w/%a+year
	499	* - infer hour from %p+%I
	500	* - fill wday/yday from other fields
	501	*/
	502	return (char *)buf;
	503	}
	504
	505	/*
	506	Local Variables:
	507	c-basic-offset:2
	508	comment-column:40
	509	fill-column:79
	510	indent-tabs-mode:nil
	511	End:
	512	*/