chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/* strptime.c - partial strptime() reimplementation
	2	*
	3	* (c) 2008 Richard Kettlewell.
	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	* 1. Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* 2. Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* 3. The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	18	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	19	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	20	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	21	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	22	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	23	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	24	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	25	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	26	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	27	* SUCH DAMAGE.
	28	*/
	29
	30	/* strptime() is here reimplemented because the FreeBSD (and older MacOS) one
	31	* is broken and does not report errors properly. See TODO remarks below for
	32	* some missing bits. */
	33
	34	#include <ctype.h>
	35	#include <limits.h>
	36	#include <string.h>
	37	#include <langinfo.h>
	38	#include "strptime.h"
	39
	/** @brief One entry in a table of locale-specific strings
	 *
	 * Tables of these are scanned by try_locale_match() and end with an entry
	 * whose @c value is -1.
	 */
	struct locale_item_match {
	  /** @brief Item to look up with nl_langinfo() */
	  nl_item key;
	  /** @brief Value reported when the locale string matches; -1 ends a table */
	  int value;
	};
	44
	45	static const struct locale_item_match days[] = {
	46	{ DAY_1, 0 },
	47	{ DAY_2, 1 },
	48	{ DAY_3, 2 },
	49	{ DAY_4, 3 },
	50	{ DAY_5, 4 },
	51	{ DAY_6, 5 },
	52	{ DAY_7, 6 },
	53	{ ABDAY_1, 0 },
	54	{ ABDAY_2, 1 },
	55	{ ABDAY_3, 2 },
	56	{ ABDAY_4, 3 },
	57	{ ABDAY_5, 4 },
	58	{ ABDAY_6, 5 },
	59	{ ABDAY_7, 6 },
	60	{ -1, -1 }
	61	};
	62
	63	static const struct locale_item_match months[] = {
	64	{ MON_1, 1 },
	65	{ MON_2, 2 },
	66	{ MON_3, 3 },
	67	{ MON_4, 4 },
	68	{ MON_5, 5 },
	69	{ MON_6, 6 },
	70	{ MON_7, 7 },
	71	{ MON_8, 8 },
	72	{ MON_9, 9 },
	73	{ MON_10, 10 },
	74	{ MON_11, 11 },
	75	{ MON_12, 12 },
	76	{ ABMON_1, 1 },
	77	{ ABMON_2, 2 },
	78	{ ABMON_3, 3 },
	79	{ ABMON_4, 4 },
	80	{ ABMON_5, 5 },
	81	{ ABMON_6, 6 },
	82	{ ABMON_7, 7 },
	83	{ ABMON_8, 8 },
	84	{ ABMON_9, 9 },
	85	{ ABMON_10, 10 },
	86	{ ABMON_11, 11 },
	87	{ ABMON_12, 12 },
	88	{ -1, -1 },
	89	};
	90
	/** @brief Match a string
	 * @param buf Start of subject
	 * @param limit End of subject
	 * @param match String to match subject against
	 * @return True if match == [buf,limit) otherwise false
	 *
	 * The match is case-independent at least in ASCII.
	 */
	static int try_match(const char *buf,
	                     const char *limit,
	                     const char *match) {
	  /* TODO this won't work well outside single-byte encodings.  A good bet is
	   * probably to convert to Unicode and then use utf32_casefold_compat() (or
	   * utf8_casefold_compat()); using compatibility matching will ensure missing
	   * accents and so on aren't a problem.
	   *
	   * en_GB and en_US will probably be in any reasonable encoding for them.
	   */
	  while(buf < limit && *match) {
	    /* Compare the characters themselves (not the pointers), going via
	     * unsigned char so tolower() is never handed a negative value. */
	    if(tolower((unsigned char)*buf) != tolower((unsigned char)*match))
	      return 0;
	    ++buf;
	    ++match;
	  }
	  /* Subject and pattern must run out at the same time */
	  if(buf != limit || *match)
	    return 0;
	  return 1;
	}
	119
	120	/** @brief Match from table of locale-specific strings
	121	* @param buf Start of subject
	122	* @param limit End of subject
	123	* @param lim Table of locale lookups
	124	* @return Looked up value or -1
	125	*
	126	* The match is case-independent.
	127	*/
	128	static int try_locale_match(const char *buf,
	129	const char *limit,
	130	const struct locale_item_match *lim) {
	131	/* This is not very efficient! A (correct) built-in implementation will
	132	* presumably have more direct access to locale information. */
	133	while(lim->value != -1) {
	134	if(try_match(buf, limit, nl_langinfo(lim->key)))
	135	return lim->value;
	136	++lim;
	137	}
	138	return -1;
	139	}
	140
	/** @brief Match an unsigned decimal number
	 * @param buf Start of subject
	 * @param limit End of subject
	 * @param low Smallest acceptable value
	 * @param high Largest acceptable value
	 * @return The number, or -1 if [buf,limit) is not a number in range
	 *
	 * The whole of [buf,limit) must consist of ASCII decimal digits, and there
	 * must be at least one of them.  Values that do not fit in an @c int are
	 * rejected.
	 */
	static int try_numeric_match(const char *buf,
	                             const char *limit,
	                             unsigned low,
	                             unsigned high) {
	  unsigned n = 0;

	  /* An empty subject is not a number (and must not be read as 0) */
	  if(buf >= limit)
	    return -1;
	  while(buf < limit) {
	    int ch = (unsigned char)*buf++;
	    unsigned digit;

	    if(ch < '0' || ch > '9')
	      return -1;
	    digit = (unsigned)(ch - '0');
	    /* Refuse anything that would exceed INT_MAX, since the result is
	     * returned as an int.  INT_MAX itself is representable and accepted. */
	    if(n > ((unsigned)INT_MAX - digit) / 10)
	      return -1;                      /* overflow */
	    n = 10 * n + digit;
	  }
	  if(n < low || n > high)
	    return -1;
	  return (int)n;
	}
	161
	162	static const char my_strptime_guts(const char buf,
	163	const char *format,
	164	struct tm *tm) {
	165	int fc, mod, spec, next, value;
	166	const char *limit;
	167	/* nl_langinfo() is allowed to trash its last return value so we copy.
	168	* (We're relying on it being usable at all in multithreaded environments
	169	* though.) */
	170	#define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
	171	const char *s; \
	172	char subformat[128]; \
	173	\
	174	if(mod == 'E') { \
	175	s = nl_langinfo(EITEM); \
	176	if(!s \|\| !*s) \
	177	s = nl_langinfo(ITEM); \
	178	} else \
	179	s = nl_langinfo(ITEM); \
	180	if(!s \|\| !*s) \
	181	s = DEF; \
	182	if(strlen(s) >= sizeof subformat) \
	183	s = DEF; \
	184	strcpy(subformat, s); \
	185	if(!(buf = my_strptime_guts(buf, subformat, tm))) \
	186	return NULL; \
	187	} while(0)
	188
	189	while(*format) {
	190	fc = (unsigned char)*format++;
	191	if(fc == '%') {
	192	/* Get the character defining the converstion specification */
	193	spec = (unsigned char)*format++;
	194	if(spec == 'E' \|\| spec == 'O') {
	195	/* Oops, there's a modifier first */
	196	mod = spec;
	197	spec = (unsigned char)*format++;
	198	} else
	199	mod = 0;
	200	if(!spec)
	201	return NULL; /* format string broken! */
	202	/* See what the next directive is. The specification is written in terms
	203	* of stopping the match at a character that matches the next directive.
	204	* This implementation mirrors this aspect of the specification
	205	* directly. */
	206	next = (unsigned char)*format;
	207	if(next) {
	208	limit = buf;
	209	if(isspace(next)) {
	210	/* Next directive is whitespace, so bound the input string (at least)
	211	* by that */
	212	while(limit && !isspace((unsigned char)limit))
	213	++limit;
	214	} else if(next == '%') {
	215	/* Prohibited: "The application shall ensure that there is
	216	* white-space or other non-alphanumeric characters between any two
	217	* conversion specifications". In fact we let alphanumerics
	218	* through.
	219	*
	220	* Forbidding even %% seems a bit harsh but is consistent with the
	221	* specification as written.
	222	*/
	223	return NULL;
	224	} else {
	225	/* Next directive is a specific character, so bound the input string
	226	* (at least) by that. This will work badly in the face of multibyte
	227	* characters, but then the spec is vague about what kind of string
	228	* we're dealing with anyway so you probably couldn't safely use them
	229	* in the format string at least in any case. */
	230	while(limit && limit != next)
	231	++limit;
	232	}
	233	} else
	234	limit = buf + strlen(buf);
	235	switch(spec) {
	236	case 'A': case 'a': /* day name (abbrev or full) */
	237	if((value = try_locale_match(buf, limit, days)) == -1)
	238	return NULL;
	239	tm->tm_wday = value;
	240	break;
	241	case 'B': case 'b': case 'h': /* month name (abbrev or full) */
	242	if((value = try_locale_match(buf, limit, months)) == -1)
	243	return NULL;
	244	tm->tm_mon = value - 1;
	245	break;
	246	case 'c': /* locale date+time */
	247	USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
	248	break;
	249	case 'C': /* century number 0-99 */
	250	/* TODO */
	251	return NULL;
	252	case 'd': case 'e': /* day of month 1-31 */
	253	if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
	254	return NULL;
	255	tm->tm_mday = value;
	256	break;
	257	case 'D': /* == "%m / %d / %y" */
	258	if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
	259	return NULL;
	260	break;
	261	case 'H': /* hour 0-23 */
	262	if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
	263	return NULL;
	264	tm->tm_hour = value;
	265	break;
	266	case 'I': /* hour 1-12 */
	267	/* TODO */
	268	return NULL;
	269	case 'j': /* day 1-366 */
	270	if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
	271	return NULL;
	272	tm->tm_yday = value - 1;
	273	return NULL;
	274	case 'm': /* month 1-12 */
	275	if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
	276	return NULL;
	277	tm->tm_mon = value - 1;
	278	break;
	279	case 'M': /* minute 0-59 */
	280	if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
	281	return NULL;
	282	tm->tm_min = value;
	283	break;
	284	case 'n': case 't': /* any whitespace */
	285	goto matchwhitespace;
	286	case 'p': /* locale am/pm */
	287	/* TODO */
	288	return NULL;
	289	case 'r': /* == "%I : %M : %S %p" */
	290	/* TODO actually this is locale-dependent; and we don't implement %I
	291	* anyway, so it's not going to work even as it stands. */
	292	if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
	293	return NULL;
	294	break;
	295	case 'R': /* == "%H : %M" */
	296	if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
	297	return NULL;
	298	break;
	299	case 'S': /* seconds 0-60 */
	300	if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
	301	return NULL;
	302	tm->tm_sec = value;
	303	break;
	304	case 'U': /* week number from Sunday 0-53 */
	305	/* TODO */
	306	return NULL;
	307	case 'w': /* day number 0-6 from Sunday */
	308	if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
	309	return NULL;
	310	tm->tm_wday = value;
	311	break;
	312	case 'W': /* week number from Monday 0-53 */
	313	/* TODO */
	314	return NULL;
	315	case 'x': /* locale date format */
	316	USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
	317	break;
	318	case 'X': /* locale time format */
	319	USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
	320	break;
	321	case 'y': /* year mod 100 */
	322	if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
	323	return NULL;
	324	if(value >= 0 && value <= 68)
	325	value = 2000 + value;
	326	else if(value >= 69 && value <= 99)
	327	value = 1900 + value;
	328	tm->tm_year = value - 1900;
	329	break;
	330	case 'Y': /* year */
	331	if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
	332	return NULL;
	333	tm->tm_year = value - 1900;
	334	break;
	335	case '%':
	336	goto matchself;
	337	default:
	338	/* The spec is a bit vague about what to do with invalid format
	339	* strings. We return NULL immediately and hope someone will
	340	* notice. */
	341	return NULL;
	342	}
	343	buf = limit;
	344	} else if(isspace(fc)) {
	345	matchwhitespace:
	346	/* Any format whitespace matches any number of input whitespace
	347	* characters. The directive can formally contain more than one
	348	* whitespace character; for the second and subsequent ones we'll match 0
	349	* characters from the input. */
	350	while(isspace((unsigned char)*buf))
	351	++buf;
	352	} else {
	353	matchself:
	354	/* Non-% non-whitespace characters must match themselves exactly */
	355	if(fc != (unsigned char)*buf++)
	356	return NULL;
	357	}
	358	}
	359	/* When we run out of format string we return a pointer to the rest of the
	360	* input. */
	361	return buf;
	362	}
	363
	/** @brief Reimplementation of strptime()
	 * @param buf Input buffer
	 * @param format Format string
	 * @param tm Where to put result
	 * @return Pointer to first unparsed input character, or NULL on error
	 *
	 * Fields of @p tm that the format does not mention are left untouched.
	 *
	 * Based on <a
	 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
	 */
	char *my_strptime(const char *buf,
	                  const char *format,
	                  struct tm *tm) {
	  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
	   * implementation does not overwrite, as xgetdate() depends on this
	   * behavior. */

	  if(!(buf = my_strptime_guts(buf, format, tm)))
	    return NULL;
	  /* TODO various things we could/should do:
	   * - infer day/month from %j+year
	   * - infer day/month from %U/%W+%w/%a+year
	   * - infer hour from %p+%I
	   * - fill wday/yday from other fields
	   */
	  /* The cast drops const to match the strptime() return type */
	  return (char *)buf;
	}
	390
	391	/*
	392	Local Variables:
	393	c-basic-offset:2
	394	comment-column:40
	395	fill-column:79
	396	indent-tabs-mode:nil
	397	End:
	398	*/