chiark - git - mdw - mLib/blame_incremental

... / ...

Commit	Line	Data
	1	/* -*-c-*-
	2	*
	3	* `printf'-style formatting for dynamic strings
	4	*
	5	* (c) 1999 Straylight/Edgeware
	6	*/
	7
	8	/*----- Licensing notice --------------------------------------------------*
	9	*
	10	* This file is part of the mLib utilities library.
	11	*
	12	* mLib is free software; you can redistribute it and/or modify
	13	* it under the terms of the GNU Library General Public License as
	14	* published by the Free Software Foundation; either version 2 of the
	15	* License, or (at your option) any later version.
	16	*
	17	* mLib is distributed in the hope that it will be useful,
	18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	* GNU Library General Public License for more details.
	21	*
	22	* You should have received a copy of the GNU Library General Public
	23	* License along with mLib; if not, write to the Free
	24	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	25	* MA 02111-1307, USA.
	26	*/
	27
	28	/*----- Header files ------------------------------------------------------*/
	29
	30	#include "config.h"
	31
	32	#include <assert.h>
	33	#include <ctype.h>
	34	#include <math.h>
	35	#include <stdarg.h>
	36	#include <stdio.h>
	37	#include <stdlib.h>
	38	#include <string.h>
	39
	40	#ifdef HAVE_FLOAT_H
	41	# include <float.h>
	42	#endif
	43
	44	#include "darray.h"
	45	#include "dstr.h"
	46
	47	/*----- Tunable constants -------------------------------------------------*/
	48
	49	/*
	50	* For each format specifier, at least @PUTFSTEP@ bytes are ensured before
	51	* writing the formatted result.
	52	*/
	53
	54	#define PUTFSTEP 64 /* Buffer size for @putf@ */
	55
	56	/*----- Preliminary definitions -------------------------------------------*/
	57
	58	#define OUTPUT_FMTTYPES(_) \
	59	_(i, unsigned int) \
	60	_(li, unsigned long) \
	61	_(s, char *) \
	62	_(p, void *) \
	63	_(f, double) \
	64	_(Lf, long double)
	65
	66	#define PERCENT_N_FMTTYPES(_) \
	67	_(hn, short *) \
	68	_(n, int *) \
	69	_(ln, long *)
	70
	71	#define FMTTYPES(_) \
	72	OUTPUT_FMTTYPES(_) \
	73	PERCENT_N_FMTTYPES(_)
	74
	75	enum {
	76	fmt_unset = 0,
	77	#define CODE(code, ty) fmt_##code,
	78	FMTTYPES(CODE)
	79	#undef CODE
	80	fmt__limit
	81	};
	82
	83	typedef struct {
	84	int fmt;
	85	union {
	86	#define MEMB(code, ty) ty code;
	87	FMTTYPES(MEMB)
	88	#undef MEMB
	89	} u;
	90	} fmtarg;
	91
	92	DA_DECL(fmtarg_v, fmtarg);
	93
	94	enum {
	95	len_std = 0,
	96	len_h,
	97	len_l,
	98	len_ll,
	99	len_L
	100	};
	101
	102	#define f_len 0x000fu
	103	#define f_wd 0x0010u
	104	#define f_wdarg 0x0020u
	105	#define f_prec 0x0040u
	106	#define f_precarg 0x0080u
	107	#define f_plus 0x0100u
	108	#define f_minus 0x0200u
	109	#define f_sharp 0x0400u
	110	#define f_zero 0x0800u
	111	#define f_posarg 0x1000u
	112
	113	typedef struct {
	114	const char *p;
	115	size_t n;
	116	unsigned f;
	117	int fmt, ch;
	118	int wd, prec;
	119	int arg;
	120	} fmtspec;
	121
	122	DA_DECL(fmtspec_v, fmtspec);
	123
	124	/*----- Main code ---------------------------------------------------------*/
	125
	126	/* --- @dstr_vputf@ --- *
	127	*
	128	* Arguments: @dstr *d@ = pointer to a dynamic string block
	129	* @const char *p@ = pointer to @printf@-style format string
	130	* @va_list *ap@ = argument handle
	131	*
	132	* Returns: The number of characters written to the string.
	133	*
	134	* Use: As for @dstr_putf@, but may be used as a back-end to user-
	135	* supplied functions with @printf@-style interfaces.
	136	*/
	137
	138	static void set_arg(fmtarg_v *av, size_t i, int fmt)
	139	{
	140	size_t j, n;
	141
	142	n = DA_LEN(av);
	143	if (i >= n) {
	144	DA_ENSURE(av, i + 1 - n);
	145	for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset;
	146	DA_UNSAFE_EXTEND(av, i + 1 - n);
	147	}
	148
	149	if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt;
	150	else assert(DA(av)[i].fmt == fmt);
	151	}
	152
	153	int dstr_vputf(dstr d, const char p, va_list *ap)
	154	{
	155	size_t n = d->len;
	156	size_t sz, mx;
	157	dstr dd = DSTR_INIT;
	158	fmtspec_v sv = DA_INIT;
	159	fmtarg_v av = DA_INIT;
	160	fmtarg fa, fal;
	161	fmtspec fs, fsl;
	162	unsigned f;
	163	int i, anext;
	164	int wd, prec;
	165
	166	/* --- Initial pass through the input, parsing format specifiers --- *
	167	*
	168	* We essentially compile the format string into a vector of @fmtspec@
	169	* objects, each of which represnts a chunk of literal text followed by a
	170	* (possibly imaginary, in the case of the final one) formatting directive.
	171	* Output then simply consists of interpreting these specifiers in order.
	172	*/
	173
	174	anext = 0;
	175
	176	while (*p) {
	177	f = 0;
	178	DA_ENSURE(&sv, 1);
	179	fs = &DA(&sv)[DA_LEN(&sv)];
	180	DA_UNSAFE_EXTEND(&sv, 1);
	181
	182	/* --- Find the end of this literal portion --- */
	183
	184	fs->p = p;
	185	while (p && p != '%') p++;
	186	fs->n = p - fs->p;
	187
	188	/* --- Some simple cases --- *
	189	*
	190	* We might have reached the end of the string, or maybe a `%%' escape.
	191	*/
	192
	193	if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; }
	194	p++;
	195	if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; }
	196
	197	/* --- Pick up initial flags --- */
	198
	199	flags:
	200	for (;;) {
	201	switch (*p) {
	202	case '+': f \|= f_plus; break;
	203	case '-': f \|= f_minus; break;
	204	case '#': f \|= f_sharp; break;
	205	case '0': f \|= f_zero; break;
	206	default: goto done_flags;
	207	}
	208	p++;
	209	}
	210
	211	/* --- Pick up the field width --- */
	212
	213	done_flags:
	214	i = 0;
	215	while (isdigit((unsigned char)p)) i = 10i + *p++ - '0';
	216
	217	/* --- Snag: this might have been an argument position indicator --- */
	218
	219	if (i && *p == '$' && (!f \|\| f == f_zero)) {
	220	f \|= f_posarg;
	221	fs->arg = i - 1;
	222	p++;
	223	goto flags;
	224	}
	225
	226	/* --- Set the field width --- *
	227	*
	228	* If @i@ is nonzero here then we have a numeric field width. Otherwise
	229	* it might be `*', maybe with an explicit argument number.
	230	*/
	231
	232	if (i) {
	233	f \|= f_wd;
	234	fs->wd = i;
	235	} else if (p == '') {
	236	p++;
	237	if (!isdigit((unsigned char)*p))
	238	i = anext++;
	239	else {
	240	i = *p++ - '0';
	241	while (isdigit((unsigned char)p)) i = 10i + *p++ - '0';
	242	assert(*p == '$'); p++;
	243	assert(i > 0); i--;
	244	}
	245	f \|= f_wd \| f_wdarg;
	246	set_arg(&av, i, fmt_i); fs->wd = i;
	247	}
	248
	249	/* --- Maybe we have a precision spec --- */
	250
	251	if (*p == '.') {
	252	p++;
	253	f \|= f_prec;
	254	if (isdigit((unsigned char)*p)) {
	255	i = *p++ - '0';
	256	while (isdigit((unsigned char)p)) i = 10i + *p++ - '0';
	257	fs->prec = i;
	258	} else if (p != '')
	259	fs->prec = 0;
	260	else {
	261	p++;
	262	if (!isdigit((unsigned char)*p))
	263	i = anext++;
	264	else {
	265	i = *p++ - '0';
	266	while (isdigit((unsigned char)p)) i = 10i + *p++ - '0';
	267	assert(*p == '$'); p++;
	268	assert(i > 0); i--;
	269	}
	270	f \|= f_precarg;
	271	set_arg(&av, i, fmt_i); fs->prec = i;
	272	}
	273	}
	274
	275	/* --- Maybe some length flags --- */
	276
	277	switch (*p) {
	278	case 'h': f \|= len_h; p++; break;
	279	case 'l': f \|= len_l; p++; break;
	280	case 'L': f \|= len_L; p++; break;
	281	}
	282
	283	/* --- The flags are now ready --- */
	284
	285	fs->f = f;
	286
	287	/* --- At the end, an actual directive --- */
	288
	289	fs->ch = *p;
	290	switch (*p++) {
	291	case '%':
	292	fs->fmt = fmt_unset;
	293	break;
	294	case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
	295	switch (f & f_len) {
	296	case len_l: fs->fmt = fmt_li; break;
	297	default: fs->fmt = fmt_i;
	298	}
	299	break;
	300	case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
	301	fs->fmt = (f & f_len) == len_L ? fmt_Lf : fmt_f;
	302	break;
	303	case 'c':
	304	fs->fmt = fmt_i;
	305	break;
	306	case 's':
	307	fs->fmt = fmt_s;
	308	break;
	309	case 'p':
	310	fs->fmt = fmt_p;
	311	break;
	312	case 'n':
	313	switch (f & f_len) {
	314	case len_h: fs->fmt = fmt_hn; break;
	315	case len_l: fs->fmt = fmt_ln; break;
	316	default: fs->fmt = fmt_n;
	317	}
	318	break;
	319	default:
	320	fprintf(stderr,
	321	"FATAL dstr_vputf: unknown format specifier `%c'\n", p[-1]);
	322	abort();
	323	}
	324
	325	/* --- Finally sort out the argument --- *
	326	*
	327	* If we don't have explicit argument positions then this comes after the
	328	* width and precision; and we don't know the type code until we've
	329	* parsed the specifier, so this seems the right place to handle it.
	330	*/
	331
	332	if (!(f & f_posarg)) fs->arg = anext++;
	333	set_arg(&av, fs->arg, fs->fmt);
	334	}
	335
	336	/* --- Quick pass over the argument vector to collect the arguments --- */
	337
	338	for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
	339	switch (fa->fmt) {
	340	#define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
	341	FMTTYPES(CASE)
	342	#undef CASE
	343	default: abort();
	344	}
	345	}
	346
	347	/* --- Final pass through the format string to produce output --- */
	348
	349	fa = DA(&av);
	350	for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
	351	f = fs->f;
	352
	353	/* --- Output the literal portion --- */
	354
	355	if (fs->n) DPUTM(d, fs->p, fs->n);
	356
	357	/* --- And now the variable portion --- */
	358
	359	if (fs->fmt == fmt_unset) {
	360	switch (fs->ch) {
	361	case 0: break;
	362	case '%': DPUTC(d, '%'); break;
	363	default: abort();
	364	}
	365	continue;
	366	}
	367
	368	DRESET(&dd);
	369	DPUTC(&dd, '%');
	370
	371	/* --- Resolve the width and precision --- */
	372
	373	if (!(f & f_wd))
	374	wd = 0;
	375	else {
	376	wd = (fs->f & f_wdarg) ? (int )&fa[fs->wd].u.i : fs->wd;
	377	if (wd < 0) { wd = -wd; f \|= f_minus; }
	378	}
	379
	380	if (!(f & f_prec))
	381	prec = 0;
	382	else {
	383	prec = (fs->f & f_precarg) ? (int )&fa[fs->prec].u.i : fs->prec;
	384	if (prec < 0) { prec = 0; f &= ~f_prec; }
	385	}
	386
	387	/* --- Write out the flags, width and precision --- */
	388
	389	if (f & f_plus) DPUTC(&dd, '+');
	390	if (f & f_minus) DPUTC(&dd, '-');
	391	if (f & f_sharp) DPUTC(&dd, '#');
	392	if (f & f_zero) DPUTC(&dd, '0');
	393
	394	if (f & f_wd) {
	395	DENSURE(&dd, PUTFSTEP);
	396	dd.len += sprintf(dd.buf + dd.len, "%d", wd);
	397	}
	398
	399	if (f & f_prec) {
	400	DENSURE(&dd, PUTFSTEP + 1);
	401	dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
	402	}
	403
	404	/* --- Write out the length gadget --- */
	405
	406	switch (f & f_len) {
	407	case len_h: DPUTC(&dd, 'h'); break;
	408	case len_l: DPUTC(&dd, 'l'); break;
	409	case len_L: DPUTC(&dd, 'L'); break;
	410	case len_std: break;
	411	default: abort();
	412	}
	413
	414	/* --- And finally the actually important bit --- */
	415
	416	DPUTC(&dd, fs->ch);
	417	DPUTZ(&dd);
	418
	419	/* --- Make sure we have enough space for the output --- */
	420
	421	sz = PUTFSTEP;
	422	if (sz < wd) sz = wd;
	423	if (sz < prec + 16) sz = prec + 16;
	424	switch (fs->ch) {
	425	case 'a': case 'A':
	426	case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
	427	#ifdef HAVE_FLOAT_H
	428	if (fs->ch == 'f') {
	429	mx = ((fs->f & f_len) == len_L ?
	430	LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16;
	431	if (sz < mx) sz = mx;
	432	}
	433	break;
	434	#else
	435	DPUTS(d, "<no float support>");
	436	continue;
	437	#endif
	438	case 's':
	439	if (!(f & f_prec)) {
	440	n = strlen(fa[fs->arg].u.s);
	441	if (sz < n) sz = n;
	442	}
	443	break;
	444	case 'n':
	445	switch (fs->fmt) {
	446	#define CASE(code, ty) \
	447	case fmt_##code: *fa[fs->arg].u.code = d->len - n; break;
	448	PERCENT_N_FMTTYPES(CASE)
	449	#undef CASE
	450	default: abort();
	451	}
	452	continue;
	453	}
	454
	455	/* --- Finally do the output stage --- */
	456
	457	DENSURE(d, sz + 1);
	458	switch (fs->fmt) {
	459	#ifdef HAVE_SNPRINTF
	460	# define CASE(code, ty) case fmt_##code: \
	461	i = snprintf(d->buf + d->len, sz + 1, dd.buf, fa[fs->arg].u.code); \
	462	break;
	463	#else
	464	# define CASE(code, ty) case fmt_##code: \
	465	i = sprintf(d->buf + d->len, dd.buf, fa[fs->arg].u.code); \
	466	break;
	467	#endif
	468	OUTPUT_FMTTYPES(CASE)
	469	#undef CASE
	470	default: abort();
	471	}
	472	assert(0 <= i && i <= sz); d->len += i;
	473	}
	474
	475	/* --- We're done --- */
	476
	477	DPUTZ(d);
	478	DDESTROY(&dd);
	479	return (d->len - n);
	480	}
	481
	482	/* --- @dstr_putf@ --- *
	483	*
	484	* Arguments: @dstr *d@ = pointer to a dynamic string block
	485	* @const char *p@ = pointer to @printf@-style format string
	486	* @...@ = argument handle
	487	*
	488	* Returns: The number of characters written to the string.
	489	*
	490	* Use: Writes a piece of text to a dynamic string, doing @printf@-
	491	* style substitutions as it goes. Intended to be robust if
	492	* faced with malicious arguments, but not if the format string
	493	* itself is malicious.
	494	*/
	495
	496	int dstr_putf(dstr d, const char p, ...)
	497	{
	498	int n;
	499	va_list ap;
	500	va_start(ap, p);
	501	n = dstr_vputf(d, p, &ap);
	502	va_end(ap);
	503	return (n);
	504	}
	505
	506	/*----- That's all, folks -------------------------------------------------*/