chiark - git - mdw - runlisp/blame_incremental

... / ...

Commit	Line	Data
	1	/* --c--
	2	*
	3	* Common definitions for `runlisp'
	4	*
	5	* (c) 2020 Mark Wooding
	6	*/
	7
	8	/----- Licensing notice --------------------------------------------------
	9	*
	10	* This file is part of Runlisp, a tool for invoking Common Lisp scripts.
	11	*
	12	* Runlisp is free software: you can redistribute it and/or modify it
	13	* under the terms of the GNU General Public License as published by the
	14	* Free Software Foundation; either version 3 of the License, or (at your
	15	* option) any later version.
	16	*
	17	* Runlisp is distributed in the hope that it will be useful, but WITHOUT
	18	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	19	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	20	* for more details.
	21	*
	22	* You should have received a copy of the GNU General Public License
	23	* along with Runlisp. If not, see <https://www.gnu.org/licenses/>.
	24	*/
	25
	26	/----- Header files ------------------------------------------------------/
	27
	28	#include "config.h"
	29
	30	#include <assert.h>
	31
	32	#include <ctype.h>
	33	#include <errno.h>
	34	#include <stdarg.h>
	35	#include <stdio.h>
	36	#include <stdlib.h>
	37	#include <string.h>
	38
	39	#include <unistd.h>
	40
	41	#include "lib.h"
	42
	43	/----- Diagnostic utilities ----------------------------------------------/
	44
	45	const char *progname = "???";
	46	/* Our program name, for use in error messages. */
	47
	48	/* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
	49	void set_progname(const char *prog)
	50	{
	51	const char *p;
	52
	53	p = strrchr(prog, '/');
	54	progname = p ? p + 1 : prog;
	55	}
	56
	57	/* Report an error or warning in Unix style, given a captured argument
	58	* cursor.
	59	*/
	60	void vmoan(const char *msg, va_list ap)
	61	{
	62	fprintf(stderr, "%s: ", progname);
	63	vfprintf(stderr, msg, ap);
	64	fputc('\n', stderr);
	65	}
	66
	67	/* Issue a warning message. */
	68	void moan(const char *msg, ...)
	69	{ va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
	70
	71	/* Issue a fatal error message and exit unsuccessfully. */
	72	void lose(const char *msg, ...)
	73	{ va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
	74
	75	/----- Memory allocation -------------------------------------------------/
	76
	77	/* Allocate and return a pointer to N bytes, or report a fatal error.
	78	*
	79	* Release the pointer using `free' as usual. If N is zero, returns null
	80	* (but you are not expected to check for this).
	81	*/
	82	void *xmalloc(size_t n)
	83	{
	84	void *p;
	85
	86	if (!n) return (0);
	87	p = malloc(n); if (!p) lose("failed to allocate memory");
	88	return (p);
	89	}
	90
	91	/* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
	92	*
	93	* The block might (and probably will) move, so it returns the new address.
	94	* If N is zero, then the block is freed (if necessary) and a null pointer
	95	* returned; otherwise, if P is null then a fresh block is allocated. If
	96	* allocation fails, then a fatal error is reported.
	97	*/
	98	void xrealloc(void p, size_t n)
	99	{
	100	if (!n) { free(p); return (0); }
	101	else if (!p) return (xmalloc(n));
	102	p = realloc(p, n); if (!p) lose("failed to allocate memory");
	103	return (p);
	104	}
	105
	106	/* Allocate and return a copy of the N-byte string starting at P.
	107	*
	108	* The new string is null-terminated, though P need not be. If allocation
	109	* fails, then a fatal error is reported.
	110	*/
	111	char xstrndup(const char p, size_t n)
	112	{
	113	char *q = xmalloc(n + 1);
	114
	115	memcpy(q, p, n); q[n] = 0;
	116	return (q);
	117	}
	118
	119	/* Allocate and return a copy of the null-terminated string starting at P.
	120	*
	121	* If allocation fails, then a fatal error is reported.
	122	*/
	123	char xstrdup(const char p) { return (xstrndup(p, strlen(p))); }
	124
	125	/----- Dynamic strings ---------------------------------------------------/
	126
	127	/* Initialize the string D.
	128	*
	129	* Usually you'd use the static initializer `DSTR_INIT'.
	130	*/
	131	void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
	132
	133	/* Reset string D so it's empty again. */
	134	void dstr_reset(struct dstr *d) { d->len = 0; }
	135
	136	/* Ensure that D has at least N unused bytes available. */
	137	void dstr_ensure(struct dstr *d, size_t n)
	138	{
	139	size_t need = d->len + n, newsz;
	140
	141	if (need <= d->sz) return;
	142	newsz = d->sz ? 2*d->sz : 16;
	143	while (newsz < need) newsz *= 2;
	144	d->p = xrealloc(d->p, newsz); d->sz = newsz;
	145	}
	146
	147	/* Release the memory held by D.
	148	*
	149	* It must be reinitialized (e.g., by `dstr_init') before it can be used
	150	* again.
	151	*/
	152	void dstr_release(struct dstr *d) { free(d->p); }
	153
	154	/* Append the N-byte string at P to D.
	155	*
	156	* P need not be null-terminated. D will not be null-terminated
	157	* afterwards.
	158	*/
	159	void dstr_putm(struct dstr d, const void p, size_t n)
	160	{ dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
	161
	162	/* Append the null-terminated string P to D.
	163	*
	164	* D /is/ guaranteed to be null-terminated after this.
	165	*/
	166	void dstr_puts(struct dstr d, const char p)
	167	{
	168	size_t n = strlen(p);
	169
	170	dstr_ensure(d, n + 1);
	171	memcpy(d->p + d->len, p, n + 1);
	172	d->len += n;
	173	}
	174
	175	/* Append the single character CH to D.
	176	*
	177	* D will not be null-terminated afterwards.
	178	*/
	179	void dstr_putc(struct dstr *d, int ch)
	180	{ dstr_ensure(d, 1); d->p[d->len++] = ch; }
	181
	182	/* Append N copies of the character CH to D.
	183	*
	184	* D will not be null-terminated afterwards.
	185	*/
	186	void dstr_putcn(struct dstr *d, int ch, size_t n)
	187	{ dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
	188
	189	/* Null-terminate the string D.
	190	*
	191	* This doesn't change the length of D. If further stuff is appended then
	192	* the null terminator will be overwritten.
	193	*/
	194	void dstr_putz(struct dstr *d)
	195	{ dstr_ensure(d, 1); d->p[d->len] = 0; }
	196
	197	/* Append stuff to D, determined by printf(3) format string P and argument
	198	* tail AP.
	199	*
	200	* D will not be null-terminated afterwards.
	201	*/
	202	void dstr_vputf(struct dstr d, const char p, va_list ap)
	203	{
	204	va_list ap2;
	205	size_t r;
	206	int n;
	207
	208	r = d->sz - d->len;
	209	va_copy(ap2, ap);
	210	n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
	211	va_end(ap2);
	212	if (n >= r) {
	213	dstr_ensure(d, n + 1); r = d->sz - d->len;
	214	n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
	215	}
	216	d->len += n;
	217	}
	218
	219	/* Append stuff to D, determined by printf(3) format string P and arguments.
	220	*
	221	* D will not be null-terminated afterwards.
	222	*/
	223	PRINTF_LIKE(2, 3) void dstr_putf(struct dstr d, const char p, ...)
	224	{ va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
	225
	226	/* Append the next input line from FP to D.
	227	*
	228	* Return 0 on success, or -1 if reading immediately fails or encounters
	229	* end-of-file (call ferror(3) to distinguish). Any trailing newline is
	230	* discarded: it is not possible to determine whether the last line was ended
	231	* with a newline. D is guaranteed to be null-terminated afterwards.
	232	*/
	233	int dstr_readline(struct dstr d, FILE fp)
	234	{
	235	size_t n;
	236	int any = 0;
	237
	238	for (;;) {
	239	dstr_ensure(d, 2);
	240	if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
	241	n = strlen(d->p + d->len); assert(n > 0); any = 1;
	242	d->len += n;
	243	if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
	244	}
	245
	246	if (!any) return (-1);
	247	else return (0);
	248	}
	249
	250	/----- Dynamic vectors of strings ----------------------------------------/
	251
	252	/* Initialize the vector AV.
	253	*
	254	* Usually you'd use the static initializer `ARGV_INIT'.
	255	*/
	256	void argv_init(struct argv *av)
	257	{ av->v = 0; av->o = av->n = av->sz = 0; }
	258
	259	/* Reset the vector AV so that it's empty again. */
	260	void argv_reset(struct argv *av) { av->n = 0; }
	261
	262	/* Ensure that AV has at least N unused slots at the end. */
	263	void argv_ensure(struct argv *av, size_t n)
	264	{
	265	size_t need = av->n + av->o + n, newsz;
	266
	267	if (need <= av->sz) return;
	268	newsz = av->sz ? 2*av->sz : 8;
	269	while (newsz < need) newsz *= 2;
	270	av->v = xrealloc(av->v - av->o, newszsizeof(char )); av->v += av->o;
	271	av->sz = newsz;
	272	}
	273
	274	/* Ensure that AV has at least N unused slots at the /start/. */
	275	void argv_ensure_offset(struct argv *av, size_t n)
	276	{
	277	size_t newoff;
	278
	279	/* Stupid version. We won't, in practice, be prepending lots of stuff, so
	280	* avoid the extra bookkeeping involved in trying to make a double-ended
	281	* extendable array asymptotically efficient.
	282	*/
	283	if (av->o >= n) return;
	284	newoff = 16;
	285	while (newoff < n) newoff *= 2;
	286	argv_ensure(av, newoff - av->o);
	287	memmove(av->v + newoff - av->o, av->v, av->nsizeof(char ));
	288	av->v += newoff - av->o; av->o = newoff;
	289	}
	290
	291	/* Release the memory held by AV.
	292	*
	293	* It must be reinitialized (e.g., by `argv_init') before it can be used
	294	* again.
	295	*/
	296	void argv_release(struct argv *av) { free(av->v - av->o); }
	297
	298	/* Append the pointer P to AV. */
	299	void argv_append(struct argv av, char p)
	300	{ argv_ensure(av, 1); av->v[av->n++] = p; }
	301
	302	/* Append a null pointer to AV, without extending the vactor length.
	303	*
	304	* The null pointer will be overwritten when the next string is appended.
	305	*/
	306	void argv_appendz(struct argv *av)
	307	{ argv_ensure(av, 1); av->v[av->n] = 0; }
	308
	309	/* Append a N-element vector V of pointers to AV. */
	310	void argv_appendn(struct argv av, char const *v, size_t n)
	311	{
	312	argv_ensure(av, n);
	313	memcpy(av->v + av->n, v, nsizeof(const char ));
	314	av->n += n;
	315	}
	316
	317	/* Append the variable-length vector BV to AV. */
	318	void argv_appendav(struct argv av, const struct argv bv)
	319	{ argv_appendn(av, bv->v, bv->n); }
	320
	321	/* Append the pointers from a variable-length argument list AP to AV.
	322	*
	323	* The list is terminated by a null pointer.
	324	*/
	325	void argv_appendv(struct argv *av, va_list ap)
	326	{
	327	char *p;
	328	for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
	329	}
	330
	331	/* Append the argument pointers, terminated by a null pointer, to AV. */
	332	void argv_appendl(struct argv *av, ...)
	333	{ va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
	334
	335	/* Prepend the pointer P to AV. */
	336	void argv_prepend(struct argv av, char p)
	337	{ argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
	338
	339	/* Prepend a N-element vector V of pointers to AV. */
	340	void argv_prependn(struct argv av, char const *v, size_t n)
	341	{
	342	argv_ensure_offset(av, n);
	343	av->o -= n; av->v -= n; av->n += n;
	344	memcpy(av->v, v, nsizeof(const char ));
	345	}
	346
	347	/* Prepend the variable-length vector BV to AV. */
	348	void argv_prependav(struct argv av, const struct argv bv)
	349	{ argv_prependn(av, bv->v, bv->n); }
	350
	351	/* Prepend the pointers from a variable-length argument list AP to AV.
	352	*
	353	* The list is terminated by a null pointer.
	354	*/
	355	void argv_prependv(struct argv *av, va_list ap)
	356	{
	357	char p, *v;
	358	size_t n = 0;
	359
	360	for (;;) {
	361	p = va_arg(ap, char *); if (!p) break;
	362	argv_prepend(av, p); n++;
	363	}
	364	v = av->v;
	365	while (n >= 2) {
	366	p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
	367	v++; n -= 2;
	368	}
	369	}
	370
	371	/* Prepend the argument pointers, terminated by a null pointer, to AV. */
	372	void argv_prependl(struct argv *av, ...)
	373	{ va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
	374
	375	/----- Treaps ------------------------------------------------------------/
	376
	377	/* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
	378	* string B in a lexicographic ordering.
	379	*
	380	* All comparisons of keys is handled by this function.
	381	*/
	382	static int str_lt(const char a, size_t an, const char b, size_t bn)
	383	{
	384	/* This is a little subtle. We need only compare the first N bytes of the
	385	* strings, where N is the length of the shorter string. If this
	386	* distinguishes the two strings, then we're clearly done. Otherwise, if
	387	* the prefixes are equal then the shorter string is the smaller one. If
	388	* the two strings are the same length, then they're equal.
	389	*
	390	* Hence, if A is the strictly shorter string, then A precedes B if A
	391	* precedes or matches the prefix of B; otherwise A only precedes B if A
	392	* strictly precedes the prefix of B.
	393	*/
	394	if (an < bn) return (MEMCMP(a, <=, b, an));
	395	else return (MEMCMP(a, <, b, bn));
	396	}
	397
	398	/* Initialize the treap T.
	399	*
	400	* Usually you'd use the static initializer `TREAP_INIT'.
	401	*/
	402	void treap_init(struct treap *t) { t->root = 0; }
	403
	404	/* Look up the KN-byte key K in the treap T.
	405	*
	406	* Return a pointer to the matching node if one was found, or null otherwise.
	407	*/
	408	void treap_lookup(const struct treap t, const char *k, size_t kn)
	409	{
	410	struct treap_node n = t->root, candidate = 0;
	411
	412	/* This is a simple prototype for some of the search loops we'll encounter
	413	* later. Notice that we use a strict one-sided comparison, rather than
	414	* the more conventional two-sided comparison.
	415	*
	416	* The main loop will find the largest key not greater than K.
	417	*/
	418	while (n)
	419	/* Compare the node's key against our key. If the node is too large,
	420	* then we ignore it and move left. Otherwise remember this node for
	421	* later, and move right to see if we can find a better, larger node.
	422	*/
	423
	424	if (str_lt(k, kn, n->k, n->kn)) n = n->left;
	425	else { candidate = n; n = n->right; }
	426
	427	/* If the candidate node is less than our key then we failed. Otherwise,
	428	* by trichotomy, we have found the correct node.
	429	*/
	430	if (!candidate \|\| str_lt(candidate->k, candidate->kn, k, kn)) return (0);
	431	return (candidate);
	432	}
	433
	434	/* Look up the KN-byte K in the treap T, recording a path in P.
	435	*
	436	* This is similar to `treap_lookup', in that it returns the requested node
	437	* if it already exists, or null otherwise, but it also records in P
	438	* information to be used by `treap_insert' to insert a new node with the
	439	* given key if it's not there already.
	440	*/
	441	void treap_probe(struct treap t, const char *k, size_t kn,
	442	struct treap_path *p)
	443	{
	444	struct treap_node *nn = &t->root, candidate = 0;
	445	unsigned i = 0;
	446
	447	/* This walk is similar to `treap_lookup' above, except that we also record
	448	* the address of each node pointer we visit along the way.
	449	*/
	450	for (;;) {
	451	assert(i < TREAP_PATHMAX); p->path[i++] = nn;
	452	if (!*nn) break;
	453	if (str_lt(k, kn, (nn)->k, (nn)->kn)) nn = &(*nn)->left;
	454	else { candidate = nn; nn = &(nn)->right; }
	455	}
	456	p->nsteps = i;
	457
	458	/* Check to see whether we found the right node. */
	459	if (!candidate \|\| str_lt(candidate->k, candidate->kn, k, kn)) return (0);
	460	return (candidate);
	461	}
	462
	463	/* Insert a new node N into T, associating it with the KN-byte key K.
	464	*
	465	* Use the path data P, from `treap_probe', to help with insertion.
	466	*/
	467	void treap_insert(struct treap t, const struct treap_path p,
	468	struct treap_node n, const char k, size_t kn)
	469	{
	470	size_t i = p->nsteps;
	471	struct treap_node nn, uu, *u;
	472	unsigned wt;
	473
	474	/* Fill in the node structure. */
	475	n->k = xstrndup(k, kn); n->kn = kn;
	476	n->wt = wt = rand(); n->left = n->right = 0;
	477
	478	/* Prepare for the insertion.
	479	*
	480	* The path actually points to each of the links traversed when searching
	481	* for the node, starting with the `root' pointer, then the `left' or
	482	* `right' pointer of the root node, and so on; `nsteps' will always be
	483	* nonzero, since the path will always pass through the root, and the final
	484	* step, `path->path[path->nsteps - 1]' will always be the address of a
	485	* null pointer onto which the freshly inserted node could be hooked in
	486	* order to satisfy the binary-search-tree ordering. (Of course, this will
	487	* likely /not/ satisfy the heap condition, so more work needs to be done.)
	488	*
	489	* Throughout, NN is our current candidate for where to attach the node N.
	490	* As the loop progresses, NN will ascend to links further up the tree, and
	491	* N will be adjusted to accumulate pieces of the existing tree structure.
	492	* We'll stop when we find that the parent node's weight is larger than our
	493	* new node's weight, at which point we can just set *NN = N; or if we run
	494	* out of steps in the path, in which case *NN is the root pointer.
	495	*/
	496	assert(i); nn = p->path[--i];
	497	while (i--) {
	498
	499	/* Collect the next step in the path, and get the pointer to the node. */
	500	uu = p->path[i]; u = *uu;
	501
	502	/* If this node's weight is higher, then we've found the right level and
	503	* we can stop.
	504	*/
	505	if (wt <= u->wt) break;
	506
	507	/* The node U is lighter than our new node N, so we must rotate in order
	508	* to fix things. If we were currently planning to hook N as the left
	509	* subtree of U, then we rotate like this:
	510	*
	511	* \| \|
	512	* U (N)
	513	* / \ / \
	514	* (N) Z ---> X U
	515	* / \ / \
	516	* X Y Y Z
	517	*
	518	* On the other hand, if we ere planning to hook N as the right subtree
	519	* of U, then we do the opposite rotation:
	520	*
	521	* \| \|
	522	* U (N)
	523	* / \ / \
	524	* X (N) ---> U Z
	525	* / \ / \
	526	* Y Z X Y
	527	*
	528	* These transformations clearly preserve the ordering of nodes in the
	529	* binary search tree, and satisfy the heap condition in the subtree
	530	* headed by N.
	531	*/
	532	if (nn == &u->left) { u->left = n->right; n->right = u; }
	533	else { u->right = n->left; n->left = u; }
	534
	535	/* And this arrangement must be attached to UU, or some higher attachment
	536	* point. The subtree satisfies the heap condition, and can be attached
	537	* safely at the selected place.
	538	*/
	539	nn = uu;
	540	}
	541
	542	/* We've found the right spot. Hook the accumulated subtree into place. */
	543	*nn = n;
	544	}
	545
	546	/* Remove the node with the KN-byte K from T.
	547	*
	548	* Return the address of the node we removed, or null if it couldn't be
	549	* found.
	550	*/
	551	void treap_remove(struct treap t, const char *k, size_t kn)
	552	{
	553	struct treap_node nn = &t->root, candidate = 0, n, l, *r;
	554
	555	/* Search for the matching node, but keep track of the address of the link
	556	* which points to our target node.
	557	*/
	558	while (*nn)
	559	if (str_lt(k, kn, (nn)->k, (nn)->kn)) nn = &(*nn)->left;
	560	else { candidate = nn; nn = &(*nn)->right; }
	561
	562	/* If this isn't the right node then give up. */
	563	if (!candidate \|\| str_lt((candidate)->k, (candidate)->kn, k, kn))
	564	return (0);
	565
	566	/* Now we need to disentangle the node from the tree. This is essentially
	567	* the reverse of insertion: we pretend that this node is suddenly very
	568	* light, and mutate the tree so as to restore the heap condition until
	569	* eventually our node is a leaf and can be cut off without trouble.
	570	*
	571	* Throughout, the link *NN notionally points to N, but we don't actually
	572	* update it until we're certain what value it should finally take.
	573	*/
	574	nn = candidate; n = *nn; l = n->left; r = n->right;
	575	for (;;)
	576
	577	/* If its left subtree is empty then we can replace our node by its right
	578	* subtree and be done. Similarly, if the right subtree is empty then we
	579	* replace the node by its left subtree.
	580	*
	581	* \| \| \| \|
	582	* (N) ---> R ; (N) ---> L
	583	* / \ / \
	584	* * R L *
	585	*/
	586	if (!l) { *nn = r; break; }
	587	else if (!r) { *nn = l; break; }
	588
	589	/* Otherwise we need to rotate the pointers so that the heavier of the
	590	* two children takes the place of our node; thus we have either
	591	*
	592	* \| \|
	593	* (N) L
	594	* / \ / \
	595	* L R ---> X (N)
	596	* / \ / \
	597	* X Y Y R
	598	*
	599	* or
	600	*
	601	* \| \|
	602	* (N) R
	603	* / \ / \
	604	* L R ---> (N) Y
	605	* / \ / \
	606	* X Y L X
	607	*
	608	* Again, these transformations clearly preserve the ordering of nodes in
	609	* the binary search tree, and the heap condition.
	610	*/
	611	else if (l->wt > r->wt)
	612	{ *nn = l; nn = &l->right; l = n->left = l->right; }
	613	else
	614	{ *nn = r; nn = &r->left; r = n->right = r->left; }
	615
	616	/* Release the key buffer, and return the node that we've now detached. */
	617	free(n->k); return (n);
	618	}
	619
	620	/* Initialize an iterator I over T's nodes. */
	621	void treap_start_iter(struct treap t, struct treap_iter i)
	622	{
	623	struct treap_node *n = t->root;
	624	unsigned sp = 0;
	625
	626	/* The `stack' in the iterator structure is an empty ascending stack of
	627	* nodes which have been encountered, and their left subtrees investigated,
	628	* but not yet visited by the iteration.
	629	*
	630	* Iteration begins by stacking the root node, its left child, and so on,
	631	* At the end of this, the topmost entry on the stack is the least node of
	632	* the tree, followed by its parent, grandparent, and so on up to the root.
	633	*/
	634	while (n) {
	635	assert(sp < TREAP_PATHMAX);
	636	i->stack[sp++] = n; n = n->left;
	637	}
	638	i->sp = sp;
	639	}
	640
	641	/* Return the next node from I, in ascending order by key.
	642	*
	643	* If there are no more nodes, then return null.
	644	*/
	645	void treap_next(struct treap_iter i)
	646	{
	647	struct treap_node n, o;
	648	unsigned sp = i->sp;
	649
	650	/* We say that a node is /visited/ once it's been returned by this
	651	* iterator. To traverse a tree in order, then, we traverse its left
	652	* subtree, visit the tree root, and traverse its right subtree -- which is
	653	* a fine recursive definition, but we need a nonrecursive implementation.
	654	*
	655	* As is usual in this kind of essential structural recursion, we maintain
	656	* a stack. The invariant that we'll maintain is as follows.
	657	*
	658	* 1. If the stack is empty, then all nodes have been visited.
	659	*
	660	* 2, If the stack is nonempty then the topmost entry on the stack is the
	661	* least node which has not yet been visited -- and therefore is the
	662	* next node to visit.
	663	*
	664	* 3. The earlier entries in the stack are, in (top to bottom) order,
	665	* those of the topmost node's parent, grandparent, etc., up to the
	666	* root, which have not yet been visited. More specifically, a node
	667	* appears in the stack if and only if some node in its left subtree
	668	* is nearer the top of the stack.
	669	*
	670	* When we initialized the iterator state (in `treap_start_iter' above), we
	671	* traced a path to the leftmost leaf, stacking the root, its left-hand
	672	* child, and so on. The leftmost leaf is clearly the first node to be
	673	* visited, and its entire ancestry is on the stack since none of these
	674	* nodes has yet been visited. (If the tree is empty, then we have done
	675	* nothing, the stack is empty, and there are no nodes to visit.) This
	676	* establishes the base case for the induction.
	677	*/
	678
	679	/* So, if the stack is empty now, then (1) all of the nodes have been
	680	* visited and there's nothing left to do. Return null.
	681	*/
	682	if (!sp) return (0);
	683
	684	/* It's clear that, if we pop the topmost element of the stack, visit it,
	685	* and arrange to reestablish the invariant, then we'll visit the nodes in
	686	* the correct order, pretty much by definition.
	687	*
	688	* So, pop a node off the stack. This is the node we shall return. But
	689	* before we can do that, we must reestablish the above invariant.
	690	* Firstly, the current node is removed from the stack, because we're about
	691	* to visit it, and visited nodes don't belong on the stack. Then there
	692	* are two cases to consider.
	693	*
	694	* * If the current node's right subtree is not empty, then the next node
	695	* to be visited is the leftmost node in that subtree. All of the
	696	* nodes on the stack are ancestors of the current node, and the right
	697	* subtree consists of its descendants, so none of them are already on
	698	* the stack; and they're all greater than the current node, and
	699	* therefore haven't been visited. Therefore, we must push the current
	700	* node's right child, its /left/ child, and so on, proceeding
	701	* leftwards until we fall off the bottom of the tree.
	702	*
	703	* * Otherwise, we've finished traversing some subtree. Either we are
	704	* now done, or (3) we have just finished traversing the left subtree
	705	* of the next topmost item on the stack. This must therefore be the
	706	* next node to visit. The rest of the stack is already correct.
	707	*/
	708	n = i->stack[--sp];
	709	o = n->right;
	710	while (o) {
	711	assert(sp < TREAP_PATHMAX);
	712	i->stack[sp++] = o; o = o->left;
	713	}
	714	i->sp = sp;
	715	return (n);
	716	}
	717
	718	/* Recursively check the subtree headed by N.
	719	*
	720	* No node should have weight greater than MAXWT, to satisfy the heap
	721	* condition; if LO is not null, then all node keys should be strictly
	722	* greater than LO, and, similarly, if HI is not null, then all keys should
	723	* be strictly smaller than HI.
	724	*/
	725	static void check_subtree(struct treap_node *n, unsigned maxwt,
	726	const char klo, const char khi)
	727	{
	728	/* Check the heap condition. */
	729	assert(n->wt <= maxwt);
	730
	731	/* Check that the key is in bounds. (Use `strcmp' here to ensure that our
	732	* own `str_lt' is working correctly.)
	733	*/
	734	if (klo) assert(STRCMP(n->k, >, klo));
	735	if (khi) assert(STRCMP(n->k, <, khi));
	736
	737	/* Check the left subtree. Node weights must be bounded above by our own
	738	* weight. And everykey in the left subtree must be smaller than our
	739	* current key. We propagate the lower bound.
	740	*/
	741	if (n->left) check_subtree(n->left, n->wt, klo, n->k);
	742
	743	/* Finally, check the right subtree. This time, every key must be larger
	744	* than our key, and we propagate the upper bound.
	745	*/
	746	if (n->right) check_subtree(n->right, n->wt, n->k, khi);
	747	}
	748
	749	/* Check the treap structure rules for T. */
	750	void treap_check(struct treap *t)
	751	{ if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
	752
	753	/* Recursively dump the subtree headed by N, indenting the output lines by
	754	* IND spaces.
	755	*/
	756	static void dump_node(struct treap_node *n, int ind)
	757	{
	758	if (n->left) dump_node(n->left, ind + 1);
	759	printf(";;%s [%10u] `%s'\n", 2ind, "", n->wt, n->k);
	760	if (n->right) dump_node(n->right, ind + 1);
	761	}
	762
	763	/* Dump the treap T to standard output, for debugging purposes. */
	764	void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
	765
	766	/----- Configuration file parsing ----------------------------------------/
	767
	768	#ifndef DECL_ENVIRON
	769	extern char **environ;
	770	#endif
	771
	772	/* Advance P past a syntactically valid name, but no further than L.
	773	*
	774	* Return the new pointer. If no name is found, report an error, blaming
	775	* FILE and LINE; WHAT is an adjective for the kind of name that was
	776	* expected.
	777	*/
	778	static const char scan_name(const char what,
	779	const char p, const char l,
	780	const char *file, unsigned line)
	781	{
	782	const char *q = p;
	783
	784	while (q < l &&
	785	(ISALNUM(q) \|\| q == '-' \|\| q == '_' \|\| q == '.' \|\| *q == '/' \|\|
	786	q == '' \|\| q == '+' \|\| q == '%' \|\| *q == '@'))
	787	q++;
	788	if (q == p) lose("%s:%u: expected %s name", file, line, what);
	789	return (q);
	790	}
	791
	792	/* Initialize the configuration state CONF.
	793	*
	794	* Usually you'd use the static initializer `CONFIG_INIT'.
	795	*/
	796	void config_init(struct config *conf)
	797	{ treap_init(&conf->sections); }
	798
	799	/* Find and return the section with null-terminated NAME in CONF.
	800	*
	801	* If no section is found, the behaviour depends on whether `CF_CREAT' is set
	802	* in F: if so, an empty section is created and returned; otherwise, a null
	803	* pointer is returned.
	804	*/
	805	struct config_section config_find_section(struct config conf, unsigned f,
	806	const char *name)
	807	{ return (config_find_section_n(conf, f, name, strlen(name))); }
	808
	809	/* Find and return the section with the SZ-byte NAME in CONF.
	810	*
	811	* This works like `config_find_section', but with an explicit length for the
	812	* NAME rather than null-termination.
	813	*/
	814	struct config_section config_find_section_n(struct config conf, unsigned f,
	815	const char *name, size_t sz)
	816	{
	817	struct config_section *sect;
	818	struct treap_path path;
	819
	820	if (!(f&CF_CREAT))
	821	sect = treap_lookup(&conf->sections, name, sz);
	822	else {
	823	sect = treap_probe(&conf->sections, name, sz, &path);
	824	if (!sect) {
	825	sect = xmalloc(sizeof(*sect));
	826	if (!conf->head) conf->tail = &conf->head;
	827	sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
	828	sect->parents = 0; sect->nparents = SIZE_MAX;
	829	treap_init(&sect->vars); treap_init(&sect->cache);
	830	treap_insert(&conf->sections, &path, &sect->_node, name, sz);
	831	config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
	832	}
	833	}
	834	return (sect);
	835	}
	836
	837	/* Set the fallback section for CONF to be SECT.
	838	*
	839	* That is, if a section has no explicit parents, then by default it will
	840	* have a single parent which is SECT. If SECT is null then there is no
	841	* fallback section, and sections which don't have explicitly specified
	842	* parents have no parents at all. (This is the default situation.)
	843	*/
	844	void config_set_fallback(struct config conf, struct config_section sect)
	845	{ conf->fallback = sect; }
	846
	847	/* Arrange that SECT has PARENT as its single parent section.
	848	*
	849	* If PARENT is null, then arrange that SECT has no parents at all. In
	850	* either case, any `@parents' setting will be ignored.
	851	*/
	852	void config_set_parent(struct config_section *sect,
	853	struct config_section *parent)
	854	{
	855	if (!parent)
	856	sect->nparents = 0;
	857	else {
	858	sect->parents = xmalloc(sizeof(*sect->parents));
	859	sect->parents[0] = parent; sect->nparents = 1;
	860	}
	861	}
	862
	863	/* Initialize I to iterate over the sections defined in CONF. */
	864	void config_start_section_iter(struct config *conf,
	865	struct config_section_iter *i)
	866	{ i->sect = conf->head; }
	867
	868	/* Return the next section from I, in order of creation.
	869	*
	870	* If there are no more sections, then return null.
	871	*/
	872	struct config_section config_next_section(struct config_section_iter i)
	873	{
	874	struct config_section *sect;
	875
	876	sect = i->sect;
	877	if (sect) i->sect = sect->next;
	878	return (sect);
	879	}
	880
	881	/* Initialize the `parents' links of SECT, if they aren't set up already.
	882	*
	883	* If SECT contains a `@parents' setting then parse it to determine the
	884	* parents; otherwise use CONF's fallbeck section, as established by
	885	* `config_set_fallback'.
	886	*/
	887	static void set_config_section_parents(struct config *conf,
	888	struct config_section *sect)
	889	{
	890	struct config_section *parent;
	891	struct config_var *var;
	892	const char *file; unsigned line;
	893	size_t i, n;
	894	char p, q, *l;
	895	struct argv av = ARGV_INIT;
	896
	897	/* If the section already has parents established then there's nothing to
	898	* do.
	899	*/
	900	if (sect->nparents != SIZE_MAX) return;
	901
	902	/* Look up `@parents', without recursion! */
	903	var = treap_lookup(&sect->vars, "@parents", 8);
	904	if (!var) {
	905	/* No explicit setting: use the fallback setting. */
	906
	907	if (!conf->fallback \|\| conf->fallback == sect)
	908	sect->nparents = 0;
	909	else {
	910	sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
	911	sect->parents[0] = conf->fallback;
	912	}
	913	} else {
	914	/* Found a `@parents' list: parse it and set the parents list. */
	915
	916	file = var->file; line = var->line; if (!file) file = "<internal>";
	917
	918	/* We do this in two phases. First, we parse out the section names, and
	919	* record start/limit pointer pairs in `av'.
	920	*/
	921	p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
	922	while (*p) {
	923	q = p;
	924	p = (/unconst/ char *)scan_name("parent section", p, l, file, line);
	925	argv_append(&av, q); argv_append(&av, p);
	926	while (p < l && ISSPACE(*p)) p++;
	927	if (p >= l) break;
	928	if (p == ',') do p++; while (ISSPACE(p));
	929	}
	930
	931	/* Now that we've finished parsing, we know how many parents we're going
	932	* to have, so we can allocate the `parents' vector and fill it in.
	933	*/
	934	sect->nparents = av.n/2;
	935	sect->parents = xmalloc(sect->nparentssizeof(sect->parents));
	936	for (i = 0; i < av.n; i += 2) {
	937	n = av.v[i + 1] - av.v[i];
	938	parent = config_find_section_n(conf, 0, av.v[i], n);
	939	if (!parent)
	940	lose("%s:%u: unknown parent section `%.*s'",
	941	file, line, (int)n, av.v[i]);
	942	sect->parents[i/2] = parent;
	943	}
	944	}
	945
	946	/* All done. */
	947	argv_release(&av);
	948	}
	949
	950	/* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
	951	*
	952	* If successful, return a pointer to the variable; otherwise return null.
	953	* Inheritance cycles and ambiguous inheritance are diagnosed as fatal
	954	* errors.
	955	*/
	956	struct config_var search_recursive(struct config conf,
	957	struct config_section *sect,
	958	const char *name, size_t sz)
	959	{
	960	struct config_cache_entry *cache;
	961	struct treap_path path;
	962	struct config_var var, v;
	963	size_t i, j = j;
	964
	965	/* If the variable is defined locally then we can just return it. */
	966	var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
	967
	968	/* If we have no parents then there's no way we can find it. */
	969	set_config_section_parents(conf, sect);
	970	if (!sect->parents) return (0);
	971
	972	/* Otherwise we must visit the section's parents. We can avoid paying for
	973	* this on every lookup by using a cache. If there's already an entry for
	974	* this variable then we can return the result immediately (note that we
	975	* cache both positive and negative outcomes). Otherwise we create a new
	976	* cache entry, do the full recursive search, and fill in the result when
	977	* we're done.
	978	*
	979	* The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
	980	* new cache entry when it's first created, and clear it when we fill in
	981	* the result: if we encounter an open cache entry again, we know that
	982	* we've found a cycle.
	983	*/
	984	cache = treap_probe(&sect->cache, name, sz, &path);
	985	if (!cache) {
	986	cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
	987	treap_insert(&sect->cache, &path, &cache->_node, name, sz);
	988	} else if (cache->f&CF_OPEN)
	989	lose("inheritance cycle through section `%s'",
	990	CONFIG_SECTION_NAME(sect));
	991	else
	992	return (cache->var);
	993
	994	/* Recursively search in each parent. We insist that all parents that find
	995	* a variable find the same binding; otherwise we declare ambiguous
	996	* inheritance.
	997	*/
	998	for (i = 0; i < sect->nparents; i++) {
	999	v = search_recursive(conf, sect->parents[i], name, sz);
	1000	if (!v);
	1001	else if (!var) { var = v; j = i; }
	1002	else if (var != v)
	1003	lose("section `%s' inherits variable `%s' ambiguously "
	1004	"via `%s' and `%s'",
	1005	CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
	1006	CONFIG_SECTION_NAME(sect->parents[j]),
	1007	CONFIG_SECTION_NAME(sect->parents[i]));
	1008	}
	1009
	1010	/* All done: fill the cache entry in, clear the open flag, and return the
	1011	* result.
	1012	*/
	1013	cache->var = var; cache->f &= ~CF_OPEN;
	1014	return (var);
	1015	}
	1016
	1017	/* Find and return the variable with null-terminated NAME in SECT.
	1018	*
	1019	* If `CF_INHERIT' is set in F, then the function searches the section's
	1020	* parents recursively; otherwise, it only checks to see whether the variable
	1021	* is set directly in SECT.
	1022	*
	1023	* If no variable is found, the behaviour depends on whether `CF_CREAT' is
	1024	* set in F: if so, an empty variable is created and returned; otherwise, a
	1025	* null pointer is returned.
	1026	*
	1027	* Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
	1028	*/
	1029	struct config_var config_find_var(struct config conf,
	1030	struct config_section *sect,
	1031	unsigned f, const char *name)
	1032	{ return (config_find_var_n(conf, sect, f, name, strlen(name))); }
	1033
	1034	/* Find and return the variable with the given SZ-byte NAME in SECT.
	1035	*
	1036	* This works like `config_find_var', but with an explicit length for the
	1037	* NAME rather than null-termination.
	1038	*/
	1039	struct config_var config_find_var_n(struct config conf,
	1040	struct config_section *sect,
	1041	unsigned f, const char *name, size_t sz)
	1042	{
	1043	struct config_var *var;
	1044	struct treap_path path;
	1045
	1046	if (f&CF_INHERIT)
	1047	var = search_recursive(conf, sect, name, sz);
	1048	else if (!(f&CF_CREAT))
	1049	var = treap_lookup(&sect->vars, name, sz);
	1050	else {
	1051	var = treap_probe(&sect->vars, name, sz, &path);
	1052	if (!var) {
	1053	var = xmalloc(sizeof(*var));
	1054	var->val = 0; var->file = 0; var->f = 0; var->line = 1;
	1055	treap_insert(&sect->vars, &path, &var->_node, name, sz);
	1056	}
	1057	}
	1058	return (var);
	1059	}
	1060
	1061	/* Set variable NAME to VALUE in SECT, with associated flags F.
	1062	*
	1063	* The names are null-terminated. The flags are variable flags: see `struct
	1064	* config_var' for details. Returns the variable.
	1065	*
	1066	* If the variable is already set and has the `CF_OVERRIDE' flag, then this
	1067	* function does nothing unless `CF_OVERRIDE' is /also/ set in F.
	1068	*/
	1069	struct config_var config_set_var(struct config conf,
	1070	struct config_section *sect,
	1071	unsigned f,
	1072	const char name, const char value)
	1073	{
	1074	return (config_set_var_n(conf, sect, f,
	1075	name, strlen(name),
	1076	value, strlen(value)));
	1077	}
	1078
	1079	/* As `config_set_var', except that the variable NAME and VALUE have explicit
	1080	* lengths (NAMELEN and VALUELEN, respectively) rather than being null-
	1081	* terminated.
	1082	*/
	1083	struct config_var config_set_var_n(struct config conf,
	1084	struct config_section *sect,
	1085	unsigned f,
	1086	const char *name, size_t namelen,
	1087	const char *value, size_t valuelen)
	1088	{
	1089	struct config_var *var =
	1090	config_find_var_n(conf, sect, CF_CREAT, name, namelen);
	1091
	1092	if (var->f&~f&CF_OVERRIDE) return (var);
	1093	free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
	1094	var->f = f;
	1095	return (var);
	1096	}
	1097
	1098	/* Initialize I to iterate over the variables directly defined in SECT. */
	1099	void config_start_var_iter(struct config conf, struct config_section sect,
	1100	struct config_var_iter *i)
	1101	{ treap_start_iter(&sect->vars, &i->i); }
	1102
	1103	/* Return next variable from I, in ascending lexicographical order.
	1104	*
	1105	* If there are no more variables, then return null.
	1106	*/
	1107	struct config_var config_next_var(struct config_var_iter i)
	1108	{ return (treap_next(&i->i)); }
	1109
	1110	/* Read and parse configuration FILE, applying its settings to CONF.
	1111	*
	1112	* If all goes well, the function returns 0. If the file is not found, then
	1113	* the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
	1114	* function simply returns -1. Otherwise, a fatal error is reported. Note
	1115	* that this /only/ applies if the file does not exist (specifically, opening
	1116	* it fails with `ENOENT') -- any other problems are reported as fatal
	1117	* errors regardless of the flag setting.
	1118	*/
	1119	int config_read_file(struct config conf, const char file, unsigned f)
	1120	{
	1121	struct config_section *sect;
	1122	struct config_var *var;
	1123	struct dstr d = DSTR_INIT, dd = DSTR_INIT;
	1124	unsigned line = 0;
	1125	const char p, q, *r;
	1126	FILE *fp;
	1127
	1128	/* Try to open the file. */
	1129	fp = fopen(file, "r");
	1130	if (!fp) {
	1131	if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
	1132	lose("failed to open configuration file `%s': %s",
	1133	file, strerror(errno));
	1134	}
	1135
	1136	/* Find the initial section. */
	1137	sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
	1138
	1139	/* Work through the file, line by line. */
	1140	for (;;) {
	1141	dstr_reset(&d); if (dstr_readline(&d, fp)) break;
	1142	line++;
	1143
	1144	/* Trim trailing spaces from the line. The syntax is sensitive to
	1145	* leading spaces, so we can't trim those yet.
	1146	*/
	1147	while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
	1148	d.p[d.len] = 0;
	1149
	1150	if (!d.p \|\| d.p == ';')
	1151	/* Ignore comments entirely. (In particular, a comment doesn't
	1152	* interrupt a multiline variable value.)
	1153	*/
	1154	;
	1155
	1156	else if (ISSPACE(d.p[0])) {
	1157	/* The line starts with whitespace, so it's a continuation line. */
	1158
	1159	/* Skip the initial whitespace. */
	1160	p = d.p; while (ISSPACE(*p)) p++;
	1161
	1162	/* If we aren't collecting a variable value then this is an error.
	1163	* Otherwise, accumulate it into the current value.
	1164	*/
	1165	if (!var)
	1166	lose("%s:%u: continuation line, but no variable", file, line);
	1167	if (dd.len) dstr_putc(&dd, ' ');
	1168	dstr_putm(&dd, p, d.len - (p - d.p));
	1169
	1170	} else {
	1171	/* The line starts in the first column. */
	1172
	1173	/* If there's a value value being collected then we must commit it to
	1174	* its variable (unless there's already a setting there that says we
	1175	* shouldn't).
	1176	*/
	1177	if (var) {
	1178	if (!(var->f&CF_OVERRIDE))
	1179	{ var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
	1180	var = 0;
	1181	}
	1182
	1183	/* Now decide what kind of line this is. */
	1184	if (d.p[0] == '[') {
	1185	/* It's a section header. */
	1186
	1187	/* Parse the header. */
	1188	p = d.p + 1; while (ISSPACE(*p)) p++;
	1189	q = scan_name("section", p, d.p + d.len, file, line);
	1190	r = q; while (ISSPACE(*r)) r++;
	1191	if (*r != ']')
	1192	lose("%s:%u: expected `]' in section header", file, line);
	1193	if (r[1])
	1194	lose("%s:%u: trailing junk after `]' in section header",
	1195	file, line);
	1196
	1197	/* Create the new section. */
	1198	sect = config_find_section_n(conf, CF_CREAT, p, q - p);
	1199
	1200	} else {
	1201	/* It's a variable assignment. Parse the name out. */
	1202	p = scan_name("variable", d.p, d.p + d.len, file, line);
	1203	var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
	1204	while (ISSPACE(*p)) p++;
	1205	if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
	1206	p++; while (ISSPACE(*p)) p++;
	1207
	1208	/* Clear out the variable's initial value, unless we shouldn't
	1209	* override it.
	1210	*/
	1211	if (!(var->f&CF_OVERRIDE)) {
	1212	free(var->val); var->val = 0; var->f = 0;
	1213	free(var->file); var->file = xstrdup(file); var->line = line;
	1214	}
	1215	dstr_reset(&dd); dstr_puts(&dd, p);
	1216	}
	1217	}
	1218	}
	1219
	1220	/* If there's a value under construction then commit the result. */
	1221	if (var && !(var->f&CF_OVERRIDE))
	1222	{ var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
	1223
	1224	/* Close the file. */
	1225	if (fclose(fp))
	1226	lose("error reading configuration file `%s': %s", file, strerror(errno));
	1227
	1228	/* All done. */
	1229	dstr_release(&d); dstr_release(&dd);
	1230	return (0);
	1231	}
	1232
	1233	/* Populate SECT with environment variables.
	1234	*
	1235	* Environment variables are always set with `CF_LITERAL'.
	1236	*/
	1237	void config_read_env(struct config conf, struct config_section sect)
	1238	{
	1239	const char p, v;
	1240	size_t i;
	1241
	1242	for (i = 0; (p = environ[i]) != 0; i++) {
	1243	v = strchr(p, '='); if (!v) continue;
	1244	config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
	1245	}
	1246	}
	1247
	1248	/----- Substitution and quoting ------------------------------------------/
	1249
	1250	/* The substitution and word-splitting state.
	1251	*
	1252	* This only keeps track of the immutable parameters for the substitution
	1253	* task: stuff which changes (flags, filtering state, cursor position) is
	1254	* maintained separately.
	1255	*/
	1256	struct subst {
	1257	struct config config; / configuration state */
	1258	struct config_section home; / home section for lookups */
	1259	struct dstr d; / current word being constructed */
	1260	struct argv av; / output word list */
	1261	};
	1262
	1263	/* Flags for `subst' and related functions. */
	1264	#define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */
	1265	#define SF_QUOT 0x0002u /* currently within double quotes */
	1266	#define SF_SUBST 0x0004u /* apply `$-substitutions */
	1267	#define SF_SUBEXPR 0x0008u /* stop at delimiter `\|' or `}' */
	1268	#define SF_SPANMASK 0x00ffu /* mask for the above */
	1269
	1270	#define SF_WORD 0x0100u /* output word under construction */
	1271	#define SF_SKIP 0x0200u /* not producing output */
	1272	#define SF_LITERAL 0x0400u /* do not expand or substitute */
	1273	#define SF_UPCASE 0x0800u /* convert to uppercase */
	1274	#define SF_DOWNCASE 0x1000u /* convert to lowercase */
	1275	#define SF_CASEMASK 0x1800u /* mask for case conversions */
	1276
	1277	/* Apply filters encoded in QFILT and F to the text from P to L, and output.
	1278	*
	1279	* SB is the substitution state which, in particular, explains where the
	1280	* output should go.
	1281	*
	1282	* The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
	1283	* conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is
	1284	* encoded as the number of toothpicks to print: see `subst' for how this
	1285	* determined.)
	1286	*/
	1287	static void filter_string(const char p, const char l,
	1288	const struct subst *sb, unsigned qfilt, unsigned f)
	1289	{
	1290	size_t r, n;
	1291	char q; const char pp, *ll;
	1292
	1293	if (!qfilt && !(f&SF_CASEMASK))
	1294	/* Fast path: there's nothing to do: just write to the output. */
	1295	dstr_putm(sb->d, p, l - p);
	1296
	1297	else for (;;) {
	1298	/* We must be a bit more circumspect. */
	1299
	1300	/* Determine the length of the next span of characters which don't need
	1301	* escaping. (If QFILT is zero then this is everything.)
	1302	*/
	1303	r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
	1304	if (n > r) n = r;
	1305
	1306	if (!(f&SF_CASEMASK))
	1307	/* No case conversion: we can just emit this chunk. */
	1308
	1309	dstr_putm(sb->d, p, n);
	1310
	1311	else {
	1312	/* Case conversion to do. Arrange enough space for the output, and
	1313	* convert it character by character.
	1314	*/
	1315
	1316	dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
	1317	if (f&SF_DOWNCASE) while (pp < ll) q++ = TOLOWER(pp++);
	1318	else if (f&SF_UPCASE) while (pp < ll) q++ = TOUPPER(pp++);
	1319	sb->d->len += n;
	1320	}
	1321
	1322	/* If we've reached the end then stop. */
	1323	if (n >= r) break;
	1324
	1325	/* Otherwise we must have found a character which requires escaping.
	1326	* Emit enough toothpicks.
	1327	*/
	1328	dstr_putcn(sb->d, '\\', qfilt);
	1329
	1330	/* This character is now done, so we can skip over and see if there's
	1331	* another chunk of stuff we can do at high speed.
	1332	*/
	1333	dstr_putc(sb->d, p[n]); p += n + 1;
	1334	}
	1335	}
	1336
	1337	/* Scan and resolve a `[SECT:]VAR' specifier at P.
	1338	*
	1339	* Return the address of the next character following the specifier. L is a
	1340	* limit on the region of the buffer that we should process; SB is the
	1341	* substitution state which provides the home section if none is given
	1342	* explicitly; FILE and LINE are the source location to blame for problems.
	1343	*/
	1344	static const char retrieve_varspec(const char p, const char *l,
	1345	const struct subst *sb,
	1346	struct config_var **var_out,
	1347	const char *file, unsigned line)
	1348	{
	1349	struct config_section *sect = sb->home;
	1350	const char *t;
	1351
	1352	t = scan_name("section or variable", p, l, file, line);
	1353	if (t < l && *t == ':') {
	1354	sect = config_find_section_n(sb->config, 0, p, t - p);
	1355	p = t + 1; t = scan_name("variable", p, l, file, line);
	1356	}
	1357
	1358	if (!sect) *var_out = 0;
	1359	else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
	1360	return (t);
	1361	}
	1362
	1363	/* Substitute and/or word-split text.
	1364	*
	1365	* The input text starts at P, and continues to (just before) L. Context for
	1366	* the task is provided by SB; the source location to blame is FILE and LINE
	1367	* (FILE may be null so that this can be passed directly from a `config_var'
	1368	* without further checking); QFILT is the nesting depth in toothpick-
	1369	* escaping; and F holds a mask of `SF_...' flags.
	1370	*/
	1371	static const char subst(const char p, const char *l,
	1372	const struct subst *sb,
	1373	const char *file, unsigned line,
	1374	unsigned qfilt, unsigned f)
	1375	{
	1376	struct config_var *var;
	1377	const char q0, q1, *t;
	1378	unsigned subqfilt, ff;
	1379	size_t n;
	1380
	1381	/* It would be best if we could process literal text at high speed. To
	1382	* this end, we have a table, indexed by the low-order bits of F, to tell
	1383	* us which special characters we need to stop at. This way, we can use
	1384	* `strcspn' to skip over literal text and stop at the next character which
	1385	* needs special handling. Entries in this table with a null pointer
	1386	* correspond to impossible flag settings.
	1387	*/
	1388	static const char *const delimtab[] = {
	1389
	1390	#define ESCAPE "\\" /* always watch for `\'-escapes */
	1391	#define SUBST "$" /* check for `$' if `SF_SUBST' set */
	1392	#define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but
	1393	* not `SF_QUOT' */
	1394	#define QUOT "\"" /* only quotes if `SF_SPLIT' and
	1395	* `SF_QUOT' */
	1396	#define DELIM "\|}" /* end delimiters of `SF_SUBEXPR' */
	1397
	1398	ESCAPE,
	1399	ESCAPE WORDSEP,
	1400	0,
	1401	ESCAPE QUOT,
	1402	ESCAPE SUBST,
	1403	ESCAPE SUBST WORDSEP,
	1404	0,
	1405	ESCAPE SUBST QUOT,
	1406	ESCAPE DELIM,
	1407	ESCAPE DELIM WORDSEP,
	1408	0,
	1409	ESCAPE DELIM QUOT,
	1410	ESCAPE DELIM SUBST,
	1411	ESCAPE DELIM SUBST WORDSEP,
	1412	0,
	1413	ESCAPE DELIM SUBST QUOT
	1414
	1415	#undef COMMON
	1416	#undef WORDSEP
	1417	#undef SQUOT
	1418	#undef DELIM
	1419	};
	1420
	1421	/* Set FILE to be useful if it was null on entry. */
	1422	if (!file) file = "<internal>";
	1423
	1424	/* If the text is literal then hand off to `filter_string'. This obviously
	1425	* starts a word.
	1426	*/
	1427	if (f&SF_LITERAL) {
	1428	filter_string(p, l, sb, qfilt, f);
	1429	f \|= SF_WORD;
	1430	goto done;
	1431	}
	1432
	1433	/* Chew through the input until it's all gone. */
	1434	while (p < l) {
	1435
	1436	if ((f&(SF_SPLIT \| SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
	1437	/* This is whitespace, we're supposed to split, and we're not within
	1438	* quotes, so we should split here.
	1439	*/
	1440
	1441	/* If there's a word in progress then we should commit it. */
	1442	if (f&SF_WORD) {
	1443	if (!(f&SF_SKIP)) {
	1444	argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
	1445	dstr_reset(sb->d);
	1446	}
	1447	f &= ~SF_WORD;
	1448	}
	1449
	1450	/* Skip over further whitespace at high speed. */
	1451	do p++; while (p < l && ISSPACE(*p));
	1452
	1453	} else if (*p == '\\') {
	1454	/* This is a toothpick, so start a new word and add the next character
	1455	* to it.
	1456	*/
	1457
	1458	/* If there's no next charact3er then we should be upset. */
	1459	p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
	1460
	1461	if (!(f&SF_SKIP)) {
	1462
	1463	/* If this is a double quote or backslash then check DFLT to see if
	1464	* it needs escaping.
	1465	*/
	1466	if (qfilt && (p == '"' \|\| p == '\\'))
	1467	dstr_putcn(sb->d, '\\', qfilt);
	1468
	1469	/* Output the character. */
	1470	if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
	1471	else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
	1472	else dstr_putc(sb->d, *p);
	1473	}
	1474
	1475	/* Move past the escaped character. Remember we started a word. */
	1476	p++; f \|= SF_WORD;
	1477
	1478	} else if ((f&SF_SPLIT) && *p == '"') {
	1479	/* This is a double quote, and we're word splitting. We're definitely
	1480	* in a word now. Toggle whether we're within quotes.
	1481	*/
	1482
	1483	f ^= SF_QUOT; f \|= SF_WORD; p++;
	1484
	1485	} else if ((f&(SF_SPLIT \| SF_QUOT)) == SF_SPLIT && *p == '\'') {
	1486	/* This is a single quote, and we're word splitting but not within
	1487	* double quotes. Find the matching end quote, and just output
	1488	* everything between literally.
	1489	*/
	1490
	1491	p++; t = strchr(p, '\'');
	1492	if (!t \|\| t >= l) lose("%s:%u: missing `''", file, line);
	1493	if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
	1494	p = t + 1; f \|= SF_WORD;
	1495
	1496	} else if ((f&SF_SUBEXPR) && (p == '\|' \|\| p == '}')) {
	1497	/* This is an end delimiter, and we're supposed to stop here. */
	1498	break;
	1499
	1500	} else if ((f&SF_SUBST) && *p == '$') {
	1501	/* This is a `$' and we're supposed to do substitution. */
	1502
	1503	/* The kind of substitution is determined by the next character. */
	1504	p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
	1505
	1506	/* Prepare flags for a recursive substitution.
	1507	*
	1508	* Hide our quote state from the recursive call. If we're within a
	1509	* word, then disable word-splitting.
	1510	*/
	1511	ff = f&~(SF_QUOT \| (f&SF_WORD ? SF_SPLIT : 0));
	1512
	1513	/* Now dispatch based on the following character. */
	1514	switch (*p) {
	1515
	1516	case '?':
	1517	/* A conditional expression: $?VAR{CONSEQ[\|ALT]} */
	1518
	1519	/* Skip initial space. */
	1520	p++; while (p < l && ISSPACE(*p)) p++;
	1521
	1522	/* Find the variable. */
	1523	p = retrieve_varspec(p, l, sb, &var, file, line);
	1524
	1525	/* Skip whitespace again. */
	1526	while (p < l && ISSPACE(*p)) p++;
	1527
	1528	/* Expect the opening `{'. */
	1529	if (p > l \|\| *p != '{') lose("%s:%u: expected `{'", file, line);
	1530	p++;
	1531
	1532	/* We'll process the parts recursively, but we need to come back
	1533	* when we hit the appropriate delimiters, so arrange for that.
	1534	*/
	1535	ff \|= SF_SUBEXPR;
	1536
	1537	/* Process the consequent (skip if the variable wasn't found). */
	1538	p = subst(p, l, sb, file, line, qfilt,
	1539	ff \| (var ? 0 : SF_SKIP));
	1540
	1541	/* If there's a `\|' then process the alternative too (skip if the
	1542	* variable /was/ found).
	1543	*/
	1544	if (p < l && *p == '\|')
	1545	p = subst(p + 1, l, sb, file, line, qfilt,
	1546	ff \| (var ? SF_SKIP : 0));
	1547
	1548	/* We should now be past the closing `}'. */
	1549	if (p >= l \|\| *p != '}') lose("%s:%u: missing `}'", file, line);
	1550	p++;
	1551	break;
	1552
	1553	case '{':
	1554	/* A variable substitution: ${VAR[\|FILT]...[?ALT]} */
	1555
	1556	/* Skip initial whitespace. */
	1557	p++; while (p < l && ISSPACE(*p)) p++;
	1558
	1559	/* Find the variable. */
	1560	q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
	1561
	1562	/* Determine the filters to apply when substituting the variable
	1563	* value.
	1564	*/
	1565	subqfilt = qfilt;
	1566	for (;;) {
	1567
	1568	/* Skip spaces again. */
	1569	while (p < l && ISSPACE(*p)) p++;
	1570
	1571	/* If there's no `\|' then there are no more filters, so stop. */
	1572	if (p >= l \|\| *p != '\|') break;
	1573
	1574	/* Skip the `\|' and more spaces. */
	1575	p++; while (p < l && ISSPACE(*p)) p++;
	1576
	1577	/* Collect the filter name. */
	1578	t = scan_name("filter", p, l, file, line);
	1579
	1580	/* Dispatch on the filter name. */
	1581	if (t - p == 1 && *p == 'q')
	1582	/* `q' -- quote for Lisp string.
	1583	*
	1584	* We're currently adding Q `\' characters before each naughty
	1585	* character. But a backslash itself is naughty too, so that
	1586	* makes Q + 1 naughty characters, each of which needs a
	1587	* toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
	1588	*
	1589	* Calculate this here rather than at each point toothpicks
	1590	* needs to be deployed.
	1591	*/
	1592
	1593	subqfilt = 2*subqfilt + 1;
	1594
	1595	else if (t - p == 1 && *p == 'l')
	1596	/* `u' -- convert to uppercase.
	1597	*
	1598	* If a case conversion is already set, then that will override
	1599	* whatever we do here, so don't bother.
	1600	*/
	1601
	1602	{ if (!(ff&SF_CASEMASK)) ff \|= SF_DOWNCASE; }
	1603
	1604	else if (t - p == 1 && *p == 'u')
	1605	/* `u' -- convert to uppercase.
	1606	*
	1607	* If a case conversion is already set, then that will override
	1608	* whatever we do here, so don't bother.
	1609	*/
	1610	{ if (!(ff&SF_CASEMASK)) ff \|= SF_UPCASE; }
	1611
	1612	else
	1613	/* Something else we didn't understand. */
	1614	lose("%s:%u: unknown filter `%.*s'",
	1615	file, line, (int)(t - p), p);
	1616
	1617	/* Continue from after the filter name. */
	1618	p = t;
	1619	}
	1620
	1621	/* If we're not skipping, and we found a variable, then substitute
	1622	* its value. This is the point where we need to be careful about
	1623	* recursive expansion.
	1624	*/
	1625	if (!(f&SF_SKIP) && var) {
	1626	if (var->f&CF_EXPAND)
	1627	lose("%s:%u: recursive expansion of variable `%.*s'",
	1628	file, line, (int)(q1 - q0), q0);
	1629	var->f \|= CF_EXPAND;
	1630	subst(var->val, var->val + var->n, sb,
	1631	var->file, var->line, subqfilt,
	1632	ff \| (var->f&CF_LITERAL ? SF_LITERAL : 0));
	1633	var->f &= ~CF_EXPAND;
	1634	}
	1635
	1636	/* If there's an alternative, then we need to process (or maybe
	1637	* skip) it. Otherwise, we should complain if there was no
	1638	* veriable, and we're not skipping.
	1639	*/
	1640	if (p < l && *p == '?')
	1641	p = subst(p + 1, l, sb, file, line, subqfilt,
	1642	ff \| SF_SUBEXPR \| (var ? SF_SKIP : 0));
	1643	else if (!var && !(f&SF_SKIP))
	1644	lose("%s:%u: unknown variable `%.*s'",
	1645	file, line, (int)(q1 - q0), q0);
	1646
	1647	/* Expect a `}' here. (No need to skip spaces: we already did that
	1648	* after scanning for filters, and either there was no alternative,
	1649	* or we advanced to a delimiter character anyway.)
	1650	*/
	1651	if (p >= l \|\| *p != '}') lose("%s:%u: missing `}'", file, line);
	1652	p++;
	1653	break;
	1654
	1655	default:
	1656	/* Something else. That's a shame. */
	1657	lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
	1658	}
	1659
	1660	/* Complain if we started out in word-splitting state, and therefore
	1661	* have added a whole number of words to the output, but there's a
	1662	* word-fragment stuck onto the end of this substitution.
	1663	*/
	1664	if (p < l && !(~f&~(SF_WORD \| SF_SPLIT)) && !ISSPACE(*p) &&
	1665	!((f&SF_SUBEXPR) && (p == '\|' \|\| p == '}')))
	1666	lose("%s:%u: surprising word boundary "
	1667	"after splicing substitution",
	1668	file, line);
	1669	}
	1670
	1671	else {
	1672	/* Something else. Try to skip over this at high speed.
	1673	*
	1674	* This makes use of the table we set up earlier.
	1675	*/
	1676
	1677	n = strcspn(p, delimtab[f&SF_SPANMASK]);
	1678	if (n > l - p) n = l - p;
	1679	if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
	1680	p += n; f \|= SF_WORD;
	1681	}
	1682	}
	1683
	1684	done:
	1685	/* Sort out the wreckage. */
	1686
	1687	/* If we're still within quotes then something has gone wrong. */
	1688	if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
	1689
	1690	/* If we're within a word, and should be splitting, then commit the word to
	1691	* the output list.
	1692	*/
	1693	if ((f&(SF_WORD \| SF_SPLIT \| SF_SKIP)) == (SF_SPLIT \| SF_WORD)) {
	1694	argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
	1695	dstr_reset(sb->d);
	1696	}
	1697
	1698	/* And, with that, we're done. */
	1699	return (p);
	1700	}
	1701
	1702	/* Expand substitutions in a string.
	1703	*
	1704	* Expand the null-terminated string P relative to the HOME section, using
	1705	* configuration CONFIG, and appending the result to dynamic string D. Blame
	1706	* WHAT in any error messages.
	1707	*/
	1708	void config_subst_string(struct config config, struct config_section home,
	1709	const char what, const char p, struct dstr *d)
	1710	{
	1711	struct subst sb;
	1712
	1713	sb.config = config; sb.home = home; sb.d = d;
	1714	subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
	1715	dstr_putz(d);
	1716	}
	1717
	1718	/* Expand substitutions in a string.
	1719	*
	1720	* Expand the null-terminated string P relative to the HOME section, using
	1721	* configuration CONFIG, returning the result as a freshly malloc(3)ed
	1722	* string. Blame WHAT in any error messages.
	1723	*/
	1724	char config_subst_string_alloc(struct config config,
	1725	struct config_section *home,
	1726	const char what, const char p)
	1727	{
	1728	struct dstr d = DSTR_INIT;
	1729	char *q;
	1730
	1731	config_subst_string(config, home, what, p, &d);
	1732	q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
	1733	}
	1734
	1735	/* Expand substitutions in a variable.
	1736	*
	1737	* Expand the value of the variable VAR relative to the HOME section, using
	1738	* configuration CONFIG, appending the result to dynamic string D.
	1739	*/
	1740	void config_subst_var(struct config config, struct config_section home,
	1741	struct config_var var, struct dstr d)
	1742	{
	1743	struct subst sb;
	1744
	1745	sb.config = config; sb.home = home; sb.d = d;
	1746	var->f \|= CF_EXPAND;
	1747	subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
	1748	SF_SUBST \| (var->f&CF_LITERAL ? SF_LITERAL : 0));
	1749	var->f &= ~CF_EXPAND;
	1750	dstr_putz(d);
	1751	}
	1752
	1753	/* Expand substitutions in a variable.
	1754	*
	1755	* Expand the value of the variable VAR relative to the HOME section, using
	1756	* configuration CONFIG, returning the result as a freshly malloc(3)ed
	1757	* string.
	1758	*/
	1759	char config_subst_var_alloc(struct config config,
	1760	struct config_section *home,
	1761	struct config_var *var)
	1762	{
	1763	struct dstr d = DSTR_INIT;
	1764	char *q;
	1765
	1766	config_subst_var(config, home, var, &d);
	1767	q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
	1768	}
	1769
	1770	/* Expand substitutions in a variable and split into words.
	1771	*
	1772	* Expand and word-split the value of the variable VAR relative to the HOME
	1773	* section, using configuration CONFIG, appending the resulting words into
	1774	* the vector AV.
	1775	*/
	1776	void config_subst_split_var(struct config *config,
	1777	struct config_section *home,
	1778	struct config_var var, struct argv av)
	1779	{
	1780	struct dstr d = DSTR_INIT;
	1781	struct subst sb;
	1782
	1783	sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
	1784	var->f \|= CF_EXPAND;
	1785	subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
	1786	SF_SUBST \| SF_SPLIT \| (var->f&CF_LITERAL ? SF_LITERAL : 0));
	1787	var->f &= ~CF_EXPAND;
	1788	dstr_release(&d);
	1789	}
	1790
	1791	/----- That's all, folks -------------------------------------------------/