chiark - git - mdw - runlisp/blob - lib.c

   1 /* -*-c-*-
   2  *
   3  * Common definitions for `runlisp'
   4  *
   5  * (c) 2020 Mark Wooding
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
  11  *
  12  * Runlisp is free software: you can redistribute it and/or modify it
  13  * under the terms of the GNU General Public License as published by the
  14  * Free Software Foundation; either version 3 of the License, or (at your
  15  * option) any later version.
  16  *
  17  * Runlisp is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  20  * for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with Runlisp.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25
  26 /*----- Header files ------------------------------------------------------*/
  27
  28 #include "config.h"
  29
  30 #include <assert.h>
  31
  32 #include <ctype.h>
  33 #include <errno.h>
  34 #include <stdarg.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38
  39 #include <unistd.h>
  40
  41 #include "lib.h"
  42
  43 /*----- Diagnostic utilities ----------------------------------------------*/
  44
  45 const char *progname = "???";
  46         /* Our program name, for use in error messages. */
  47
  48 /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
  49 void set_progname(const char *prog)
  50 {
  51   const char *p;
  52
  53   p = strrchr(prog, '/');
  54   progname = p ? p + 1 : prog;
  55 }
  56
  57 /* Report an error or warning in Unix style, given a captured argument
  58  * cursor.
  59  */
  60 void vmoan(const char *msg, va_list ap)
  61 {
  62   fprintf(stderr, "%s: ", progname);
  63   vfprintf(stderr, msg, ap);
  64   fputc('\n', stderr);
  65 }
  66
  67 /* Issue a warning message. */
  68 void moan(const char *msg, ...)
  69   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
  70
  71 /* Issue a fatal error message and exit unsuccessfully. */
  72 void lose(const char *msg, ...)
  73   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
  74
  75 /*----- Memory allocation -------------------------------------------------*/
  76
  77 /* Allocate and return a pointer to N bytes, or report a fatal error.
  78  *
  79  * Release the pointer using `free' as usual.  If N is zero, returns null
  80  * (but you are not expected to check for this).
  81  */
  82 void *xmalloc(size_t n)
  83 {
  84   void *p;
  85
  86   if (!n) return (0);
  87   p = malloc(n); if (!p) lose("failed to allocate memory");
  88   return (p);
  89 }
  90
  91 /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
  92  *
  93  * The block might (and probably will) move, so it returns the new address.
  94  * If N is zero, then the block is freed (if necessary) and a null pointer
  95  * returned; otherwise, if P is null then a fresh block is allocated.  If
  96  * allocation fails, then a fatal error is reported.
  97  */
  98 void *xrealloc(void *p, size_t n)
  99 {
 100   if (!n) { free(p); return (0); }
 101   else if (!p) return (xmalloc(n));
 102   p = realloc(p, n); if (!p) lose("failed to allocate memory");
 103   return (p);
 104 }
 105
 106 /* Allocate and return a copy of the N-byte string starting at P.
 107  *
 108  * The new string is null-terminated, though P need not be.  If allocation
 109  * fails, then a fatal error is reported.
 110  */
 111 char *xstrndup(const char *p, size_t n)
 112 {
 113   char *q = xmalloc(n + 1);
 114
 115   memcpy(q, p, n); q[n] = 0;
 116   return (q);
 117 }
 118
 119 /* Allocate and return a copy of the null-terminated string starting at P.
 120  *
 121  * If allocation fails, then a fatal error is reported.
 122  */
 123 char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
 124
 125 /*----- Dynamic strings ---------------------------------------------------*/
 126
 127 /* Initialize the string D.
 128  *
 129  * Usually you'd use the static initializer `DSTR_INIT'.
 130  */
 131 void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
 132
 133 /* Reset string D so it's empty again. */
 134 void dstr_reset(struct dstr *d) { d->len = 0; }
 135
 136 /* Ensure that D has at least N unused bytes available. */
 137 void dstr_ensure(struct dstr *d, size_t n)
 138 {
 139   size_t need = d->len + n, newsz;
 140
 141   if (need <= d->sz) return;
 142   newsz = d->sz ? 2*d->sz : 16;
 143   while (newsz < need) newsz *= 2;
 144   d->p = xrealloc(d->p, newsz); d->sz = newsz;
 145 }
 146
 147 /* Release the memory held by D.
 148  *
 149  * It must be reinitialized (e.g., by `dstr_init') before it can be used
 150  * again.
 151  */
 152 void dstr_release(struct dstr *d) { free(d->p); }
 153
 154 /* Append the N-byte string at P to D.
 155  *
 156  * P need not be null-terminated.  D will not be null-terminated
 157  * afterwards.
 158  */
 159 void dstr_putm(struct dstr *d, const void *p, size_t n)
 160   { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
 161
 162 /* Append the null-terminated string P to D.
 163  *
 164  * D /is/ guaranteed to be null-terminated after this.
 165  */
 166 void dstr_puts(struct dstr *d, const char *p)
 167 {
 168   size_t n = strlen(p);
 169
 170   dstr_ensure(d, n + 1);
 171   memcpy(d->p + d->len, p, n + 1);
 172   d->len += n;
 173 }
 174
 175 /* Append the single character CH to D.
 176  *
 177  * D will not be null-terminated afterwards.
 178  */
 179 void dstr_putc(struct dstr *d, int ch)
 180   { dstr_ensure(d, 1); d->p[d->len++] = ch; }
 181
 182 /* Append N copies of the character CH to D.
 183  *
 184  * D will not be null-terminated afterwards.
 185  */
 186 void dstr_putcn(struct dstr *d, int ch, size_t n)
 187   { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
 188
 189 /* Null-terminate the string D.
 190  *
 191  * This doesn't change the length of D.  If further stuff is appended then
 192  * the null terminator will be overwritten.
 193  */
 194 void dstr_putz(struct dstr *d)
 195   { dstr_ensure(d, 1); d->p[d->len] = 0; }
 196
 197 /* Append stuff to D, determined by printf(3) format string P and argument
 198  * tail AP.
 199  *
 200  * D will not be null-terminated afterwards.
 201  */
 202 void dstr_vputf(struct dstr *d, const char *p, va_list ap)
 203 {
 204   va_list ap2;
 205   size_t r;
 206   int n;
 207
 208   r = d->sz - d->len;
 209   va_copy(ap2, ap);
 210   n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
 211   va_end(ap2);
 212   if (n >= r) {
 213     dstr_ensure(d, n + 1); r = d->sz - d->len;
 214     n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
 215   }
 216   d->len += n;
 217 }
 218
 219 /* Append stuff to D, determined by printf(3) format string P and arguments.
 220  *
 221  * D will not be null-terminated afterwards.
 222  */
 223 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
 224   { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
 225
 226 /* Append the next input line from FP to D.
 227  *
 228  * Return 0 on success, or -1 if reading immediately fails or encounters
 229  * end-of-file (call ferror(3) to distinguish).  Any trailing newline is
 230  * discarded: it is not possible to determine whether the last line was ended
 231  * with a newline.  D is guaranteed to be null-terminated afterwards.
 232  */
 233 int dstr_readline(struct dstr *d, FILE *fp)
 234 {
 235   size_t n;
 236   int any = 0;
 237
 238   for (;;) {
 239     dstr_ensure(d, 2);
 240     if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
 241     n = strlen(d->p + d->len); assert(n > 0); any = 1;
 242     d->len += n;
 243     if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
 244   }
 245
 246   if (!any) return (-1);
 247   else return (0);
 248 }
 249
 250 /*----- Dynamic vectors of strings ----------------------------------------*/
 251
 252 /* Initialize the vector AV.
 253  *
 254  * Usually you'd use the static initializer `ARGV_INIT'.
 255  */
 256 void argv_init(struct argv *av)
 257   { av->v = 0; av->o = av->n = av->sz = 0; }
 258
 259 /* Reset the vector AV so that it's empty again. */
 260 void argv_reset(struct argv *av) { av->n = 0; }
 261
 262 /* Ensure that AV has at least N unused slots at the end. */
 263 void argv_ensure(struct argv *av, size_t n)
 264 {
 265   size_t need = av->n + av->o + n, newsz;
 266
 267   if (need <= av->sz) return;
 268   newsz = av->sz ? 2*av->sz : 8;
 269   while (newsz < need) newsz *= 2;
 270   av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
 271   av->sz = newsz;
 272 }
 273
 274 /* Ensure that AV has at least N unused slots at the /start/. */
 275 void argv_ensure_offset(struct argv *av, size_t n)
 276 {
 277   size_t newoff;
 278
 279   /* Stupid version.  We won't, in practice, be prepending lots of stuff, so
 280    * avoid the extra bookkeeping involved in trying to make a double-ended
 281    * extendable array asymptotically efficient.
 282    */
 283   if (av->o >= n) return;
 284   newoff = 16;
 285   while (newoff < n) newoff *= 2;
 286   argv_ensure(av, newoff - av->o);
 287   memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
 288   av->v += newoff - av->o; av->o = newoff;
 289 }
 290
 291 /* Release the memory held by AV.
 292  *
 293  * It must be reinitialized (e.g., by `argv_init') before it can be used
 294  * again.
 295  */
 296 void argv_release(struct argv *av) { free(av->v - av->o); }
 297
 298 /* Append the pointer P to AV. */
 299 void argv_append(struct argv *av, char *p)
 300   { argv_ensure(av, 1); av->v[av->n++] = p; }
 301
 302 /* Append a null pointer to AV, without extending the vactor length.
 303  *
 304  * The null pointer will be overwritten when the next string is appended.
 305  */
 306 void argv_appendz(struct argv *av)
 307   { argv_ensure(av, 1); av->v[av->n] = 0; }
 308
 309 /* Append a N-element vector V of pointers to AV. */
 310 void argv_appendn(struct argv *av, char *const *v, size_t n)
 311 {
 312   argv_ensure(av, n);
 313   memcpy(av->v + av->n, v, n*sizeof(const char *));
 314   av->n += n;
 315 }
 316
 317 /* Append the variable-length vector BV to AV. */
 318 void argv_appendav(struct argv *av, const struct argv *bv)
 319   { argv_appendn(av, bv->v, bv->n); }
 320
 321 /* Append the pointers from a variable-length argument list AP to AV.
 322  *
 323  * The list is terminated by a null pointer.
 324  */
 325 void argv_appendv(struct argv *av, va_list ap)
 326 {
 327   char *p;
 328   for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
 329 }
 330
 331 /* Append the argument pointers, terminated by a null pointer, to AV. */
 332 void argv_appendl(struct argv *av, ...)
 333   { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
 334
 335 /* Prepend the pointer P to AV. */
 336 void argv_prepend(struct argv *av, char *p)
 337   { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
 338
 339 /* Prepend a N-element vector V of pointers to AV. */
 340 void argv_prependn(struct argv *av, char *const *v, size_t n)
 341 {
 342   argv_ensure_offset(av, n);
 343   av->o -= n; av->v -= n; av->n += n;
 344   memcpy(av->v, v, n*sizeof(const char *));
 345 }
 346
 347 /* Prepend the variable-length vector BV to AV. */
 348 void argv_prependav(struct argv *av, const struct argv *bv)
 349   { argv_prependn(av, bv->v, bv->n); }
 350
 351 /* Prepend the pointers from a variable-length argument list AP to AV.
 352  *
 353  * The list is terminated by a null pointer.
 354  */
 355 void argv_prependv(struct argv *av, va_list ap)
 356 {
 357   char *p, **v;
 358   size_t n = 0;
 359
 360   for (;;) {
 361     p = va_arg(ap, char *); if (!p) break;
 362     argv_prepend(av, p); n++;
 363   }
 364   v = av->v;
 365   while (n >= 2) {
 366     p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
 367     v++; n -= 2;
 368   }
 369 }
 370
 371 /* Prepend the argument pointers, terminated by a null pointer, to AV. */
 372 void argv_prependl(struct argv *av, ...)
 373   { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
 374
 375 /*----- Treaps ------------------------------------------------------------*/
 376
 377 /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
 378  * string B in a lexicographic ordering.
 379  *
 380  * All comparisons of keys is handled by this function.
 381  */
 382 static int str_lt(const char *a, size_t an, const char *b, size_t bn)
 383 {
 384   /* This is a little subtle.  We need only compare the first N bytes of the
 385    * strings, where N is the length of the shorter string.  If this
 386    * distinguishes the two strings, then we're clearly done.  Otherwise, if
 387    * the prefixes are equal then the shorter string is the smaller one.  If
 388    * the two strings are the same length, then they're equal.
 389    *
 390    * Hence, if A is the strictly shorter string, then A precedes B if A
 391    * precedes or matches the prefix of B; otherwise A only precedes B if A
 392    * strictly precedes the prefix of B.
 393    */
 394   if (an < bn) return (MEMCMP(a, <=, b, an));
 395   else return (MEMCMP(a, <, b, bn));
 396 }
 397
 398 /* Initialize the treap T.
 399  *
 400  * Usually you'd use the static initializer `TREAP_INIT'.
 401  */
 402 void treap_init(struct treap *t) { t->root = 0; }
 403
 404 /* Look up the KN-byte key K in the treap T.
 405  *
 406  * Return a pointer to the matching node if one was found, or null otherwise.
 407  */
 408 void *treap_lookup(const struct treap *t, const char *k, size_t kn)
 409 {
 410   struct treap_node *n = t->root, *candidate = 0;
 411
 412   /* This is a simple prototype for some of the search loops we'll encounter
 413    * later.  Notice that we use a strict one-sided comparison, rather than
 414    * the more conventional two-sided comparison.
 415    *
 416    * The main loop will find the largest key not greater than K.
 417    */
 418   while (n)
 419     /* Compare the node's key against our key.  If the node is too large,
 420      * then we ignore it and move left.  Otherwise remember this node for
 421      * later, and move right to see if we can find a better, larger node.
 422      */
 423
 424     if (str_lt(k, kn, n->k, n->kn)) n = n->left;
 425     else { candidate = n; n = n->right; }
 426
 427   /* If the candidate node is less than our key then we failed.  Otherwise,
 428    * by trichotomy, we have found the correct node.
 429    */
 430   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
 431   return (candidate);
 432 }
 433
 434 /* Look up the KN-byte K in the treap T, recording a path in P.
 435  *
 436  * This is similar to `treap_lookup', in that it returns the requested node
 437  * if it already exists, or null otherwise, but it also records in P
 438  * information to be used by `treap_insert' to insert a new node with the
 439  * given key if it's not there already.
 440  */
 441 void *treap_probe(struct treap *t, const char *k, size_t kn,
 442                   struct treap_path *p)
 443 {
 444   struct treap_node **nn = &t->root, *candidate = 0;
 445   unsigned i = 0;
 446
 447   /* This walk is similar to `treap_lookup' above, except that we also record
 448    * the address of each node pointer we visit along the way.
 449    */
 450   for (;;) {
 451     assert(i < TREAP_PATHMAX); p->path[i++] = nn;
 452     if (!*nn) break;
 453     if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
 454     else { candidate = *nn; nn = &(*nn)->right; }
 455   }
 456   p->nsteps = i;
 457
 458   /* Check to see whether we found the right node. */
 459   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
 460   return (candidate);
 461 }
 462
 463 /* Insert a new node N into T, associating it with the KN-byte key K.
 464  *
 465  * Use the path data P, from `treap_probe', to help with insertion.
 466  */
 467 void treap_insert(struct treap *t, const struct treap_path *p,
 468                   struct treap_node *n, const char *k, size_t kn)
 469 {
 470   size_t i = p->nsteps;
 471   struct treap_node **nn, **uu, *u;
 472   unsigned wt;
 473
 474   /* Fill in the node structure. */
 475   n->k = xstrndup(k, kn); n->kn = kn;
 476   n->wt = wt = rand(); n->left = n->right = 0;
 477
 478   /* Prepare for the insertion.
 479    *
 480    * The path actually points to each of the links traversed when searching
 481    * for the node, starting with the `root' pointer, then the `left' or
 482    * `right' pointer of the root node, and so on; `nsteps' will always be
 483    * nonzero, since the path will always pass through the root, and the final
 484    * step, `path->path[path->nsteps - 1]' will always be the address of a
 485    * null pointer onto which the freshly inserted node could be hooked in
 486    * order to satisfy the binary-search-tree ordering.  (Of course, this will
 487    * likely /not/ satisfy the heap condition, so more work needs to be done.)
 488    *
 489    * Throughout, NN is our current candidate for where to attach the node N.
 490    * As the loop progresses, NN will ascend to links further up the tree, and
 491    * N will be adjusted to accumulate pieces of the existing tree structure.
 492    * We'll stop when we find that the parent node's weight is larger than our
 493    * new node's weight, at which point we can just set *NN = N; or if we run
 494    * out of steps in the path, in which case *NN is the root pointer.
 495    */
 496   assert(i); nn = p->path[--i];
 497   while (i--) {
 498
 499     /* Collect the next step in the path, and get the pointer to the node. */
 500     uu = p->path[i]; u = *uu;
 501
 502     /* If this node's weight is higher, then we've found the right level and
 503      * we can stop.
 504      */
 505     if (wt <= u->wt) break;
 506
 507     /* The node U is lighter than our new node N, so we must rotate in order
 508      * to fix things.  If we were currently planning to hook N as the left
 509      * subtree of U, then we rotate like this:
 510      *
 511      *                  |                   |
 512      *                  U                  (N)
 513      *                /   \               /   \
 514      *             (N)      Z   --->    X       U
 515      *            /   \                       /   \
 516      *          X       Y                   Y       Z
 517      *
 518      * On the other hand, if we ere planning to hook N as the right subtree
 519      * of U, then we do the opposite rotation:
 520      *
 521      *              |                           |
 522      *              U                          (N)
 523      *            /   \                       /   \
 524      *          X      (N)      --->        U       Z
 525      *                /   \               /   \
 526      *              Y       Z           X       Y
 527      *
 528      * These transformations clearly preserve the ordering of nodes in the
 529      * binary search tree, and satisfy the heap condition in the subtree
 530      * headed by N.
 531      */
 532     if (nn == &u->left) { u->left = n->right; n->right = u; }
 533     else { u->right = n->left; n->left = u; }
 534
 535     /* And this arrangement must be attached to UU, or some higher attachment
 536      * point.  The subtree satisfies the heap condition, and can be attached
 537      * safely at the selected place.
 538      */
 539     nn = uu;
 540   }
 541
 542   /* We've found the right spot.  Hook the accumulated subtree into place. */
 543   *nn = n;
 544 }
 545
 546 /* Remove the node with the KN-byte K from T.
 547  *
 548  * Return the address of the node we removed, or null if it couldn't be
 549  * found.
 550  */
 551 void *treap_remove(struct treap *t, const char *k, size_t kn)
 552 {
 553   struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
 554
 555   /* Search for the matching node, but keep track of the address of the link
 556    * which points to our target node.
 557    */
 558   while (*nn)
 559     if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
 560     else { candidate = nn; nn = &(*nn)->right; }
 561
 562   /* If this isn't the right node then give up. */
 563   if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
 564     return (0);
 565
 566   /* Now we need to disentangle the node from the tree.  This is essentially
 567    * the reverse of insertion: we pretend that this node is suddenly very
 568    * light, and mutate the tree so as to restore the heap condition until
 569    * eventually our node is a leaf and can be cut off without trouble.
 570    *
 571    * Throughout, the link *NN notionally points to N, but we don't actually
 572    * update it until we're certain what value it should finally take.
 573    */
 574   nn = candidate; n = *nn; l = n->left; r = n->right;
 575   for (;;)
 576
 577     /* If its left subtree is empty then we can replace our node by its right
 578      * subtree and be done.  Similarly, if the right subtree is empty then we
 579      * replace the node by its left subtree.
 580      *
 581      *              |           |               |               |
 582      *             (N)  --->    R ;            (N)      --->    L
 583      *            /   \                       /   \
 584      *          *       R                   L       *
 585      */
 586     if (!l) { *nn = r; break; }
 587     else if (!r) { *nn = l; break; }
 588
 589     /* Otherwise we need to rotate the pointers so that the heavier of the
 590      * two children takes the place of our node; thus we have either
 591      *
 592      *                  |                   |
 593      *                 (N)                  L
 594      *                /   \               /   \
 595      *              L       R   --->    X      (N)
 596      *            /   \                       /   \
 597      *          X       Y                   Y       R
 598      *
 599      * or
 600      *
 601      *              |                           |
 602      *             (N)                          R
 603      *            /   \                       /   \
 604      *          L       R       --->       (N)      Y
 605      *                /   \               /   \
 606      *              X       Y           L       X
 607      *
 608      * Again, these transformations clearly preserve the ordering of nodes in
 609      * the binary search tree, and the heap condition.
 610      */
 611     else if (l->wt > r->wt)
 612       { *nn = l; nn = &l->right; l = n->left = l->right; }
 613     else
 614       { *nn = r; nn = &r->left; r = n->right = r->left; }
 615
 616   /* Release the key buffer, and return the node that we've now detached. */
 617   free(n->k); return (n);
 618 }
 619
 620 /* Initialize an iterator I over T's nodes. */
 621 void treap_start_iter(struct treap *t, struct treap_iter *i)
 622 {
 623   struct treap_node *n = t->root;
 624   unsigned sp = 0;
 625
 626   /* The `stack' in the iterator structure is an empty ascending stack of
 627    * nodes which have been encountered, and their left subtrees investigated,
 628    * but not yet visited by the iteration.
 629    *
 630    * Iteration begins by stacking the root node, its left child, and so on,
 631    * At the end of this, the topmost entry on the stack is the least node of
 632    * the tree, followed by its parent, grandparent, and so on up to the root.
 633    */
 634   while (n) {
 635     assert(sp < TREAP_PATHMAX);
 636     i->stack[sp++] = n; n = n->left;
 637   }
 638   i->sp = sp;
 639 }
 640
 641 /* Return the next node from I, in ascending order by key.
 642  *
 643  * If there are no more nodes, then return null.
 644  */
 645 void *treap_next(struct treap_iter *i)
 646 {
 647   struct treap_node *n, *o;
 648   unsigned sp = i->sp;
 649
 650   /* We say that a node is /visited/ once it's been returned by this
 651    * iterator.  To traverse a tree in order, then, we traverse its left
 652    * subtree, visit the tree root, and traverse its right subtree -- which is
 653    * a fine recursive definition, but we need a nonrecursive implementation.
 654    *
 655    * As is usual in this kind of essential structural recursion, we maintain
 656    * a stack.  The invariant that we'll maintain is as follows.
 657    *
 658    *   1. If the stack is empty, then all nodes have been visited.
 659    *
 660    *   2, If the stack is nonempty then the topmost entry on the stack is the
 661    *      least node which has not yet been visited -- and therefore is the
 662    *      next node to visit.
 663    *
 664    *   3. The earlier entries in the stack are, in (top to bottom) order,
 665    *      those of the topmost node's parent, grandparent, etc., up to the
 666    *      root, which have not yet been visited.  More specifically, a node
 667    *      appears in the stack if and only if some node in its left subtree
 668    *      is nearer the top of the stack.
 669    *
 670    * When we initialized the iterator state (in `treap_start_iter' above), we
 671    * traced a path to the leftmost leaf, stacking the root, its left-hand
 672    * child, and so on.  The leftmost leaf is clearly the first node to be
 673    * visited, and its entire ancestry is on the stack since none of these
 674    * nodes has yet been visited.  (If the tree is empty, then we have done
 675    * nothing, the stack is empty, and there are no nodes to visit.)  This
 676    * establishes the base case for the induction.
 677    */
 678
 679   /* So, if the stack is empty now, then (1) all of the nodes have been
 680    * visited and there's nothing left to do.  Return null.
 681    */
 682   if (!sp) return (0);
 683
 684   /* It's clear that, if we pop the topmost element of the stack, visit it,
 685    * and arrange to reestablish the invariant, then we'll visit the nodes in
 686    * the correct order, pretty much by definition.
 687    *
 688    * So, pop a node off the stack.  This is the node we shall return.  But
 689    * before we can do that, we must reestablish the above invariant.
 690    * Firstly, the current node is removed from the stack, because we're about
 691    * to visit it, and visited nodes don't belong on the stack.  Then there
 692    * are two cases to consider.
 693    *
 694    *   * If the current node's right subtree is not empty, then the next node
 695    *     to be visited is the leftmost node in that subtree.  All of the
 696    *     nodes on the stack are ancestors of the current node, and the right
 697    *     subtree consists of its descendants, so none of them are already on
 698    *     the stack; and they're all greater than the current node, and
 699    *     therefore haven't been visited.  Therefore, we must push the current
 700    *     node's right child, its /left/ child, and so on, proceeding
 701    *     leftwards until we fall off the bottom of the tree.
 702    *
 703    *   * Otherwise, we've finished traversing some subtree.  Either we are
 704    *     now done, or (3) we have just finished traversing the left subtree
 705    *     of the next topmost item on the stack.  This must therefore be the
 706    *     next node to visit.  The rest of the stack is already correct.
 707    */
 708   n = i->stack[--sp];
 709   o = n->right;
 710   while (o) {
 711     assert(sp < TREAP_PATHMAX);
 712     i->stack[sp++] = o; o = o->left;
 713   }
 714   i->sp = sp;
 715   return (n);
 716 }
 717
 718 /* Recursively check the subtree headed by N.
 719  *
 720  * No node should have weight greater than MAXWT, to satisfy the heap
 721  * condition; if LO is not null, then all node keys should be strictly
 722  * greater than LO, and, similarly, if HI is not null, then all keys should
 723  * be strictly smaller than HI.
 724  */
 725 static void check_subtree(struct treap_node *n, unsigned maxwt,
 726                           const char *klo, const char *khi)
 727 {
 728   /* Check the heap condition. */
 729   assert(n->wt <= maxwt);
 730
 731   /* Check that the key is in bounds.  (Use `strcmp' here to ensure that our
 732    * own `str_lt' is working correctly.)
 733    */
 734   if (klo) assert(STRCMP(n->k, >, klo));
 735   if (khi) assert(STRCMP(n->k, <, khi));
 736
 737   /* Check the left subtree.  Node weights must be bounded above by our own
 738    * weight.  And everykey in the left subtree must be smaller than our
 739    * current key.  We propagate the lower bound.
 740    */
 741   if (n->left) check_subtree(n->left, n->wt, klo, n->k);
 742
 743   /* Finally, check the right subtree.  This time, every key must be larger
 744    * than our key, and we propagate the upper bound.
 745    */
 746   if (n->right) check_subtree(n->right, n->wt, n->k, khi);
 747 }
 748
 749 /* Check the treap structure rules for T. */
 750 void treap_check(struct treap *t)
 751   { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
 752
 753 /* Recursively dump the subtree headed by N, indenting the output lines by
 754  * IND spaces.
 755  */
 756 static void dump_node(struct treap_node *n, int ind)
 757 {
 758   if (n->left) dump_node(n->left, ind + 1);
 759   printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
 760   if (n->right) dump_node(n->right, ind + 1);
 761 }
 762
 763 /* Dump the treap T to standard output, for debugging purposes. */
 764 void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
 765
 766 /*----- Configuration file parsing ----------------------------------------*/
 767
 768 #ifndef DECL_ENVIRON
 769   extern char **environ;
 770 #endif
 771
 772 /* Advance P past a syntactically valid name, but no further than L.
 773  *
 774  * Return the new pointer.  If no name is found, report an error, blaming
 775  * FILE and LINE; WHAT is an adjective for the kind of name that was
 776  * expected.
 777  */
 778 static const char *scan_name(const char *what,
 779                              const char *p, const char *l,
 780                              const char *file, unsigned line)
 781 {
 782   const char *q = p;
 783
 784   while (q < l &&
 785          (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
 786                          *q == '*' || *q == '+' || *q == '%' || *q == '@'))
 787     q++;
 788   if (q == p) lose("%s:%u: expected %s name", file, line, what);
 789   return (q);
 790 }
 791
 792 /* Initialize the configuration state CONF.
 793  *
 794  * Usually you'd use the static initializer `CONFIG_INIT'.
 795  */
 796 void config_init(struct config *conf)
 797   { treap_init(&conf->sections); }
 798
 799 /* Find and return the section with null-terminated NAME in CONF.
 800  *
 801  * If no section is found, the behaviour depends on whether `CF_CREAT' is set
 802  * in F: if so, an empty section is created and returned; otherwise, a null
 803  * pointer is returned.
 804  */
 805 struct config_section *config_find_section(struct config *conf, unsigned f,
 806                                            const char *name)
 807   { return (config_find_section_n(conf, f, name, strlen(name))); }
 808
 809 /* Find and return the section with the SZ-byte NAME in CONF.
 810  *
 811  * This works like `config_find_section', but with an explicit length for the
 812  * NAME rather than null-termination.
 813  */
 814 struct config_section *config_find_section_n(struct config *conf, unsigned f,
 815                                              const char *name, size_t sz)
 816 {
 817   struct config_section *sect;
 818   struct treap_path path;
 819
 820   if (!(f&CF_CREAT))
 821     sect = treap_lookup(&conf->sections, name, sz);
 822   else {
 823     sect = treap_probe(&conf->sections, name, sz, &path);
 824     if (!sect) {
 825       sect = xmalloc(sizeof(*sect));
 826       if (!conf->head) conf->tail = &conf->head;
 827       sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
 828       sect->parents = 0; sect->nparents = SIZE_MAX;
 829       treap_init(&sect->vars); treap_init(&sect->cache);
 830       treap_insert(&conf->sections, &path, &sect->_node, name, sz);
 831       config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
 832     }
 833   }
 834   return (sect);
 835 }
 836
 837 /* Set the fallback section for CONF to be SECT.
 838  *
 839  * That is, if a section has no explicit parents, then by default it will
 840  * have a single parent which is SECT.  If SECT is null then there is no
 841  * fallback section, and sections which don't have explicitly specified
 842  * parents have no parents at all.  (This is the default situation.)
 843  */
 844 void config_set_fallback(struct config *conf, struct config_section *sect)
 845   { conf->fallback = sect; }
 846
 847 /* Arrange that SECT has PARENT as its single parent section.
 848  *
 849  * If PARENT is null, then arrange that SECT has no parents at all.  In
 850  * either case, any `@parents' setting will be ignored.
 851  */
 852 void config_set_parent(struct config_section *sect,
 853                        struct config_section *parent)
 854 {
 855   if (!parent)
 856     sect->nparents = 0;
 857   else {
 858     sect->parents = xmalloc(sizeof(*sect->parents));
 859     sect->parents[0] = parent; sect->nparents = 1;
 860   }
 861 }
 862
 863 /* Initialize I to iterate over the sections defined in CONF. */
 864 void config_start_section_iter(struct config *conf,
 865                                struct config_section_iter *i)
 866   { i->sect = conf->head; }
 867
 868 /* Return the next section from I, in order of creation.
 869  *
 870  * If there are no more sections, then return null.
 871  */
 872 struct config_section *config_next_section(struct config_section_iter *i)
 873 {
 874   struct config_section *sect;
 875
 876   sect = i->sect;
 877   if (sect) i->sect = sect->next;
 878   return (sect);
 879 }
 880
 881 /* Initialize the `parents' links of SECT, if they aren't set up already.
 882  *
 883  * If SECT contains a `@parents' setting then parse it to determine the
 884  * parents; otherwise use CONF's fallbeck section, as established by
 885  * `config_set_fallback'.
 886  */
 887 static void set_config_section_parents(struct config *conf,
 888                                        struct config_section *sect)
 889 {
 890   struct config_section *parent;
 891   struct config_var *var;
 892   const char *file; unsigned line;
 893   size_t i, n;
 894   char *p, *q, *l;
 895   struct argv av = ARGV_INIT;
 896
 897   /* If the section already has parents established then there's nothing to
 898    * do.
 899    */
 900   if (sect->nparents != SIZE_MAX) return;
 901
 902   /* Look up `@parents', without recursion! */
 903   var = treap_lookup(&sect->vars, "@parents", 8);
 904   if (!var) {
 905     /* No explicit setting: use the fallback setting. */
 906
 907     if (!conf->fallback || conf->fallback == sect)
 908       sect->nparents = 0;
 909     else {
 910       sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
 911       sect->parents[0] = conf->fallback;
 912     }
 913   } else {
 914     /* Found a `@parents' list: parse it and set the parents list. */
 915
 916     file = var->file; line = var->line; if (!file) file = "<internal>";
 917
 918     /* We do this in two phases.  First, we parse out the section names, and
 919      * record start/limit pointer pairs in `av'.
 920      */
 921     p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
 922     while (*p) {
 923       q = p;
 924       p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
 925       argv_append(&av, q); argv_append(&av, p);
 926       while (p < l && ISSPACE(*p)) p++;
 927       if (p >= l) break;
 928       if (*p == ',') do p++; while (ISSPACE(*p));
 929     }
 930
 931     /* Now that we've finished parsing, we know how many parents we're going
 932      * to have, so we can allocate the `parents' vector and fill it in.
 933      */
 934     sect->nparents = av.n/2;
 935     sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
 936     for (i = 0; i < av.n; i += 2) {
 937       n = av.v[i + 1] - av.v[i];
 938       parent = config_find_section_n(conf, 0, av.v[i], n);
 939       if (!parent)
 940         lose("%s:%u: unknown parent section `%.*s'",
 941              file, line, (int)n, av.v[i]);
 942       sect->parents[i/2] = parent;
 943     }
 944   }
 945
 946   /* All done. */
 947   argv_release(&av);
 948 }
 949
 950 /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
 951  *
 952  * If successful, return a pointer to the variable; otherwise return null.
 953  * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
 954  * errors.
 955  */
 956 struct config_var *search_recursive(struct config *conf,
 957                                     struct config_section *sect,
 958                                     const char *name, size_t sz)
 959 {
 960   struct config_cache_entry *cache;
 961   struct treap_path path;
 962   struct config_var *var, *v;
 963   size_t i, j = j;
 964
 965   /* If the variable is defined locally then we can just return it. */
 966   var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
 967
 968   /* If we have no parents then there's no way we can find it. */
 969   set_config_section_parents(conf, sect);
 970   if (!sect->parents) return (0);
 971
 972   /* Otherwise we must visit the section's parents.  We can avoid paying for
 973    * this on every lookup by using a cache.  If there's already an entry for
 974    * this variable then we can return the result immediately (note that we
 975    * cache both positive and negative outcomes).  Otherwise we create a new
 976    * cache entry, do the full recursive search, and fill in the result when
 977    * we're done.
 978    *
 979    * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
 980    * new cache entry when it's first created, and clear it when we fill in
 981    * the result: if we encounter an open cache entry again, we know that
 982    * we've found a cycle.
 983    */
 984   cache = treap_probe(&sect->cache, name, sz, &path);
 985   if (!cache) {
 986     cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
 987     treap_insert(&sect->cache, &path, &cache->_node, name, sz);
 988   } else if (cache->f&CF_OPEN)
 989     lose("inheritance cycle through section `%s'",
 990          CONFIG_SECTION_NAME(sect));
 991   else
 992     return (cache->var);
 993
 994   /* Recursively search in each parent.  We insist that all parents that find
 995    * a variable find the same binding; otherwise we declare ambiguous
 996    * inheritance.
 997    */
 998   for (i = 0; i < sect->nparents; i++) {
 999     v = search_recursive(conf, sect->parents[i], name, sz);
1000     if (!v);
1001     else if (!var) { var = v; j = i; }
1002     else if (var != v)
1003       lose("section `%s' inherits variable `%s' ambiguously "
1004            "via `%s' and `%s'",
1005            CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
1006            CONFIG_SECTION_NAME(sect->parents[j]),
1007            CONFIG_SECTION_NAME(sect->parents[i]));
1008   }
1009
1010   /* All done: fill the cache entry in, clear the open flag, and return the
1011    * result.
1012    */
1013   cache->var = var; cache->f &= ~CF_OPEN;
1014   return (var);
1015 }
1016
1017 /* Find and return the variable with null-terminated NAME in SECT.
1018  *
1019  * If `CF_INHERIT' is set in F, then the function searches the section's
1020  * parents recursively; otherwise, it only checks to see whether the variable
1021  * is set directly in SECT.
1022  *
1023  * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1024  * set in F: if so, an empty variable is created and returned; otherwise, a
1025  * null pointer is returned.
1026  *
1027  * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1028  */
1029 struct config_var *config_find_var(struct config *conf,
1030                                    struct config_section *sect,
1031                                    unsigned f, const char *name)
1032   { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
1033
1034 /* Find and return the variable with the given SZ-byte NAME in SECT.
1035  *
1036  * This works like `config_find_var', but with an explicit length for the
1037  * NAME rather than null-termination.
1038  */
1039 struct config_var *config_find_var_n(struct config *conf,
1040                                      struct config_section *sect,
1041                                      unsigned f, const char *name, size_t sz)
1042 {
1043   struct config_var *var;
1044   struct treap_path path;
1045
1046   if (f&CF_INHERIT)
1047     var = search_recursive(conf, sect, name, sz);
1048   else if (!(f&CF_CREAT))
1049     var = treap_lookup(&sect->vars, name, sz);
1050   else {
1051     var = treap_probe(&sect->vars, name, sz, &path);
1052     if (!var) {
1053       var = xmalloc(sizeof(*var));
1054       var->val = 0; var->file = 0; var->f = 0; var->line = 1;
1055       treap_insert(&sect->vars, &path, &var->_node, name, sz);
1056     }
1057   }
1058   return (var);
1059 }
1060
1061 /* Set variable NAME to VALUE in SECT, with associated flags F.
1062  *
1063  * The names are null-terminated.  The flags are variable flags: see `struct
1064  * config_var' for details.  Returns the variable.
1065  *
1066  * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1067  * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1068  */
1069 struct config_var *config_set_var(struct config *conf,
1070                                   struct config_section *sect,
1071                                   unsigned f,
1072                                   const char *name, const char *value)
1073 {
1074   return (config_set_var_n(conf, sect, f,
1075                            name, strlen(name),
1076                            value, strlen(value)));
1077 }
1078
1079 /* As `config_set_var', except that the variable NAME and VALUE have explicit
1080  * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1081  * terminated.
1082  */
1083 struct config_var *config_set_var_n(struct config *conf,
1084                                     struct config_section *sect,
1085                                     unsigned f,
1086                                     const char *name, size_t namelen,
1087                                     const char *value, size_t valuelen)
1088 {
1089   struct config_var *var =
1090     config_find_var_n(conf, sect, CF_CREAT, name, namelen);
1091
1092   if (var->f&~f&CF_OVERRIDE) return (var);
1093   free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
1094   var->f = f;
1095   return (var);
1096 }
1097
1098 /* Initialize I to iterate over the variables directly defined in SECT. */
1099 void config_start_var_iter(struct config *conf, struct config_section *sect,
1100                            struct config_var_iter *i)
1101   { treap_start_iter(&sect->vars, &i->i); }
1102
1103 /* Return next variable from I, in ascending lexicographical order.
1104  *
1105  * If there are no more variables, then return null.
1106  */
1107 struct config_var *config_next_var(struct config_var_iter *i)
1108   { return (treap_next(&i->i)); }
1109
1110 /* Read and parse configuration FILE, applying its settings to CONF.
1111  *
1112  * If all goes well, the function returns 0.  If the file is not found, then
1113  * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1114  * function simply returns -1.  Otherwise, a fatal error is reported.  Note
1115  * that this /only/ applies if the file does not exist (specifically, opening
1116  * it fails with `ENOENT') -- any other problems are reported as fatal
1117  * errors regardless of the flag setting.
1118  */
1119 int config_read_file(struct config *conf, const char *file, unsigned f)
1120 {
1121   struct config_section *sect;
1122   struct config_var *var;
1123   struct dstr d = DSTR_INIT, dd = DSTR_INIT;
1124   unsigned line = 0;
1125   const char *p, *q, *r;
1126   FILE *fp;
1127
1128   /* Try to open the file. */
1129   fp = fopen(file, "r");
1130   if (!fp) {
1131     if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
1132     lose("failed to open configuration file `%s': %s",
1133          file, strerror(errno));
1134   }
1135
1136   /* Find the initial section. */
1137   sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
1138
1139   /* Work through the file, line by line. */
1140   for (;;) {
1141     dstr_reset(&d); if (dstr_readline(&d, fp)) break;
1142     line++;
1143
1144     /* Trim trailing spaces from the line.  The syntax is sensitive to
1145      * leading spaces, so we can't trim those yet.
1146      */
1147     while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
1148     d.p[d.len] = 0;
1149
1150     if (!*d.p || *d.p == ';')
1151       /* Ignore comments entirely.  (In particular, a comment doesn't
1152        * interrupt a multiline variable value.)
1153        */
1154       ;
1155
1156     else if (ISSPACE(d.p[0])) {
1157       /* The line starts with whitespace, so it's a continuation line. */
1158
1159       /* Skip the initial whitespace. */
1160       p = d.p; while (ISSPACE(*p)) p++;
1161
1162       /* If we aren't collecting a variable value then this is an error.
1163        * Otherwise, accumulate it into the current value.
1164        */
1165       if (!var)
1166         lose("%s:%u: continuation line, but no variable", file, line);
1167       if (dd.len) dstr_putc(&dd, ' ');
1168       dstr_putm(&dd, p, d.len - (p - d.p));
1169
1170     } else {
1171       /* The line starts in the first column. */
1172
1173       /* If there's a value value being collected then we must commit it to
1174        * its variable (unless there's already a setting there that says we
1175        * shouldn't).
1176        */
1177       if (var) {
1178         if (!(var->f&CF_OVERRIDE))
1179           { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1180         var = 0;
1181       }
1182
1183       /* Now decide what kind of line this is. */
1184       if (d.p[0] == '[') {
1185         /* It's a section header. */
1186
1187         /* Parse the header. */
1188         p = d.p + 1; while (ISSPACE(*p)) p++;
1189         q = scan_name("section", p, d.p + d.len, file, line);
1190         r = q; while (ISSPACE(*r)) r++;
1191         if (*r != ']')
1192           lose("%s:%u: expected `]' in section header", file, line);
1193         if (r[1])
1194           lose("%s:%u: trailing junk after `]' in section header",
1195                file, line);
1196
1197         /* Create the new section. */
1198         sect = config_find_section_n(conf, CF_CREAT, p, q - p);
1199
1200       } else {
1201         /* It's a variable assignment.  Parse the name out. */
1202         p = scan_name("variable", d.p, d.p + d.len, file, line);
1203         var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
1204         while (ISSPACE(*p)) p++;
1205         if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
1206         p++; while (ISSPACE(*p)) p++;
1207
1208         /* Clear out the variable's initial value, unless we shouldn't
1209          * override it.
1210          */
1211         if (!(var->f&CF_OVERRIDE)) {
1212           free(var->val); var->val = 0; var->f = 0;
1213           free(var->file); var->file = xstrdup(file); var->line = line;
1214         }
1215         dstr_reset(&dd); dstr_puts(&dd, p);
1216       }
1217     }
1218   }
1219
1220   /* If there's a value under construction then commit the result. */
1221   if (var && !(var->f&CF_OVERRIDE))
1222     { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1223
1224   /* Close the file. */
1225   if (fclose(fp))
1226     lose("error reading configuration file `%s': %s", file, strerror(errno));
1227
1228   /* All done. */
1229   dstr_release(&d); dstr_release(&dd);
1230   return (0);
1231 }
1232
1233 /* Populate SECT with environment variables.
1234  *
1235  * Environment variables are always set with `CF_LITERAL'.
1236  */
1237 void config_read_env(struct config *conf, struct config_section *sect)
1238 {
1239   const char *p, *v;
1240   size_t i;
1241
1242   for (i = 0; (p = environ[i]) != 0; i++) {
1243     v = strchr(p, '='); if (!v) continue;
1244     config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
1245   }
1246 }
1247
1248 /*----- Substitution and quoting ------------------------------------------*/
1249
1250 /* The substitution and word-splitting state.
1251  *
1252  * This only keeps track of the immutable parameters for the substitution
1253  * task: stuff which changes (flags, filtering state, cursor position) is
1254  *       maintained separately.
1255  */
1256 struct subst {
1257   struct config *config;                /* configuration state */
1258   struct config_section *home;          /* home section for lookups */
1259   struct dstr *d;                       /* current word being constructed */
1260   struct argv *av;                      /* output word list */
1261 };
1262
1263 /* Flags for `subst' and related functions. */
1264 #define SF_SPLIT 0x0001u                /* split at (unquoted) whitespace */
1265 #define SF_QUOT 0x0002u                 /* currently within double quotes */
1266 #define SF_SUBST 0x0004u                /* apply `$-substitutions */
1267 #define SF_SUBEXPR 0x0008u              /* stop at delimiter `|' or `}' */
1268 #define SF_SPANMASK 0x00ffu             /* mask for the above */
1269
1270 #define SF_WORD 0x0100u                 /* output word under construction */
1271 #define SF_SKIP 0x0200u                 /* not producing output */
1272 #define SF_LITERAL 0x0400u              /* do not expand or substitute */
1273 #define SF_UPCASE 0x0800u               /* convert to uppercase */
1274 #define SF_DOWNCASE 0x1000u             /* convert to lowercase */
1275 #define SF_CASEMASK 0x1800u             /* mask for case conversions */
1276
1277 /* Apply filters encoded in QFILT and F to the text from P to L, and output.
1278  *
1279  * SB is the substitution state which, in particular, explains where the
1280  * output should go.
1281  *
1282  * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1283  * conversions, and a nesting depth QFILT for toothpick escaping.  (QFILT is
1284  * encoded as the number of toothpicks to print: see `subst' for how this
1285  * determined.)
1286  */
1287 static void filter_string(const char *p, const char *l,
1288                           const struct subst *sb, unsigned qfilt, unsigned f)
1289 {
1290   size_t r, n;
1291   char *q; const char *pp, *ll;
1292
1293   if (!qfilt && !(f&SF_CASEMASK))
1294     /* Fast path: there's nothing to do: just write to the output. */
1295     dstr_putm(sb->d, p, l - p);
1296
1297   else for (;;) {
1298     /* We must be a bit more circumspect. */
1299
1300     /* Determine the length of the next span of characters which don't need
1301      * escaping.  (If QFILT is zero then this is everything.)
1302      */
1303     r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
1304     if (n > r) n = r;
1305
1306     if (!(f&SF_CASEMASK))
1307       /* No case conversion: we can just emit this chunk. */
1308
1309       dstr_putm(sb->d, p, n);
1310
1311     else {
1312       /* Case conversion to do.  Arrange enough space for the output, and
1313        * convert it character by character.
1314        */
1315
1316       dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
1317       if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
1318       else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
1319       sb->d->len += n;
1320     }
1321
1322     /* If we've reached the end then stop. */
1323     if (n >= r) break;
1324
1325     /* Otherwise we must have found a character which requires escaping.
1326      * Emit enough toothpicks.
1327      */
1328     dstr_putcn(sb->d, '\\', qfilt);
1329
1330     /* This character is now done, so we can skip over and see if there's
1331      * another chunk of stuff we can do at high speed.
1332      */
1333     dstr_putc(sb->d, p[n]); p += n + 1;
1334   }
1335 }
1336
1337 /* Scan and resolve a `[SECT:]VAR' specifier at P.
1338  *
1339  * Return the address of the next character following the specifier.  L is a
1340  * limit on the region of the buffer that we should process; SB is the
1341  * substitution state which provides the home section if none is given
1342  * explicitly; FILE and LINE are the source location to blame for problems.
1343  */
1344 static const char *retrieve_varspec(const char *p, const char *l,
1345                                     const struct subst *sb,
1346                                     struct config_var **var_out,
1347                                     const char *file, unsigned line)
1348 {
1349   struct config_section *sect = sb->home;
1350   const char *t;
1351
1352   t = scan_name("section or variable", p, l, file, line);
1353   if (t < l && *t == ':') {
1354     sect = config_find_section_n(sb->config, 0, p, t - p);
1355     p = t + 1; t = scan_name("variable", p, l, file, line);
1356   }
1357
1358   if (!sect) *var_out = 0;
1359   else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
1360   return (t);
1361 }
1362
1363 /* Substitute and/or word-split text.
1364  *
1365  * The input text starts at P, and continues to (just before) L.  Context for
1366  * the task is provided by SB; the source location to blame is FILE and LINE
1367  * (FILE may be null so that this can be passed directly from a `config_var'
1368  * without further checking); QFILT is the nesting depth in toothpick-
1369  * escaping; and F holds a mask of `SF_...' flags.
1370  */
1371 static const char *subst(const char *p, const char *l,
1372                          const struct subst *sb,
1373                          const char *file, unsigned line,
1374                          unsigned qfilt, unsigned f)
1375 {
1376   struct config_var *var;
1377   const char *q0, *q1, *t;
1378   unsigned subqfilt, ff;
1379   size_t n;
1380
1381   /* It would be best if we could process literal text at high speed.  To
1382    * this end, we have a table, indexed by the low-order bits of F, to tell
1383    * us which special characters we need to stop at.  This way, we can use
1384    * `strcspn' to skip over literal text and stop at the next character which
1385    * needs special handling.  Entries in this table with a null pointer
1386    * correspond to impossible flag settings.
1387    */
1388   static const char *const delimtab[] = {
1389
1390 #define ESCAPE "\\"                     /* always watch for `\'-escapes */
1391 #define SUBST "$"                       /* check for `$' if `SF_SUBST' set */
1392 #define WORDSEP " \f\r\n\t\v'\""        /* space, quotes if `SF_SPLIT' but
1393                                          * not `SF_QUOT' */
1394 #define QUOT "\""                       /* only quotes if `SF_SPLIT' and
1395                                          * `SF_QUOT' */
1396 #define DELIM "|}"                      /* end delimiters of `SF_SUBEXPR' */
1397
1398     ESCAPE,
1399     ESCAPE             WORDSEP,
1400     0,
1401     ESCAPE             QUOT,
1402     ESCAPE       SUBST,
1403     ESCAPE       SUBST WORDSEP,
1404     0,
1405     ESCAPE       SUBST QUOT,
1406     ESCAPE DELIM,
1407     ESCAPE DELIM       WORDSEP,
1408     0,
1409     ESCAPE DELIM       QUOT,
1410     ESCAPE DELIM SUBST,
1411     ESCAPE DELIM SUBST WORDSEP,
1412     0,
1413     ESCAPE DELIM SUBST QUOT
1414
1415 #undef COMMON
1416 #undef WORDSEP
1417 #undef SQUOT
1418 #undef DELIM
1419   };
1420
1421   /* Set FILE to be useful if it was null on entry. */
1422   if (!file) file = "<internal>";
1423
1424   /* If the text is literal then hand off to `filter_string'.  This obviously
1425    * starts a word.
1426    */
1427   if (f&SF_LITERAL) {
1428     filter_string(p, l, sb, qfilt, f);
1429     f |= SF_WORD;
1430     goto done;
1431   }
1432
1433   /* Chew through the input until it's all gone. */
1434   while (p < l) {
1435
1436     if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
1437       /* This is whitespace, we're supposed to split, and we're not within
1438        * quotes, so we should split here.
1439        */
1440
1441       /* If there's a word in progress then we should commit it. */
1442       if (f&SF_WORD) {
1443         if (!(f&SF_SKIP)) {
1444           argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1445           dstr_reset(sb->d);
1446         }
1447         f &= ~SF_WORD;
1448       }
1449
1450       /* Skip over further whitespace at high speed. */
1451       do p++; while (p < l && ISSPACE(*p));
1452
1453     } else if (*p == '\\') {
1454       /* This is a toothpick, so start a new word and add the next character
1455        * to it.
1456        */
1457
1458       /* If there's no next charact3er then we should be upset. */
1459       p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
1460
1461       if (!(f&SF_SKIP)) {
1462
1463         /* If this is a double quote or backslash then check DFLT to see if
1464          * it needs escaping.
1465          */
1466         if (qfilt && (*p == '"' || *p == '\\'))
1467           dstr_putcn(sb->d, '\\', qfilt);
1468
1469         /* Output the character. */
1470         if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
1471         else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
1472         else dstr_putc(sb->d, *p);
1473       }
1474
1475       /* Move past the escaped character.  Remember we started a word. */
1476       p++; f |= SF_WORD;
1477
1478     } else if ((f&SF_SPLIT) && *p == '"') {
1479       /* This is a double quote, and we're word splitting.  We're definitely
1480        * in a word now.  Toggle whether we're within quotes.
1481        */
1482
1483       f ^= SF_QUOT; f |= SF_WORD; p++;
1484
1485     } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
1486       /* This is a single quote, and we're word splitting but not within
1487        * double quotes.  Find the matching end quote, and just output
1488        * everything between literally.
1489        */
1490
1491       p++; t = strchr(p, '\'');
1492       if (!t || t >= l) lose("%s:%u: missing `''", file, line);
1493       if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
1494       p = t + 1; f |= SF_WORD;
1495
1496     } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
1497       /* This is an end delimiter, and we're supposed to stop here. */
1498       break;
1499
1500     } else if ((f&SF_SUBST) && *p == '$') {
1501       /* This is a `$' and we're supposed to do substitution. */
1502
1503       /* The kind of substitution is determined by the next character. */
1504       p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
1505
1506       /* Prepare flags for a recursive substitution.
1507        *
1508        * Hide our quote state from the recursive call.  If we're within a
1509        * word, then disable word-splitting.
1510        */
1511       ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
1512
1513       /* Now dispatch based on the following character. */
1514       switch (*p) {
1515
1516         case '?':
1517           /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1518
1519           /* Skip initial space. */
1520           p++; while (p < l && ISSPACE(*p)) p++;
1521
1522           /* Find the variable. */
1523           p = retrieve_varspec(p, l, sb, &var, file, line);
1524
1525           /* Skip whitespace again. */
1526           while (p < l && ISSPACE(*p)) p++;
1527
1528           /* Expect the opening `{'. */
1529           if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
1530           p++;
1531
1532           /* We'll process the parts recursively, but we need to come back
1533            * when we hit the appropriate delimiters, so arrange for that.
1534            */
1535           ff |= SF_SUBEXPR;
1536
1537           /* Process the consequent (skip if the variable wasn't found). */
1538           p = subst(p, l, sb, file, line, qfilt,
1539                     ff | (var ? 0 : SF_SKIP));
1540
1541           /* If there's a `|' then process the alternative too (skip if the
1542            * variable /was/ found).
1543            */
1544           if (p < l && *p == '|')
1545             p = subst(p + 1, l, sb, file, line, qfilt,
1546                       ff | (var ? SF_SKIP : 0));
1547
1548           /* We should now be past the closing `}'. */
1549           if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1550           p++;
1551           break;
1552
1553         case '{':
1554           /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1555
1556           /* Skip initial whitespace. */
1557           p++; while (p < l && ISSPACE(*p)) p++;
1558
1559           /* Find the variable. */
1560           q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
1561
1562           /* Determine the filters to apply when substituting the variable
1563            * value.
1564            */
1565           subqfilt = qfilt;
1566           for (;;) {
1567
1568             /* Skip spaces again. */
1569             while (p < l && ISSPACE(*p)) p++;
1570
1571             /* If there's no `|' then there are no more filters, so stop. */
1572             if (p >= l || *p != '|') break;
1573
1574             /* Skip the `|' and more spaces. */
1575             p++; while (p < l && ISSPACE(*p)) p++;
1576
1577             /* Collect the filter name. */
1578             t = scan_name("filter", p, l, file, line);
1579
1580             /* Dispatch on the filter name. */
1581             if (t - p == 1 && *p == 'q')
1582               /* `q' -- quote for Lisp string.
1583                *
1584                * We're currently adding Q `\' characters before each naughty
1585                * character.  But a backslash itself is naughty too, so that
1586                * makes Q + 1 naughty characters, each of which needs a
1587                * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1588                *
1589                * Calculate this here rather than at each point toothpicks
1590                * needs to be deployed.
1591                */
1592
1593               subqfilt = 2*subqfilt + 1;
1594
1595             else if (t - p == 1 && *p == 'l')
1596               /* `u' -- convert to uppercase.
1597                *
1598                * If a case conversion is already set, then that will override
1599                * whatever we do here, so don't bother.
1600                */
1601
1602               { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
1603
1604             else if (t - p == 1 && *p == 'u')
1605               /* `u' -- convert to uppercase.
1606                *
1607                * If a case conversion is already set, then that will override
1608                * whatever we do here, so don't bother.
1609                */
1610               { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
1611
1612             else
1613               /* Something else we didn't understand. */
1614               lose("%s:%u: unknown filter `%.*s'",
1615                    file, line, (int)(t - p), p);
1616
1617             /* Continue from after the filter name. */
1618             p = t;
1619           }
1620
1621           /* If we're not skipping, and we found a variable, then substitute
1622            * its value.  This is the point where we need to be careful about
1623            * recursive expansion.
1624            */
1625           if (!(f&SF_SKIP) && var) {
1626             if (var->f&CF_EXPAND)
1627               lose("%s:%u: recursive expansion of variable `%.*s'",
1628                    file, line, (int)(q1 - q0), q0);
1629             var->f |= CF_EXPAND;
1630             subst(var->val, var->val + var->n, sb,
1631                   var->file, var->line, subqfilt,
1632                   ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1633             var->f &= ~CF_EXPAND;
1634           }
1635
1636           /* If there's an alternative, then we need to process (or maybe
1637            * skip) it.  Otherwise, we should complain if there was no
1638            * veriable, and we're not skipping.
1639            */
1640           if (p < l && *p == '?')
1641             p = subst(p + 1, l, sb, file, line, subqfilt,
1642                       ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
1643           else if (!var && !(f&SF_SKIP))
1644             lose("%s:%u: unknown variable `%.*s'",
1645                  file, line, (int)(q1 - q0), q0);
1646
1647           /* Expect a `}' here.  (No need to skip spaces: we already did that
1648            * after scanning for filters, and either there was no alternative,
1649            * or we advanced to a delimiter character anyway.)
1650            */
1651           if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1652           p++;
1653           break;
1654
1655         default:
1656           /* Something else.  That's a shame. */
1657           lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
1658       }
1659
1660       /* Complain if we started out in word-splitting state, and therefore
1661        * have added a whole number of words to the output, but there's a
1662        * word-fragment stuck onto the end of this substitution.
1663        */
1664       if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
1665           !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
1666         lose("%s:%u: surprising word boundary "
1667              "after splicing substitution",
1668              file, line);
1669     }
1670
1671     else {
1672       /* Something else.  Try to skip over this at high speed.
1673        *
1674        * This makes use of the table we set up earlier.
1675        */
1676
1677       n = strcspn(p, delimtab[f&SF_SPANMASK]);
1678       if (n > l - p) n = l - p;
1679       if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
1680       p += n; f |= SF_WORD;
1681     }
1682   }
1683
1684 done:
1685   /* Sort out the wreckage. */
1686
1687   /* If we're still within quotes then something has gone wrong. */
1688   if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
1689
1690   /* If we're within a word, and should be splitting, then commit the word to
1691    * the output list.
1692    */
1693   if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
1694     argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1695     dstr_reset(sb->d);
1696   }
1697
1698   /* And, with that, we're done. */
1699   return (p);
1700 }
1701
1702 /* Expand substitutions in a string.
1703  *
1704  * Expand the null-terminated string P relative to the HOME section, using
1705  * configuration CONFIG, and appending the result to dynamic string D.  Blame
1706  * WHAT in any error messages.
1707  */
1708 void config_subst_string(struct config *config, struct config_section *home,
1709                          const char *what, const char *p, struct dstr *d)
1710 {
1711   struct subst sb;
1712
1713   sb.config = config; sb.home = home; sb.d = d;
1714   subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
1715   dstr_putz(d);
1716 }
1717
1718 /* Expand substitutions in a string.
1719  *
1720  * Expand the null-terminated string P relative to the HOME section, using
1721  * configuration CONFIG, returning the result as a freshly malloc(3)ed
1722  * string.  Blame WHAT in any error messages.
1723  */
1724 char *config_subst_string_alloc(struct config *config,
1725                                 struct config_section *home,
1726                                 const char *what, const char *p)
1727 {
1728   struct dstr d = DSTR_INIT;
1729   char *q;
1730
1731   config_subst_string(config, home, what, p, &d);
1732   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1733 }
1734
1735 /* Expand substitutions in a variable.
1736  *
1737  * Expand the value of the variable VAR relative to the HOME section, using
1738  * configuration CONFIG, appending the result to dynamic string D.
1739  */
1740 void config_subst_var(struct config *config, struct config_section *home,
1741                       struct config_var *var, struct dstr *d)
1742 {
1743   struct subst sb;
1744
1745   sb.config = config; sb.home = home; sb.d = d;
1746   var->f |= CF_EXPAND;
1747   subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1748         SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1749   var->f &= ~CF_EXPAND;
1750   dstr_putz(d);
1751 }
1752
1753 /* Expand substitutions in a variable.
1754  *
1755  * Expand the value of the variable VAR relative to the HOME section, using
1756  * configuration CONFIG, returning the result as a freshly malloc(3)ed
1757  * string.
1758  */
1759 char *config_subst_var_alloc(struct config *config,
1760                              struct config_section *home,
1761                              struct config_var *var)
1762 {
1763   struct dstr d = DSTR_INIT;
1764   char *q;
1765
1766   config_subst_var(config, home, var, &d);
1767   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1768 }
1769
1770 /* Expand substitutions in a variable and split into words.
1771  *
1772  * Expand and word-split the value of the variable VAR relative to the HOME
1773  * section, using configuration CONFIG, appending the resulting words into
1774  * the vector AV.
1775  */
1776 void config_subst_split_var(struct config *config,
1777                             struct config_section *home,
1778                             struct config_var *var, struct argv *av)
1779 {
1780   struct dstr d = DSTR_INIT;
1781   struct subst sb;
1782
1783   sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
1784   var->f |= CF_EXPAND;
1785   subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1786         SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1787   var->f &= ~CF_EXPAND;
1788   dstr_release(&d);
1789 }
1790
1791 /*----- That's all, folks -------------------------------------------------*/