chiark - git - mdw - runlisp/blob - lib.c

   1 /* -*-c-*-
   2  *
   3  * Common definitions for `runlisp'
   4  *
   5  * (c) 2020 Mark Wooding
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
  11  *
  12  * Runlisp is free software: you can redistribute it and/or modify it
  13  * under the terms of the GNU General Public License as published by the
  14  * Free Software Foundation; either version 3 of the License, or (at your
  15  * option) any later version.
  16  *
  17  * Runlisp is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  20  * for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with Runlisp.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25
  26 /*----- Header files ------------------------------------------------------*/
  27
  28 #include "config.h"
  29
  30 #include <assert.h>
  31
  32 #include <ctype.h>
  33 #include <errno.h>
  34 #include <stdarg.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38
  39 #include <unistd.h>
  40
  41 #include "lib.h"
  42
  43 /*----- Diagnostic utilities ----------------------------------------------*/
  44
  45 const char *progname = "???";
  46         /* Our program name, for use in error messages. */
  47
  48 /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
  49 void set_progname(const char *prog)
  50 {
  51   const char *p;
  52
  53   p = strrchr(prog, '/');
  54   progname = p ? p + 1 : prog;
  55 }
  56
  57 /* Report an error or warning in Unix style, given a captured argument
  58  * cursor.
  59  */
  60 void vmoan(const char *msg, va_list ap)
  61 {
  62   fprintf(stderr, "%s: ", progname);
  63   vfprintf(stderr, msg, ap);
  64   fputc('\n', stderr);
  65 }
  66
  67 /* Issue a warning message. */
  68 void moan(const char *msg, ...)
  69   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
  70
  71 /* Issue a fatal error message and exit unsuccessfully. */
  72 void lose(const char *msg, ...)
  73   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
  74
  75 /*----- Memory allocation -------------------------------------------------*/
  76
  77 /* Allocate and return a pointer to N bytes, or report a fatal error.
  78  *
  79  * Release the pointer using `free' as usual.  If N is zero, returns null
  80  * (but you are not expected to check for this).
  81  */
  82 void *xmalloc(size_t n)
  83 {
  84   void *p;
  85
  86   if (!n) return (0);
  87   p = malloc(n); if (!p) lose("failed to allocate memory");
  88   return (p);
  89 }
  90
  91 /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
  92  *
  93  * The block might (and probably will) move, so it returns the new address.
  94  * If N is zero, then the block is freed (if necessary) and a null pointer
  95  * returned; otherwise, if P is null then a fresh block is allocated.  If
  96  * allocation fails, then a fatal error is reported.
  97  */
  98 void *xrealloc(void *p, size_t n)
  99 {
 100   if (!n) { free(p); return (0); }
 101   else if (!p) return (xmalloc(n));
 102   p = realloc(p, n); if (!p) lose("failed to allocate memory");
 103   return (p);
 104 }
 105
 106 /* Allocate and return a copy of the N-byte string starting at P.
 107  *
 108  * The new string is null-terminated, though P need not be.  If allocation
 109  * fails, then a fatal error is reported.
 110  */
 111 char *xstrndup(const char *p, size_t n)
 112 {
 113   char *q = xmalloc(n + 1);
 114
 115   memcpy(q, p, n); q[n] = 0;
 116   return (q);
 117 }
 118
 119 /* Allocate and return a copy of the null-terminated string starting at P.
 120  *
 121  * If allocation fails, then a fatal error is reported.
 122  */
 123 char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
 124
 125 /*----- Dynamic strings ---------------------------------------------------*/
 126
 127 /* Initialize the string D.
 128  *
 129  * Usually you'd use the static initializer `DSTR_INIT'.
 130  */
 131 void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
 132
 133 /* Reset string D so it's empty again. */
 134 void dstr_reset(struct dstr *d) { d->len = 0; }
 135
 136 /* Ensure that D has at least N unused bytes available. */
 137 void dstr_ensure(struct dstr *d, size_t n)
 138 {
 139   size_t need = d->len + n, newsz;
 140
 141   if (need <= d->sz) return;
 142   newsz = d->sz ? 2*d->sz : 16;
 143   while (newsz < need) newsz *= 2;
 144   d->p = xrealloc(d->p, newsz); d->sz = newsz;
 145 }
 146
 147 /* Release the memory held by D.
 148  *
 149  * It must be reinitialized (e.g., by `dstr_init') before it can be used
 150  * again.
 151  */
 152 void dstr_release(struct dstr *d) { free(d->p); }
 153
 154 /* Append the N-byte string at P to D.
 155  *
 156  * P need not be null-terminated.  D will not be null-terminated
 157  * afterwards.
 158  */
 159 void dstr_putm(struct dstr *d, const void *p, size_t n)
 160   { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
 161
 162 /* Append the null-terminated string P to D.
 163  *
 164  * D /is/ guaranteed to be null-terminated after this.
 165  */
 166 void dstr_puts(struct dstr *d, const char *p)
 167 {
 168   size_t n = strlen(p);
 169
 170   dstr_ensure(d, n + 1);
 171   memcpy(d->p + d->len, p, n + 1);
 172   d->len += n;
 173 }
 174
 175 /* Append the single character CH to D.
 176  *
 177  * D will not be null-terminated afterwards.
 178  */
 179 void dstr_putc(struct dstr *d, int ch)
 180   { dstr_ensure(d, 1); d->p[d->len++] = ch; }
 181
 182 /* Append N copies of the character CH to D.
 183  *
 184  * D will not be null-terminated afterwards.
 185  */
 186 void dstr_putcn(struct dstr *d, int ch, size_t n)
 187   { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
 188
 189 /* Null-terminate the string D.
 190  *
 191  * This doesn't change the length of D.  If further stuff is appended then
 192  * the null terminator will be overwritten.
 193  */
 194 void dstr_putz(struct dstr *d)
 195   { dstr_ensure(d, 1); d->p[d->len] = 0; }
 196
 197 /* Append stuff to D, determined by printf(3) format string P and argument
 198  * tail AP.
 199  *
 200  * D will not be null-terminated afterwards.
 201  */
 202 void dstr_vputf(struct dstr *d, const char *p, va_list ap)
 203 {
 204   va_list ap2;
 205   size_t r;
 206   int n;
 207
 208   r = d->sz - d->len;
 209   va_copy(ap2, ap);
 210   n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
 211   va_end(ap2);
 212   if (n >= r) {
 213     dstr_ensure(d, n + 1); r = d->sz - d->len;
 214     n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
 215   }
 216   d->len += n;
 217 }
 218
 219 /* Append stuff to D, determined by printf(3) format string P and arguments.
 220  *
 221  * D will not be null-terminated afterwards.
 222  */
 223 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
 224   { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
 225
 226 /* Append the next input line from FP to D.
 227  *
 228  * Return 0 on success, or -1 if reading immediately fails or encounters
 229  * end-of-file (call ferror(3) to distinguish).  Any trailing newline is
 230  * discarded: it is not possible to determine whether the last line was ended
 231  * with a newline.  D is guaranteed to be null-terminated afterwards.
 232  */
 233 int dstr_readline(struct dstr *d, FILE *fp)
 234 {
 235   char *p = d->p + d->len, *lim = d->p + d->sz;
 236   size_t n;
 237   int ch;
 238
 239   ch = getc(fp); if (ch == EOF) return (-1);
 240   for (;;) {
 241     if (p == lim) {
 242       n = d->len = p - d->p; dstr_ensure(d, 1);
 243       p = d->p + n; lim = d->p + d->sz;
 244     }
 245     if (ch == EOF || ch == '\n') break;
 246     *p++ = ch; ch = getc(fp);
 247   }
 248   d->len = p - d->p; *p++ = 0;
 249   return (0);
 250 }
 251
 252 /*----- Dynamic vectors of strings ----------------------------------------*/
 253
 254 /* Initialize the vector AV.
 255  *
 256  * Usually you'd use the static initializer `ARGV_INIT'.
 257  */
 258 void argv_init(struct argv *av)
 259   { av->v = 0; av->o = av->n = av->sz = 0; }
 260
 261 /* Reset the vector AV so that it's empty again. */
 262 void argv_reset(struct argv *av) { av->n = 0; }
 263
 264 /* Ensure that AV has at least N unused slots at the end. */
 265 void argv_ensure(struct argv *av, size_t n)
 266 {
 267   size_t need = av->n + av->o + n, newsz;
 268
 269   if (need <= av->sz) return;
 270   newsz = av->sz ? 2*av->sz : 8;
 271   while (newsz < need) newsz *= 2;
 272   av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
 273   av->sz = newsz;
 274 }
 275
 276 /* Ensure that AV has at least N unused slots at the /start/. */
 277 void argv_ensure_offset(struct argv *av, size_t n)
 278 {
 279   size_t newoff;
 280
 281   /* Stupid version.  We won't, in practice, be prepending lots of stuff, so
 282    * avoid the extra bookkeeping involved in trying to make a double-ended
 283    * extendable array asymptotically efficient.
 284    */
 285   if (av->o >= n) return;
 286   newoff = 16;
 287   while (newoff < n) newoff *= 2;
 288   argv_ensure(av, newoff - av->o);
 289   memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
 290   av->v += newoff - av->o; av->o = newoff;
 291 }
 292
 293 /* Release the memory held by AV.
 294  *
 295  * It must be reinitialized (e.g., by `argv_init') before it can be used
 296  * again.
 297  */
 298 void argv_release(struct argv *av) { free(av->v - av->o); }
 299
 300 /* Append the pointer P to AV. */
 301 void argv_append(struct argv *av, char *p)
 302   { argv_ensure(av, 1); av->v[av->n++] = p; }
 303
 304 /* Append a null pointer to AV, without extending the vactor length.
 305  *
 306  * The null pointer will be overwritten when the next string is appended.
 307  */
 308 void argv_appendz(struct argv *av)
 309   { argv_ensure(av, 1); av->v[av->n] = 0; }
 310
 311 /* Append a N-element vector V of pointers to AV. */
 312 void argv_appendn(struct argv *av, char *const *v, size_t n)
 313 {
 314   argv_ensure(av, n);
 315   memcpy(av->v + av->n, v, n*sizeof(const char *));
 316   av->n += n;
 317 }
 318
 319 /* Append the variable-length vector BV to AV. */
 320 void argv_appendav(struct argv *av, const struct argv *bv)
 321   { argv_appendn(av, bv->v, bv->n); }
 322
 323 /* Append the pointers from a variable-length argument list AP to AV.
 324  *
 325  * The list is terminated by a null pointer.
 326  */
 327 void argv_appendv(struct argv *av, va_list ap)
 328 {
 329   char *p;
 330   for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
 331 }
 332
 333 /* Append the argument pointers, terminated by a null pointer, to AV. */
 334 void argv_appendl(struct argv *av, ...)
 335   { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
 336
 337 /* Prepend the pointer P to AV. */
 338 void argv_prepend(struct argv *av, char *p)
 339   { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
 340
 341 /* Prepend a N-element vector V of pointers to AV. */
 342 void argv_prependn(struct argv *av, char *const *v, size_t n)
 343 {
 344   argv_ensure_offset(av, n);
 345   av->o -= n; av->v -= n; av->n += n;
 346   memcpy(av->v, v, n*sizeof(const char *));
 347 }
 348
 349 /* Prepend the variable-length vector BV to AV. */
 350 void argv_prependav(struct argv *av, const struct argv *bv)
 351   { argv_prependn(av, bv->v, bv->n); }
 352
 353 /* Prepend the pointers from a variable-length argument list AP to AV.
 354  *
 355  * The list is terminated by a null pointer.
 356  */
 357 void argv_prependv(struct argv *av, va_list ap)
 358 {
 359   char *p, **v;
 360   size_t n = 0;
 361
 362   for (;;) {
 363     p = va_arg(ap, char *); if (!p) break;
 364     argv_prepend(av, p); n++;
 365   }
 366   v = av->v;
 367   while (n >= 2) {
 368     p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
 369     v++; n -= 2;
 370   }
 371 }
 372
 373 /* Prepend the argument pointers, terminated by a null pointer, to AV. */
 374 void argv_prependl(struct argv *av, ...)
 375   { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
 376
 377 /*----- Treaps ------------------------------------------------------------*/
 378
 379 /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
 380  * string B in a lexicographic ordering.
 381  *
 382  * All comparison of keys is handled by this function.
 383  */
 384 static int str_lt(const char *a, size_t an, const char *b, size_t bn)
 385 {
 386   /* This is a little subtle.  We need only compare the first N bytes of the
 387    * strings, where N is the length of the shorter string.  If this
 388    * distinguishes the two strings, then we're clearly done.  Otherwise, if
 389    * the prefixes are equal then the shorter string is the smaller one.  If
 390    * the two strings are the same length, then they're equal.
 391    *
 392    * Hence, if A is the strictly shorter string, then A precedes B if A
 393    * precedes or matches the prefix of B; otherwise A only precedes B if A
 394    * strictly precedes the prefix of B.
 395    */
 396   if (an < bn) return (MEMCMP(a, <=, b, an));
 397   else return (MEMCMP(a, <, b, bn));
 398 }
 399
 400 /* Initialize the treap T.
 401  *
 402  * Usually you'd use the static initializer `TREAP_INIT'.
 403  */
 404 void treap_init(struct treap *t) { t->root = 0; }
 405
 406 /* Look up the KN-byte key K in the treap T.
 407  *
 408  * Return a pointer to the matching node if one was found, or null otherwise.
 409  */
 410 void *treap_lookup(const struct treap *t, const char *k, size_t kn)
 411 {
 412   struct treap_node *n = t->root, *candidate = 0;
 413
 414   /* This is a simple prototype for some of the search loops we'll encounter
 415    * later.  Notice that we use a strict one-sided comparison, rather than
 416    * the more conventional two-sided comparison.
 417    *
 418    * The main loop will find the largest key not greater than K.
 419    */
 420   while (n)
 421     /* Compare the node's key against our key.  If the node is too large,
 422      * then we ignore it and move left.  Otherwise remember this node for
 423      * later, and move right to see if we can find a better, larger node.
 424      */
 425
 426     if (str_lt(k, kn, n->k, n->kn)) n = n->left;
 427     else { candidate = n; n = n->right; }
 428
 429   /* If the candidate node is less than our key then we failed.  Otherwise,
 430    * by trichotomy, we have found the correct node.
 431    */
 432   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
 433   return (candidate);
 434 }
 435
 436 /* Look up the KN-byte K in the treap T, recording a path in P.
 437  *
 438  * This is similar to `treap_lookup', in that it returns the requested node
 439  * if it already exists, or null otherwise, but it also records in P
 440  * information to be used by `treap_insert' to insert a new node with the
 441  * given key if it's not there already.
 442  */
 443 void *treap_probe(struct treap *t, const char *k, size_t kn,
 444                   struct treap_path *p)
 445 {
 446   struct treap_node **nn = &t->root, *candidate = 0;
 447   unsigned i = 0;
 448
 449   /* This walk is similar to `treap_lookup' above, except that we also record
 450    * the address of each node pointer we visit along the way.
 451    */
 452   for (;;) {
 453     assert(i < TREAP_PATHMAX); p->path[i++] = nn;
 454     if (!*nn) break;
 455     if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
 456     else { candidate = *nn; nn = &(*nn)->right; }
 457   }
 458   p->nsteps = i;
 459
 460   /* Check to see whether we found the right node. */
 461   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
 462   return (candidate);
 463 }
 464
 465 /* Insert a new node N into T, associating it with the KN-byte key K.
 466  *
 467  * Use the path data P, from `treap_probe', to help with insertion.
 468  */
 469 void treap_insert(struct treap *t, const struct treap_path *p,
 470                   struct treap_node *n, const char *k, size_t kn)
 471 {
 472   size_t i = p->nsteps;
 473   struct treap_node **nn, **uu, *u;
 474   unsigned wt;
 475
 476   /* Fill in the node structure. */
 477   n->k = xstrndup(k, kn); n->kn = kn;
 478   n->wt = wt = rand(); n->left = n->right = 0;
 479
 480   /* Prepare for the insertion.
 481    *
 482    * The path actually points to each of the links traversed when searching
 483    * for the node, starting with the `root' pointer, then the `left' or
 484    * `right' pointer of the root node, and so on; `nsteps' will always be
 485    * nonzero, since the path will always pass through the root, and the final
 486    * step, `path->path[path->nsteps - 1]' will always be the address of a
 487    * null pointer onto which the freshly inserted node could be hooked in
 488    * order to satisfy the binary-search-tree ordering.  (Of course, this will
 489    * likely /not/ satisfy the heap condition, so more work needs to be done.)
 490    *
 491    * Throughout, NN is our current candidate for where to attach the node N.
 492    * As the loop progresses, NN will ascend to links further up the tree, and
 493    * N will be adjusted to accumulate pieces of the existing tree structure.
 494    * We'll stop when we find that the parent node's weight is larger than our
 495    * new node's weight, at which point we can just set *NN = N; or if we run
 496    * out of steps in the path, in which case *NN is the root pointer.
 497    */
 498   assert(i); nn = p->path[--i];
 499   while (i--) {
 500
 501     /* Collect the next step in the path, and get the pointer to the node. */
 502     uu = p->path[i]; u = *uu;
 503
 504     /* If this node's weight is higher, then we've found the right level and
 505      * we can stop.
 506      */
 507     if (wt <= u->wt) break;
 508
 509     /* The node U is lighter than our new node N, so we must rotate in order
 510      * to fix things.  If we were currently planning to hook N as the left
 511      * subtree of U, then we rotate like this:
 512      *
 513      *                  |                   |
 514      *                  U                  (N)
 515      *                /   \               /   \
 516      *             (N)      Z   --->    X       U
 517      *            /   \                       /   \
 518      *          X       Y                   Y       Z
 519      *
 520      * On the other hand, if we were planning to hook N as the right subtree
 521      * of U, then we do the opposite rotation:
 522      *
 523      *              |                           |
 524      *              U                          (N)
 525      *            /   \                       /   \
 526      *          X      (N)      --->        U       Z
 527      *                /   \               /   \
 528      *              Y       Z           X       Y
 529      *
 530      * These transformations clearly preserve the ordering of nodes in the
 531      * binary search tree, and satisfy the heap condition in the subtree
 532      * headed by N.
 533      */
 534     if (nn == &u->left) { u->left = n->right; n->right = u; }
 535     else { u->right = n->left; n->left = u; }
 536
 537     /* And this arrangement must be attached to UU, or some higher attachment
 538      * point.  The subtree satisfies the heap condition, and can be attached
 539      * safely at the selected place.
 540      */
 541     nn = uu;
 542   }
 543
 544   /* We've found the right spot.  Hook the accumulated subtree into place. */
 545   *nn = n;
 546 }
 547
 548 /* Remove the node with the KN-byte K from T.
 549  *
 550  * Return the address of the node we removed, or null if it couldn't be
 551  * found.
 552  */
 553 void *treap_remove(struct treap *t, const char *k, size_t kn)
 554 {
 555   struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
 556
 557   /* Search for the matching node, but keep track of the address of the link
 558    * which points to our target node.
 559    */
 560   while (*nn)
 561     if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
 562     else { candidate = nn; nn = &(*nn)->right; }
 563
 564   /* If this isn't the right node then give up. */
 565   if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
 566     return (0);
 567
 568   /* Now we need to disentangle the node from the tree.  This is essentially
 569    * the reverse of insertion: we pretend that this node is suddenly very
 570    * light, and mutate the tree so as to restore the heap condition until
 571    * eventually our node is a leaf and can be cut off without trouble.
 572    *
 573    * Throughout, the link *NN notionally points to N, but we don't actually
 574    * update it until we're certain what value it should finally take.
 575    */
 576   nn = candidate; n = *nn; l = n->left; r = n->right;
 577   for (;;)
 578
 579     /* If its left subtree is empty then we can replace our node by its right
 580      * subtree and be done.  Similarly, if the right subtree is empty then we
 581      * replace the node by its left subtree.
 582      *
 583      *              |           |               |               |
 584      *             (N)  --->    R ;            (N)      --->    L
 585      *            /   \                       /   \
 586      *          *       R                   L       *
 587      */
 588     if (!l) { *nn = r; break; }
 589     else if (!r) { *nn = l; break; }
 590
 591     /* Otherwise we need to rotate the pointers so that the heavier of the
 592      * two children takes the place of our node; thus we have either
 593      *
 594      *                  |                   |
 595      *                 (N)                  L
 596      *                /   \               /   \
 597      *              L       R   --->    X      (N)
 598      *            /   \                       /   \
 599      *          X       Y                   Y       R
 600      *
 601      * or
 602      *
 603      *              |                           |
 604      *             (N)                          R
 605      *            /   \                       /   \
 606      *          L       R       --->       (N)      Y
 607      *                /   \               /   \
 608      *              X       Y           L       X
 609      *
 610      * Again, these transformations clearly preserve the ordering of nodes in
 611      * the binary search tree, and the heap condition.
 612      */
 613     else if (l->wt > r->wt)
 614       { *nn = l; nn = &l->right; l = n->left = l->right; }
 615     else
 616       { *nn = r; nn = &r->left; r = n->right = r->left; }
 617
 618   /* Release the key buffer, and return the node that we've now detached. */
 619   free(n->k); return (n);
 620 }
 621
 622 /* Initialize an iterator I over T's nodes. */
 623 void treap_start_iter(struct treap *t, struct treap_iter *i)
 624 {
 625   struct treap_node *n = t->root;
 626   unsigned sp = 0;
 627
 628   /* The `stack' in the iterator structure is an empty ascending stack of
 629    * nodes which have been encountered, and their left subtrees investigated,
 630    * but not yet visited by the iteration.
 631    *
 632    * Iteration begins by stacking the root node, its left child, and so on,
 633    * At the end of this, the topmost entry on the stack is the least node of
 634    * the tree, followed by its parent, grandparent, and so on up to the root.
 635    */
 636   while (n) {
 637     assert(sp < TREAP_PATHMAX);
 638     i->stack[sp++] = n; n = n->left;
 639   }
 640   i->sp = sp;
 641 }
 642
 643 /* Return the next node from I, in ascending order by key.
 644  *
 645  * If there are no more nodes, then return null.
 646  */
 647 void *treap_next(struct treap_iter *i)
 648 {
 649   struct treap_node *n, *o;
 650   unsigned sp = i->sp;
 651
 652   /* We say that a node is /visited/ once it's been returned by this
 653    * iterator.  To traverse a tree in order, then, we traverse its left
 654    * subtree, visit the tree root, and traverse its right subtree -- which is
 655    * a fine recursive definition, but we need a nonrecursive implementation.
 656    *
 657    * As is usual in this kind of essential structural recursion, we maintain
 658    * a stack.  The invariant that we'll maintain is as follows.
 659    *
 660    *   1. If the stack is empty, then all nodes have been visited.
 661    *
 662    *   2, If the stack is nonempty then the topmost entry on the stack is the
 663    *      least node which has not yet been visited -- and therefore is the
 664    *      next node to visit.
 665    *
 666    *   3. The earlier entries in the stack are, in (top to bottom) order,
 667    *      those of the topmost node's parent, grandparent, etc., up to the
 668    *      root, which have not yet been visited.  More specifically, a node
 669    *      appears in the stack if and only if some node in its left subtree
 670    *      is nearer the top of the stack.
 671    *
 672    * When we initialized the iterator state (in `treap_start_iter' above), we
 673    * traced a path to the leftmost leaf, stacking the root, its left-hand
 674    * child, and so on.  The leftmost leaf is clearly the first node to be
 675    * visited, and its entire ancestry is on the stack since none of these
 676    * nodes has yet been visited.  (If the tree is empty, then we have done
 677    * nothing, the stack is empty, and there are no nodes to visit.)  This
 678    * establishes the base case for the induction.
 679    */
 680
 681   /* So, if the stack is empty now, then (1) all of the nodes have been
 682    * visited and there's nothing left to do.  Return null.
 683    */
 684   if (!sp) return (0);
 685
 686   /* It's clear that, if we pop the topmost element of the stack, visit it,
 687    * and arrange to reestablish the invariant, then we'll visit the nodes in
 688    * the correct order, pretty much by definition.
 689    *
 690    * So, pop a node off the stack.  This is the node we shall return.  But
 691    * before we can do that, we must reestablish the above invariant.
 692    * Firstly, the current node is removed from the stack, because we're about
 693    * to visit it, and visited nodes don't belong on the stack.  Then there
 694    * are two cases to consider.
 695    *
 696    *   * If the current node's right subtree is not empty, then the next node
 697    *     to be visited is the leftmost node in that subtree.  All of the
 698    *     nodes on the stack are ancestors of the current node, and the right
 699    *     subtree consists of its descendants, so none of them are already on
 700    *     the stack; and they're all greater than the current node, and
 701    *     therefore haven't been visited.  Therefore, we must push the current
 702    *     node's right child, its /left/ child, and so on, proceeding
 703    *     leftwards until we fall off the bottom of the tree.
 704    *
 705    *   * Otherwise, we've finished traversing some subtree.  Either we are
 706    *     now done, or (3) we have just finished traversing the left subtree
 707    *     of the next topmost item on the stack.  This must therefore be the
 708    *     next node to visit.  The rest of the stack is already correct.
 709    */
 710   n = i->stack[--sp];
 711   o = n->right;
 712   while (o) {
 713     assert(sp < TREAP_PATHMAX);
 714     i->stack[sp++] = o; o = o->left;
 715   }
 716   i->sp = sp;
 717   return (n);
 718 }
 719
 720 /* Recursively check the subtree headed by N.
 721  *
 722  * No node should have weight greater than MAXWT, to satisfy the heap
 723  * condition; if LO is not null, then all node keys should be strictly
 724  * greater than LO, and, similarly, if HI is not null, then all keys should
 725  * be strictly smaller than HI.
 726  */
 727 static void check_subtree(struct treap_node *n, unsigned maxwt,
 728                           const char *klo, const char *khi)
 729 {
 730   /* Check the heap condition. */
 731   assert(n->wt <= maxwt);
 732
 733   /* Check that the key is in bounds.  (Use `strcmp' here to ensure that our
 734    * own `str_lt' is working correctly.)
 735    */
 736   if (klo) assert(STRCMP(n->k, >, klo));
 737   if (khi) assert(STRCMP(n->k, <, khi));
 738
 739   /* Check the left subtree.  Node weights must be bounded above by our own
 740    * weight.  And every key in the left subtree must be smaller than our
 741    * current key.  We propagate the lower bound.
 742    */
 743   if (n->left) check_subtree(n->left, n->wt, klo, n->k);
 744
 745   /* Finally, check the right subtree.  This time, every key must be larger
 746    * than our key, and we propagate the upper bound.
 747    */
 748   if (n->right) check_subtree(n->right, n->wt, n->k, khi);
 749 }
 750
 751 /* Check the treap structure rules for T. */
 752 void treap_check(struct treap *t)
 753   { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
 754
 755 /* Recursively dump the subtree headed by N, indenting the output lines by
 756  * IND spaces.
 757  */
 758 static void dump_node(struct treap_node *n, int ind)
 759 {
 760   if (n->left) dump_node(n->left, ind + 1);
 761   printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
 762   if (n->right) dump_node(n->right, ind + 1);
 763 }
 764
 765 /* Dump the treap T to standard output, for debugging purposes. */
 766 void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
 767
 768 /*----- Configuration file parsing ----------------------------------------*/
 769
 770 #ifndef DECL_ENVIRON
 771   extern char **environ;
 772 #endif
 773
 774 /* Advance P past a syntactically valid name, but no further than L.
 775  *
 776  * Return the new pointer.  If no name is found, report an error, blaming
 777  * FILE and LINE; WHAT is an adjective for the kind of name that was
 778  * expected.
 779  */
 780 static const char *scan_name(const char *what,
 781                              const char *p, const char *l,
 782                              const char *file, unsigned line)
 783 {
 784   const char *q = p;
 785
 786   while (q < l &&
 787          (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
 788                          *q == '*' || *q == '+' || *q == '%' || *q == '@'))
 789     q++;
 790   if (q == p) lose("%s:%u: expected %s name", file, line, what);
 791   return (q);
 792 }
 793
 794 /* Initialize the configuration state CONF.
 795  *
 796  * Usually you'd use the static initializer `CONFIG_INIT'.
 797  */
 798 void config_init(struct config *conf)
 799   { treap_init(&conf->sections); }
 800
 801 /* Find and return the section with null-terminated NAME in CONF.
 802  *
 803  * If no section is found, the behaviour depends on whether `CF_CREAT' is set
 804  * in F: if so, an empty section is created and returned; otherwise, a null
 805  * pointer is returned.
 806  */
 807 struct config_section *config_find_section(struct config *conf, unsigned f,
 808                                            const char *name)
 809   { return (config_find_section_n(conf, f, name, strlen(name))); }
 810
 811 /* Find and return the section with the SZ-byte NAME in CONF.
 812  *
 813  * This works like `config_find_section', but with an explicit length for the
 814  * NAME rather than null-termination.
 815  */
 816 struct config_section *config_find_section_n(struct config *conf, unsigned f,
 817                                              const char *name, size_t sz)
 818 {
 819   struct config_section *sect;
 820   struct treap_path path;
 821
 822   if (!(f&CF_CREAT))
 823     sect = treap_lookup(&conf->sections, name, sz);
 824   else {
 825     sect = treap_probe(&conf->sections, name, sz, &path);
 826     if (!sect) {
 827       sect = xmalloc(sizeof(*sect));
 828       if (!conf->head) conf->tail = &conf->head;
 829       sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
 830       sect->parents = 0; sect->nparents = SIZE_MAX;
 831       treap_init(&sect->vars); treap_init(&sect->cache);
 832       treap_insert(&conf->sections, &path, &sect->_node, name, sz);
 833       config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
 834     }
 835   }
 836   return (sect);
 837 }
 838
 839 /* Set the fallback section for CONF to be SECT.
 840  *
 841  * That is, if a section has no explicit parents, then by default it will
 842  * have a single parent which is SECT.  If SECT is null then there is no
 843  * fallback section, and sections which don't have explicitly specified
 844  * parents have no parents at all.  (This is the default situation.)
 845  */
 846 void config_set_fallback(struct config *conf, struct config_section *sect)
 847   { conf->fallback = sect; }
 848
 849 /* Arrange that SECT has PARENT as its single parent section.
 850  *
 851  * If PARENT is null, then arrange that SECT has no parents at all.  In
 852  * either case, any `@parents' setting will be ignored.
 853  */
 854 void config_set_parent(struct config_section *sect,
 855                        struct config_section *parent)
 856 {
 857   if (!parent)
 858     sect->nparents = 0;
 859   else {
 860     sect->parents = xmalloc(sizeof(*sect->parents));
 861     sect->parents[0] = parent; sect->nparents = 1;
 862   }
 863 }
 864
 865 /* Initialize I to iterate over the sections defined in CONF. */
 866 void config_start_section_iter(struct config *conf,
 867                                struct config_section_iter *i)
 868   { i->sect = conf->head; }
 869
 870 /* Return the next section from I, in order of creation.
 871  *
 872  * If there are no more sections, then return null.
 873  */
 874 struct config_section *config_next_section(struct config_section_iter *i)
 875 {
 876   struct config_section *sect;
 877
 878   sect = i->sect;
 879   if (sect) i->sect = sect->next;
 880   return (sect);
 881 }
 882
 883 /* Initialize the `parents' links of SECT, if they aren't set up already.
 884  *
 885  * If SECT contains a `@parents' setting then parse it to determine the
 886  * parents; otherwise use CONF's fallbeck section, as established by
 887  * `config_set_fallback'.
 888  */
 889 static void set_config_section_parents(struct config *conf,
 890                                        struct config_section *sect)
 891 {
 892   struct config_section *parent;
 893   struct config_var *var;
 894   const char *file; unsigned line;
 895   size_t i, n;
 896   char *p, *q, *l;
 897   struct argv av = ARGV_INIT;
 898
 899   /* If the section already has parents established then there's nothing to
 900    * do.
 901    */
 902   if (sect->nparents != SIZE_MAX) return;
 903
 904   /* Look up `@parents', without recursion! */
 905   var = treap_lookup(&sect->vars, "@parents", 8);
 906   if (!var) {
 907     /* No explicit setting: use the fallback setting. */
 908
 909     if (!conf->fallback || conf->fallback == sect)
 910       sect->nparents = 0;
 911     else {
 912       sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
 913       sect->parents[0] = conf->fallback;
 914     }
 915   } else {
 916     /* Found a `@parents' list: parse it and set the parents list. */
 917
 918     file = var->file; line = var->line; if (!file) file = "<internal>";
 919
 920     /* We do this in two phases.  First, we parse out the section names, and
 921      * record start/limit pointer pairs in `av'.
 922      */
 923     p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
 924     while (*p) {
 925       q = p;
 926       p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
 927       argv_append(&av, q); argv_append(&av, p);
 928       while (p < l && ISSPACE(*p)) p++;
 929       if (p >= l) break;
 930       if (*p == ',') do p++; while (ISSPACE(*p));
 931     }
 932
 933     /* Now that we've finished parsing, we know how many parents we're going
 934      * to have, so we can allocate the `parents' vector and fill it in.
 935      */
 936     sect->nparents = av.n/2;
 937     sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
 938     for (i = 0; i < av.n; i += 2) {
 939       n = av.v[i + 1] - av.v[i];
 940       parent = config_find_section_n(conf, 0, av.v[i], n);
 941       if (!parent)
 942         lose("%s:%u: unknown parent section `%.*s'",
 943              file, line, (int)n, av.v[i]);
 944       sect->parents[i/2] = parent;
 945     }
 946   }
 947
 948   /* All done. */
 949   argv_release(&av);
 950 }
 951
 952 /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
 953  *
 954  * If successful, return a pointer to the variable; otherwise return null.
 955  * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
 956  * errors.
 957  */
 958 static struct config_var *search_recursive(struct config *conf,
 959                                            struct config_section *sect,
 960                                            const char *name, size_t sz)
 961 {
 962   struct config_cache_entry *cache;
 963   struct treap_path path;
 964   struct config_var *var, *v;
 965   size_t i, j = j;
 966
 967   /* If the variable is defined locally then we can just return it. */
 968   var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
 969
 970   /* If we have no parents then there's no way we can find it. */
 971   set_config_section_parents(conf, sect);
 972   if (!sect->parents) return (0);
 973
 974   /* Otherwise we must visit the section's parents.  We can avoid paying for
 975    * this on every lookup by using a cache.  If there's already an entry for
 976    * this variable then we can return the result immediately (note that we
 977    * cache both positive and negative outcomes).  Otherwise we create a new
 978    * cache entry, do the full recursive search, and fill in the result when
 979    * we're done.
 980    *
 981    * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
 982    * new cache entry when it's first created, and clear it when we fill in
 983    * the result: if we encounter an open cache entry again, we know that
 984    * we've found a cycle.
 985    */
 986   cache = treap_probe(&sect->cache, name, sz, &path);
 987   if (!cache) {
 988     cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
 989     treap_insert(&sect->cache, &path, &cache->_node, name, sz);
 990   } else if (cache->f&CF_OPEN)
 991     lose("inheritance cycle through section `%s'",
 992          CONFIG_SECTION_NAME(sect));
 993   else
 994     return (cache->var);
 995
 996   /* Recursively search in each parent.  We insist that all parents that find
 997    * a variable find the same binding; otherwise we declare ambiguous
 998    * inheritance.
 999    */
1000   for (i = 0; i < sect->nparents; i++) {
1001     v = search_recursive(conf, sect->parents[i], name, sz);
1002     if (!v);
1003     else if (!var) { var = v; j = i; }
1004     else if (var != v)
1005       lose("section `%s' inherits variable `%s' ambiguously "
1006            "via `%s' and `%s'",
1007            CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
1008            CONFIG_SECTION_NAME(sect->parents[j]),
1009            CONFIG_SECTION_NAME(sect->parents[i]));
1010   }
1011
1012   /* All done: fill the cache entry in, clear the open flag, and return the
1013    * result.
1014    */
1015   cache->var = var; cache->f &= ~CF_OPEN;
1016   return (var);
1017 }
1018
1019 /* Find and return the variable with null-terminated NAME in SECT.
1020  *
1021  * If `CF_INHERIT' is set in F, then the function searches the section's
1022  * parents recursively; otherwise, it only checks to see whether the variable
1023  * is set directly in SECT.
1024  *
1025  * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1026  * set in F: if so, an empty variable is created and returned; otherwise, a
1027  * null pointer is returned.
1028  *
1029  * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1030  */
1031 struct config_var *config_find_var(struct config *conf,
1032                                    struct config_section *sect,
1033                                    unsigned f, const char *name)
1034   { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
1035
1036 /* Find and return the variable with the given SZ-byte NAME in SECT.
1037  *
1038  * This works like `config_find_var', but with an explicit length for the
1039  * NAME rather than null-termination.
1040  */
1041 struct config_var *config_find_var_n(struct config *conf,
1042                                      struct config_section *sect,
1043                                      unsigned f, const char *name, size_t sz)
1044 {
1045   struct config_var *var;
1046   struct treap_path path;
1047
1048   if (f&CF_INHERIT)
1049     var = search_recursive(conf, sect, name, sz);
1050   else if (!(f&CF_CREAT))
1051     var = treap_lookup(&sect->vars, name, sz);
1052   else {
1053     var = treap_probe(&sect->vars, name, sz, &path);
1054     if (!var) {
1055       var = xmalloc(sizeof(*var));
1056       var->val = 0; var->file = 0; var->f = 0; var->line = 1;
1057       treap_insert(&sect->vars, &path, &var->_node, name, sz);
1058     }
1059   }
1060   return (var);
1061 }
1062
1063 /* Set variable NAME to VALUE in SECT, with associated flags F.
1064  *
1065  * The names are null-terminated.  The flags are variable flags: see `struct
1066  * config_var' for details.  Returns the variable.
1067  *
1068  * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1069  * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1070  */
1071 struct config_var *config_set_var(struct config *conf,
1072                                   struct config_section *sect,
1073                                   unsigned f,
1074                                   const char *name, const char *value)
1075 {
1076   return (config_set_var_n(conf, sect, f,
1077                            name, strlen(name),
1078                            value, strlen(value)));
1079 }
1080
1081 /* As `config_set_var', except that the variable NAME and VALUE have explicit
1082  * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1083  * terminated.
1084  */
1085 struct config_var *config_set_var_n(struct config *conf,
1086                                     struct config_section *sect,
1087                                     unsigned f,
1088                                     const char *name, size_t namelen,
1089                                     const char *value, size_t valuelen)
1090 {
1091   struct config_var *var =
1092     config_find_var_n(conf, sect, CF_CREAT, name, namelen);
1093
1094   if (var->f&~f&CF_OVERRIDE) return (var);
1095   free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
1096   var->f = f;
1097   return (var);
1098 }
1099
1100 /* Initialize I to iterate over the variables directly defined in SECT. */
1101 void config_start_var_iter(struct config *conf, struct config_section *sect,
1102                            struct config_var_iter *i)
1103   { treap_start_iter(&sect->vars, &i->i); }
1104
1105 /* Return next variable from I, in ascending lexicographical order.
1106  *
1107  * If there are no more variables, then return null.
1108  */
1109 struct config_var *config_next_var(struct config_var_iter *i)
1110   { return (treap_next(&i->i)); }
1111
1112 /* Read and parse configuration FILE, applying its settings to CONF.
1113  *
1114  * If all goes well, the function returns 0.  If the file is not found, then
1115  * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1116  * function simply returns -1.  Otherwise, a fatal error is reported.  Note
1117  * that this /only/ applies if the file does not exist (specifically, opening
1118  * it fails with `ENOENT') -- any other problems are reported as fatal
1119  * errors regardless of the flag setting.
1120  */
1121 int config_read_file(struct config *conf, const char *file, unsigned f)
1122 {
1123   struct config_section *sect;
1124   struct config_var *var;
1125   struct dstr d = DSTR_INIT, dd = DSTR_INIT;
1126   unsigned line = 0;
1127   const char *p, *q, *r;
1128   FILE *fp;
1129
1130   /* Try to open the file. */
1131   fp = fopen(file, "r");
1132   if (!fp) {
1133     if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
1134     lose("failed to open configuration file `%s': %s",
1135          file, strerror(errno));
1136   }
1137
1138   /* Find the initial section. */
1139   sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
1140
1141   /* Work through the file, line by line. */
1142   for (;;) {
1143     dstr_reset(&d); if (dstr_readline(&d, fp)) break;
1144     line++;
1145
1146     /* Trim trailing spaces from the line.  The syntax is sensitive to
1147      * leading spaces, so we can't trim those yet.
1148      */
1149     while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
1150     d.p[d.len] = 0;
1151
1152     if (!*d.p || *d.p == ';')
1153       /* Ignore comments entirely.  (In particular, a comment doesn't
1154        * interrupt a multiline variable value.)
1155        */
1156       ;
1157
1158     else if (ISSPACE(d.p[0])) {
1159       /* The line starts with whitespace, so it's a continuation line. */
1160
1161       /* Skip the initial whitespace. */
1162       p = d.p; while (ISSPACE(*p)) p++;
1163
1164       /* If we aren't collecting a variable value then this is an error.
1165        * Otherwise, accumulate it into the current value.
1166        */
1167       if (!var)
1168         lose("%s:%u: continuation line, but no variable", file, line);
1169       if (dd.len) dstr_putc(&dd, ' ');
1170       dstr_putm(&dd, p, d.len - (p - d.p));
1171
1172     } else {
1173       /* The line starts in the first column. */
1174
1175       /* If there's a value value being collected then we must commit it to
1176        * its variable (unless there's already a setting there that says we
1177        * shouldn't).
1178        */
1179       if (var) {
1180         if (!(var->f&CF_OVERRIDE))
1181           { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1182         var = 0;
1183       }
1184
1185       /* Now decide what kind of line this is. */
1186       if (d.p[0] == '[') {
1187         /* It's a section header. */
1188
1189         /* Parse the header. */
1190         p = d.p + 1; while (ISSPACE(*p)) p++;
1191         q = scan_name("section", p, d.p + d.len, file, line);
1192         r = q; while (ISSPACE(*r)) r++;
1193         if (*r != ']')
1194           lose("%s:%u: expected `]' in section header", file, line);
1195         if (r[1])
1196           lose("%s:%u: trailing junk after `]' in section header",
1197                file, line);
1198
1199         /* Create the new section. */
1200         sect = config_find_section_n(conf, CF_CREAT, p, q - p);
1201
1202       } else {
1203         /* It's a variable assignment.  Parse the name out. */
1204         p = scan_name("variable", d.p, d.p + d.len, file, line);
1205         var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
1206         while (ISSPACE(*p)) p++;
1207         if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
1208         p++; while (ISSPACE(*p)) p++;
1209
1210         /* Clear out the variable's initial value, unless we shouldn't
1211          * override it.
1212          */
1213         if (!(var->f&CF_OVERRIDE)) {
1214           free(var->val); var->val = 0; var->f = 0;
1215           free(var->file); var->file = xstrdup(file); var->line = line;
1216         }
1217         dstr_reset(&dd); dstr_puts(&dd, p);
1218       }
1219     }
1220   }
1221
1222   /* If there's a value under construction then commit the result. */
1223   if (var && !(var->f&CF_OVERRIDE))
1224     { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1225
1226   /* Close the file. */
1227   if (fclose(fp))
1228     lose("error reading configuration file `%s': %s", file, strerror(errno));
1229
1230   /* All done. */
1231   dstr_release(&d); dstr_release(&dd);
1232   return (0);
1233 }
1234
1235 /* Populate SECT with environment variables.
1236  *
1237  * Environment variables are always set with `CF_LITERAL'.
1238  */
1239 void config_read_env(struct config *conf, struct config_section *sect)
1240 {
1241   const char *p, *v;
1242   size_t i;
1243
1244   for (i = 0; (p = environ[i]) != 0; i++) {
1245     v = strchr(p, '='); if (!v) continue;
1246     config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
1247   }
1248 }
1249
1250 /*----- Substitution and quoting ------------------------------------------*/
1251
1252 /* The substitution and word-splitting state.
1253  *
1254  * This only keeps track of the immutable parameters for the substitution
1255  * task: stuff which changes (flags, filtering state, cursor position) is
1256  * maintained separately.
1257  */
1258 struct subst {
1259   struct config *config;                /* configuration state */
1260   struct config_section *home;          /* home section for lookups */
1261   struct dstr *d;                       /* current word being constructed */
1262   struct argv *av;                      /* output word list */
1263 };
1264
1265 /* Flags for `subst' and related functions. */
1266 #define SF_SPLIT 0x0001u                /* split at (unquoted) whitespace */
1267 #define SF_QUOT 0x0002u                 /* currently within double quotes */
1268 #define SF_SUBST 0x0004u                /* apply `$-substitutions */
1269 #define SF_SUBEXPR 0x0008u              /* stop at delimiter `|' or `}' */
1270 #define SF_SPANMASK 0x00ffu             /* mask for the above */
1271
1272 #define SF_WORD 0x0100u                 /* output word under construction */
1273 #define SF_SKIP 0x0200u                 /* not producing output */
1274 #define SF_LITERAL 0x0400u              /* do not expand or substitute */
1275 #define SF_UPCASE 0x0800u               /* convert to uppercase */
1276 #define SF_DOWNCASE 0x1000u             /* convert to lowercase */
1277 #define SF_CASEMASK 0x1800u             /* mask for case conversions */
1278
1279 /* Apply filters encoded in QFILT and F to the text from P to L, and output.
1280  *
1281  * SB is the substitution state which, in particular, explains where the
1282  * output should go.
1283  *
1284  * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1285  * conversions, and a nesting depth QFILT for toothpick escaping.  (QFILT is
1286  * encoded as the number of toothpicks to print: see `subst' for how this
1287  * determined.)
1288  */
1289 static void filter_string(const char *p, const char *l,
1290                           const struct subst *sb, unsigned qfilt, unsigned f)
1291 {
1292   size_t r, n;
1293   char *q; const char *pp, *ll;
1294
1295   if (!qfilt && !(f&SF_CASEMASK))
1296     /* Fast path: there's nothing to do: just write to the output. */
1297     dstr_putm(sb->d, p, l - p);
1298
1299   else for (;;) {
1300     /* We must be a bit more circumspect. */
1301
1302     /* Determine the length of the next span of characters which don't need
1303      * escaping.  (If QFILT is zero then this is everything.)
1304      */
1305     r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
1306     if (n > r) n = r;
1307
1308     if (!(f&SF_CASEMASK))
1309       /* No case conversion: we can just emit this chunk. */
1310
1311       dstr_putm(sb->d, p, n);
1312
1313     else {
1314       /* Case conversion to do.  Arrange enough space for the output, and
1315        * convert it character by character.
1316        */
1317
1318       dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
1319       if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
1320       else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
1321       sb->d->len += n;
1322     }
1323
1324     /* If we've reached the end then stop. */
1325     if (n >= r) break;
1326
1327     /* Otherwise we must have found a character which requires escaping.
1328      * Emit enough toothpicks.
1329      */
1330     dstr_putcn(sb->d, '\\', qfilt);
1331
1332     /* This character is now done, so we can skip over and see if there's
1333      * another chunk of stuff we can do at high speed.
1334      */
1335     dstr_putc(sb->d, p[n]); p += n + 1;
1336   }
1337 }
1338
1339 /* Scan and resolve a `[SECT:]VAR' specifier at P.
1340  *
1341  * Return the address of the next character following the specifier; and set
1342  * *VAR_OUT to point to the variable we found, or null if it's not there.  L
1343  * is a limit on the region of the buffer that we should process; SB is the
1344  * substitution state which provides the home section if none is given
1345  * explicitly; FILE and LINE are the source location to blame for problems.
1346  */
1347 static const char *retrieve_varspec(const char *p, const char *l,
1348                                     const struct subst *sb,
1349                                     struct config_var **var_out,
1350                                     const char *file, unsigned line)
1351 {
1352   struct config_section *sect = sb->home;
1353   const char *t;
1354
1355   t = scan_name("section or variable", p, l, file, line);
1356   if (t < l && *t == ':') {
1357     sect = config_find_section_n(sb->config, 0, p, t - p);
1358     p = t + 1; t = scan_name("variable", p, l, file, line);
1359   }
1360
1361   if (!sect) *var_out = 0;
1362   else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
1363   return (t);
1364 }
1365
1366 /* Substitute and/or word-split text.
1367  *
1368  * The input text starts at P, and continues to (just before) L.  Context for
1369  * the task is provided by SB; the source location to blame is FILE and LINE
1370  * (FILE may be null so that this can be passed directly from a `config_var'
1371  * without further checking); QFILT is the nesting depth in toothpick-
1372  * escaping; and F holds a mask of `SF_...' flags.
1373  */
1374 static const char *subst(const char *p, const char *l,
1375                          const struct subst *sb,
1376                          const char *file, unsigned line,
1377                          unsigned qfilt, unsigned f)
1378 {
1379   struct config_var *var;
1380   const char *q0, *q1, *t;
1381   unsigned subqfilt, ff;
1382   size_t n;
1383
1384   /* It would be best if we could process literal text at high speed.  To
1385    * this end, we have a table, indexed by the low-order bits of F, to tell
1386    * us which special characters we need to stop at.  This way, we can use
1387    * `strcspn' to skip over literal text and stop at the next character which
1388    * needs special handling.  Entries in this table with a null pointer
1389    * correspond to impossible flag settings: notably, `SF_QUOT' can only be
1390    * set when `SF_SUBST' is also set.
1391    */
1392   static const char *const delimtab[] = {
1393
1394 #define ESCAPE "\\"                     /* always watch for `\'-escapes */
1395 #define SUBST "$"                       /* check for `$' if `SF_SUBST' set */
1396 #define WORDSEP " \f\r\n\t\v'\""        /* space, quotes if `SF_SPLIT' but
1397                                          * not `SF_QUOT' */
1398 #define QUOT "\""                       /* only quotes if `SF_SPLIT' and
1399                                          * `SF_QUOT' */
1400 #define DELIM "|}"                      /* end delimiters of `SF_SUBEXPR' */
1401
1402     ESCAPE,                             /* --- */
1403     ESCAPE             WORDSEP,         /* SPLIT */
1404     0,                                  /*         QUOT */
1405     ESCAPE             QUOT,            /* SPLIT | QUOT */
1406     ESCAPE       SUBST,                 /*                SUBST */
1407     ESCAPE       SUBST WORDSEP,         /* SPLIT |        SUBST */
1408     0,                                  /*         QUOT | SUBST */
1409     ESCAPE       SUBST QUOT,            /* SPLIT | QUOT | SUBST */
1410     ESCAPE DELIM,                       /*                        SUBEXPR */
1411     ESCAPE DELIM       WORDSEP,         /* SPLIT |                SUBEXPR */
1412     0,                                  /*         QUOT |         SUBEXPR */
1413     ESCAPE DELIM       QUOT,            /* SPLIT | QUOT |         SUBEXPR */
1414     ESCAPE DELIM SUBST,                 /*                SUBST | SUBEXPR */
1415     ESCAPE DELIM SUBST WORDSEP,         /* SPLIT |        SUBST | SUBEXPR */
1416     0,                                  /*         QUOT | SUBST | SUBEXPR */
1417     ESCAPE DELIM SUBST QUOT             /* SPLIT | QUOT | SUBST | SUBEXPR */
1418
1419 #undef ESCAPE
1420 #undef SUBST
1421 #undef WORDSEP
1422 #undef QUOT
1423 #undef DELIM
1424   };
1425
1426   /* Set FILE to be useful if it was null on entry. */
1427   if (!file) file = "<internal>";
1428
1429   /* If the text is literal then hand off to `filter_string'.  This obviously
1430    * starts a word.
1431    */
1432   if (f&SF_LITERAL) {
1433     filter_string(p, l, sb, qfilt, f);
1434     f |= SF_WORD;
1435     goto done;
1436   }
1437
1438   /* Chew through the input until it's all gone. */
1439   while (p < l) {
1440
1441     if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
1442       /* This is whitespace, we're supposed to split, and we're not within
1443        * quotes, so we should split here.
1444        */
1445
1446       /* If there's a word in progress then we should commit it. */
1447       if (f&SF_WORD) {
1448         if (!(f&SF_SKIP)) {
1449           argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1450           dstr_reset(sb->d);
1451         }
1452         f &= ~SF_WORD;
1453       }
1454
1455       /* Skip over further whitespace at high speed. */
1456       do p++; while (p < l && ISSPACE(*p));
1457
1458     } else if (*p == '\\') {
1459       /* This is a toothpick, so start a new word and add the next character
1460        * to it.
1461        */
1462
1463       /* If there's no next character then we should be upset. */
1464       p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
1465
1466       if (!(f&SF_SKIP)) {
1467
1468         /* If this is a double quote or backslash then check QFILT to see if
1469          * it needs escaping.
1470          */
1471         if (qfilt && (*p == '"' || *p == '\\'))
1472           dstr_putcn(sb->d, '\\', qfilt);
1473
1474         /* Output the character. */
1475         if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
1476         else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
1477         else dstr_putc(sb->d, *p);
1478       }
1479
1480       /* Move past the escaped character.  Remember we started a word. */
1481       p++; f |= SF_WORD;
1482
1483     } else if ((f&SF_SPLIT) && *p == '"') {
1484       /* This is a double quote, and we're word splitting.  We're definitely
1485        * in a word now.  Toggle whether we're within quotes.
1486        */
1487
1488       f ^= SF_QUOT; f |= SF_WORD; p++;
1489
1490     } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
1491       /* This is a single quote, and we're word splitting but not within
1492        * double quotes.  Find the matching end quote, and just output
1493        * everything between literally.
1494        */
1495
1496       p++; t = strchr(p, '\'');
1497       if (!t || t >= l) lose("%s:%u: missing `''", file, line);
1498       if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
1499       p = t + 1; f |= SF_WORD;
1500
1501     } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
1502       /* This is an end delimiter, and we're supposed to stop here. */
1503       break;
1504
1505     } else if ((f&SF_SUBST) && *p == '$') {
1506       /* This is a `$' and we're supposed to do substitution. */
1507
1508       /* The kind of substitution is determined by the next character. */
1509       p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
1510
1511       /* Prepare flags for a recursive substitution.
1512        *
1513        * Hide our quote state from the recursive call.  If we're within a
1514        * word, then disable word-splitting.
1515        */
1516       ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
1517
1518       /* Now dispatch based on the following character. */
1519       switch (*p) {
1520
1521         case '?':
1522           /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1523
1524           /* Skip initial space. */
1525           p++; while (p < l && ISSPACE(*p)) p++;
1526
1527           /* Find the variable. */
1528           p = retrieve_varspec(p, l, sb, &var, file, line);
1529
1530           /* Skip whitespace again. */
1531           while (p < l && ISSPACE(*p)) p++;
1532
1533           /* Expect the opening `{'. */
1534           if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
1535           p++;
1536
1537           /* We'll process the parts recursively, but we need to come back
1538            * when we hit the appropriate delimiters, so arrange for that.
1539            */
1540           ff |= SF_SUBEXPR;
1541
1542           /* Process the consequent (skip if the variable wasn't found). */
1543           p = subst(p, l, sb, file, line, qfilt,
1544                     ff | (var ? 0 : SF_SKIP));
1545
1546           /* If there's a `|' then process the alternative too (skip if the
1547            * variable /was/ found).
1548            */
1549           if (p < l && *p == '|')
1550             p = subst(p + 1, l, sb, file, line, qfilt,
1551                       ff | (var ? SF_SKIP : 0));
1552
1553           /* We should now be past the closing `}'. */
1554           if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1555           p++;
1556           break;
1557
1558         case '{':
1559           /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1560
1561           /* Skip initial whitespace. */
1562           p++; while (p < l && ISSPACE(*p)) p++;
1563
1564           /* Find the variable. */
1565           q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
1566
1567           /* Determine the filters to apply when substituting the variable
1568            * value.
1569            */
1570           subqfilt = qfilt;
1571           for (;;) {
1572
1573             /* Skip spaces again. */
1574             while (p < l && ISSPACE(*p)) p++;
1575
1576             /* If there's no `|' then there are no more filters, so stop. */
1577             if (p >= l || *p != '|') break;
1578
1579             /* Skip the `|' and more spaces. */
1580             p++; while (p < l && ISSPACE(*p)) p++;
1581
1582             /* Collect the filter name. */
1583             t = scan_name("filter", p, l, file, line);
1584
1585             /* Dispatch on the filter name. */
1586             if (t - p == 1 && *p == 'q')
1587               /* `q' -- quote for Lisp string.
1588                *
1589                * We're currently adding Q `\' characters before each naughty
1590                * character.  But a backslash itself is naughty too, so that
1591                * makes Q + 1 naughty characters, each of which needs a
1592                * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1593                *
1594                * Calculate this here rather than at each point toothpicks
1595                * need to be deployed.
1596                */
1597
1598               subqfilt = 2*subqfilt + 1;
1599
1600             else if (t - p == 1 && *p == 'l')
1601               /* `l' -- convert to lowercase.
1602                *
1603                * If a case conversion is already set, then that will override
1604                * whatever we do here, so don't bother.
1605                */
1606
1607               { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
1608
1609             else if (t - p == 1 && *p == 'u')
1610               /* `u' -- convert to uppercase.
1611                *
1612                * If a case conversion is already set, then that will override
1613                * whatever we do here, so don't bother.
1614                */
1615               { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
1616
1617             else
1618               /* Something else we didn't understand. */
1619               lose("%s:%u: unknown filter `%.*s'",
1620                    file, line, (int)(t - p), p);
1621
1622             /* Continue from after the filter name. */
1623             p = t;
1624           }
1625
1626           /* If we're not skipping, and we found a variable, then substitute
1627            * its value.  This is the point where we need to be careful about
1628            * recursive expansion.
1629            */
1630           if (!(f&SF_SKIP) && var) {
1631             if (var->f&CF_EXPAND)
1632               lose("%s:%u: recursive expansion of variable `%.*s'",
1633                    file, line, (int)(q1 - q0), q0);
1634             var->f |= CF_EXPAND;
1635             subst(var->val, var->val + var->n, sb,
1636                   var->file, var->line, subqfilt,
1637                   ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1638             var->f &= ~CF_EXPAND;
1639           }
1640
1641           /* If there's an alternative, then we need to process (or maybe
1642            * skip) it.  Otherwise, we should complain if there was no
1643            * variable, and we're not skipping.
1644            */
1645           if (p < l && *p == '?')
1646             p = subst(p + 1, l, sb, file, line, subqfilt,
1647                       ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
1648           else if (!var && !(f&SF_SKIP))
1649             lose("%s:%u: unknown variable `%.*s'",
1650                  file, line, (int)(q1 - q0), q0);
1651
1652           /* Expect a `}' here.  (No need to skip spaces: we already did that
1653            * after scanning for filters, and either there was no alternative,
1654            * or we advanced to a delimiter character anyway.)
1655            */
1656           if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1657           p++;
1658           break;
1659
1660         default:
1661           /* Something else.  That's a shame. */
1662           lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
1663       }
1664
1665       /* Complain if we started out in word-splitting state, and therefore
1666        * have added a whole number of words to the output, but there's a
1667        * word-fragment stuck onto the end of this substitution.
1668        */
1669       if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
1670           !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
1671         lose("%s:%u: surprising word boundary "
1672              "after splicing substitution",
1673              file, line);
1674     }
1675
1676     else {
1677       /* Something else.  Try to skip over this at high speed.
1678        *
1679        * This makes use of the table we set up earlier.
1680        */
1681
1682       n = strcspn(p, delimtab[f&SF_SPANMASK]);
1683       if (n > l - p) n = l - p;
1684       if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
1685       p += n; f |= SF_WORD;
1686     }
1687   }
1688
1689 done:
1690   /* Sort out the wreckage. */
1691
1692   /* If we're still within quotes then something has gone wrong. */
1693   if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
1694
1695   /* If we're within a word, and should be splitting, then commit the word to
1696    * the output list.
1697    */
1698   if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
1699     argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1700     dstr_reset(sb->d);
1701   }
1702
1703   /* And, with that, we're done. */
1704   return (p);
1705 }
1706
1707 /* Expand substitutions in a string.
1708  *
1709  * Expand the null-terminated string P relative to the HOME section, using
1710  * configuration CONFIG, and appending the result to dynamic string D.  Blame
1711  * WHAT in any error messages.
1712  */
1713 void config_subst_string(struct config *config, struct config_section *home,
1714                          const char *what, const char *p, struct dstr *d)
1715 {
1716   struct subst sb;
1717
1718   sb.config = config; sb.home = home; sb.d = d;
1719   subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
1720   dstr_putz(d);
1721 }
1722
1723 /* Expand substitutions in a string.
1724  *
1725  * Expand the null-terminated string P relative to the HOME section, using
1726  * configuration CONFIG, returning the result as a freshly malloc(3)ed
1727  * string.  Blame WHAT in any error messages.
1728  */
1729 char *config_subst_string_alloc(struct config *config,
1730                                 struct config_section *home,
1731                                 const char *what, const char *p)
1732 {
1733   struct dstr d = DSTR_INIT;
1734   char *q;
1735
1736   config_subst_string(config, home, what, p, &d);
1737   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1738 }
1739
1740 /* Expand substitutions in a variable.
1741  *
1742  * Expand the value of the variable VAR relative to the HOME section, using
1743  * configuration CONFIG, appending the result to dynamic string D.
1744  */
1745 void config_subst_var(struct config *config, struct config_section *home,
1746                       struct config_var *var, struct dstr *d)
1747 {
1748   struct subst sb;
1749
1750   sb.config = config; sb.home = home; sb.d = d;
1751   var->f |= CF_EXPAND;
1752   subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1753         SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1754   var->f &= ~CF_EXPAND;
1755   dstr_putz(d);
1756 }
1757
1758 /* Expand substitutions in a variable.
1759  *
1760  * Expand the value of the variable VAR relative to the HOME section, using
1761  * configuration CONFIG, returning the result as a freshly malloc(3)ed
1762  * string.
1763  */
1764 char *config_subst_var_alloc(struct config *config,
1765                              struct config_section *home,
1766                              struct config_var *var)
1767 {
1768   struct dstr d = DSTR_INIT;
1769   char *q;
1770
1771   config_subst_var(config, home, var, &d);
1772   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1773 }
1774
1775 /* Expand substitutions in a variable and split into words.
1776  *
1777  * Expand and word-split the value of the variable VAR relative to the HOME
1778  * section, using configuration CONFIG, appending the resulting words into
1779  * the vector AV.
1780  */
1781 void config_subst_split_var(struct config *config,
1782                             struct config_section *home,
1783                             struct config_var *var, struct argv *av)
1784 {
1785   struct dstr d = DSTR_INIT;
1786   struct subst sb;
1787
1788   sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
1789   var->f |= CF_EXPAND;
1790   subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1791         SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1792   var->f &= ~CF_EXPAND;
1793   dstr_release(&d);
1794 }
1795
1796 /*----- That's all, folks -------------------------------------------------*/