| 1 | /* -*-c-*- |
| 2 | * |
| 3 | * Common definitions for `runlisp' |
| 4 | * |
| 5 | * (c) 2020 Mark Wooding |
| 6 | */ |
| 7 | |
| 8 | /*----- Licensing notice --------------------------------------------------* |
| 9 | * |
| 10 | * This file is part of Runlisp, a tool for invoking Common Lisp scripts. |
| 11 | * |
| 12 | * Runlisp is free software: you can redistribute it and/or modify it |
| 13 | * under the terms of the GNU General Public License as published by the |
| 14 | * Free Software Foundation; either version 3 of the License, or (at your |
| 15 | * option) any later version. |
| 16 | * |
| 17 | * Runlisp is distributed in the hope that it will be useful, but WITHOUT |
| 18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 20 | * for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU General Public License |
| 23 | * along with Runlisp. If not, see <https://www.gnu.org/licenses/>. |
| 24 | */ |
| 25 | |
| 26 | /*----- Header files ------------------------------------------------------*/ |
| 27 | |
| 28 | #include "config.h" |
| 29 | |
| 30 | #include <assert.h> |
| 31 | |
| 32 | #include <ctype.h> |
| 33 | #include <errno.h> |
| 34 | #include <stdarg.h> |
| 35 | #include <stdio.h> |
| 36 | #include <stdlib.h> |
| 37 | #include <string.h> |
| 38 | |
| 39 | #include <unistd.h> |
| 40 | |
| 41 | #include "lib.h" |
| 42 | |
| 43 | /*----- Diagnostic utilities ----------------------------------------------*/ |
| 44 | |
| 45 | const char *progname = "???"; |
| 46 | /* Our program name, for use in error messages. */ |
| 47 | |
| 48 | /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */ |
| 49 | void set_progname(const char *prog) |
| 50 | { |
| 51 | const char *p; |
| 52 | |
| 53 | p = strrchr(prog, '/'); |
| 54 | progname = p ? p + 1 : prog; |
| 55 | } |
| 56 | |
| 57 | /* Report an error or warning in Unix style, given a captured argument |
| 58 | * cursor. |
| 59 | */ |
| 60 | void vmoan(const char *msg, va_list ap) |
| 61 | { |
| 62 | fprintf(stderr, "%s: ", progname); |
| 63 | vfprintf(stderr, msg, ap); |
| 64 | fputc('\n', stderr); |
| 65 | } |
| 66 | |
| 67 | /* Issue a warning message. */ |
| 68 | void moan(const char *msg, ...) |
| 69 | { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); } |
| 70 | |
| 71 | /* Issue a fatal error message and exit unsuccessfully. */ |
| 72 | void lose(const char *msg, ...) |
| 73 | { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); } |
| 74 | |
| 75 | /*----- Memory allocation -------------------------------------------------*/ |
| 76 | |
| 77 | /* Allocate and return a pointer to N bytes, or report a fatal error. |
| 78 | * |
| 79 | * Release the pointer using `free' as usual. If N is zero, returns null |
| 80 | * (but you are not expected to check for this). |
| 81 | */ |
| 82 | void *xmalloc(size_t n) |
| 83 | { |
| 84 | void *p; |
| 85 | |
| 86 | if (!n) return (0); |
| 87 | p = malloc(n); if (!p) lose("failed to allocate memory"); |
| 88 | return (p); |
| 89 | } |
| 90 | |
| 91 | /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long. |
| 92 | * |
| 93 | * The block might (and probably will) move, so it returns the new address. |
| 94 | * If N is zero, then the block is freed (if necessary) and a null pointer |
| 95 | * returned; otherwise, if P is null then a fresh block is allocated. If |
| 96 | * allocation fails, then a fatal error is reported. |
| 97 | */ |
| 98 | void *xrealloc(void *p, size_t n) |
| 99 | { |
| 100 | if (!n) { free(p); return (0); } |
| 101 | else if (!p) return (xmalloc(n)); |
| 102 | p = realloc(p, n); if (!p) lose("failed to allocate memory"); |
| 103 | return (p); |
| 104 | } |
| 105 | |
| 106 | /* Allocate and return a copy of the N-byte string starting at P. |
| 107 | * |
| 108 | * The new string is null-terminated, though P need not be. If allocation |
| 109 | * fails, then a fatal error is reported. |
| 110 | */ |
| 111 | char *xstrndup(const char *p, size_t n) |
| 112 | { |
| 113 | char *q = xmalloc(n + 1); |
| 114 | |
| 115 | memcpy(q, p, n); q[n] = 0; |
| 116 | return (q); |
| 117 | } |
| 118 | |
| 119 | /* Allocate and return a copy of the null-terminated string starting at P. |
| 120 | * |
| 121 | * If allocation fails, then a fatal error is reported. |
| 122 | */ |
| 123 | char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); } |
| 124 | |
| 125 | /*----- Dynamic strings ---------------------------------------------------*/ |
| 126 | |
| 127 | /* Initialize the string D. |
| 128 | * |
| 129 | * Usually you'd use the static initializer `DSTR_INIT'. |
| 130 | */ |
| 131 | void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; } |
| 132 | |
| 133 | /* Reset string D so it's empty again. */ |
| 134 | void dstr_reset(struct dstr *d) { d->len = 0; } |
| 135 | |
| 136 | /* Ensure that D has at least N unused bytes available. */ |
| 137 | void dstr_ensure(struct dstr *d, size_t n) |
| 138 | { |
| 139 | size_t need = d->len + n, newsz; |
| 140 | |
| 141 | if (need <= d->sz) return; |
| 142 | newsz = d->sz ? 2*d->sz : 16; |
| 143 | while (newsz < need) newsz *= 2; |
| 144 | d->p = xrealloc(d->p, newsz); d->sz = newsz; |
| 145 | } |
| 146 | |
| 147 | /* Release the memory held by D. |
| 148 | * |
| 149 | * It must be reinitialized (e.g., by `dstr_init') before it can be used |
| 150 | * again. |
| 151 | */ |
| 152 | void dstr_release(struct dstr *d) { free(d->p); } |
| 153 | |
| 154 | /* Append the N-byte string at P to D. |
| 155 | * |
| 156 | * P need not be null-terminated. D will not be null-terminated |
| 157 | * afterwards. |
| 158 | */ |
| 159 | void dstr_putm(struct dstr *d, const void *p, size_t n) |
| 160 | { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; } |
| 161 | |
| 162 | /* Append the null-terminated string P to D. |
| 163 | * |
| 164 | * D /is/ guaranteed to be null-terminated after this. |
| 165 | */ |
| 166 | void dstr_puts(struct dstr *d, const char *p) |
| 167 | { |
| 168 | size_t n = strlen(p); |
| 169 | |
| 170 | dstr_ensure(d, n + 1); |
| 171 | memcpy(d->p + d->len, p, n + 1); |
| 172 | d->len += n; |
| 173 | } |
| 174 | |
| 175 | /* Append the single character CH to D. |
| 176 | * |
| 177 | * D will not be null-terminated afterwards. |
| 178 | */ |
| 179 | void dstr_putc(struct dstr *d, int ch) |
| 180 | { dstr_ensure(d, 1); d->p[d->len++] = ch; } |
| 181 | |
| 182 | /* Append N copies of the character CH to D. |
| 183 | * |
| 184 | * D will not be null-terminated afterwards. |
| 185 | */ |
| 186 | void dstr_putcn(struct dstr *d, int ch, size_t n) |
| 187 | { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; } |
| 188 | |
| 189 | /* Null-terminate the string D. |
| 190 | * |
| 191 | * This doesn't change the length of D. If further stuff is appended then |
| 192 | * the null terminator will be overwritten. |
| 193 | */ |
| 194 | void dstr_putz(struct dstr *d) |
| 195 | { dstr_ensure(d, 1); d->p[d->len] = 0; } |
| 196 | |
| 197 | /* Append stuff to D, determined by printf(3) format string P and argument |
| 198 | * tail AP. |
| 199 | * |
| 200 | * D will not be null-terminated afterwards. |
| 201 | */ |
| 202 | void dstr_vputf(struct dstr *d, const char *p, va_list ap) |
| 203 | { |
| 204 | va_list ap2; |
| 205 | size_t r; |
| 206 | int n; |
| 207 | |
| 208 | r = d->sz - d->len; |
| 209 | va_copy(ap2, ap); |
| 210 | n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0); |
| 211 | va_end(ap2); |
| 212 | if (n >= r) { |
| 213 | dstr_ensure(d, n + 1); r = d->sz - d->len; |
| 214 | n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r); |
| 215 | } |
| 216 | d->len += n; |
| 217 | } |
| 218 | |
| 219 | /* Append stuff to D, determined by printf(3) format string P and arguments. |
| 220 | * |
| 221 | * D will not be null-terminated afterwards. |
| 222 | */ |
| 223 | PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...) |
| 224 | { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); } |
| 225 | |
| 226 | /* Append the next input line from FP to D. |
| 227 | * |
| 228 | * Return 0 on success, or -1 if reading immediately fails or encounters |
| 229 | * end-of-file (call ferror(3) to distinguish). Any trailing newline is |
| 230 | * discarded: it is not possible to determine whether the last line was ended |
| 231 | * with a newline. D is guaranteed to be null-terminated afterwards. |
| 232 | */ |
| 233 | int dstr_readline(struct dstr *d, FILE *fp) |
| 234 | { |
| 235 | size_t n; |
| 236 | int any = 0; |
| 237 | |
| 238 | for (;;) { |
| 239 | dstr_ensure(d, 2); |
| 240 | if (!fgets(d->p + d->len, d->sz - d->len, fp)) break; |
| 241 | n = strlen(d->p + d->len); assert(n > 0); any = 1; |
| 242 | d->len += n; |
| 243 | if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; } |
| 244 | } |
| 245 | |
| 246 | if (!any) return (-1); |
| 247 | else return (0); |
| 248 | } |
| 249 | |
| 250 | /*----- Dynamic vectors of strings ----------------------------------------*/ |
| 251 | |
| 252 | /* Initialize the vector AV. |
| 253 | * |
| 254 | * Usually you'd use the static initializer `ARGV_INIT'. |
| 255 | */ |
| 256 | void argv_init(struct argv *av) |
| 257 | { av->v = 0; av->o = av->n = av->sz = 0; } |
| 258 | |
| 259 | /* Reset the vector AV so that it's empty again. */ |
| 260 | void argv_reset(struct argv *av) { av->n = 0; } |
| 261 | |
| 262 | /* Ensure that AV has at least N unused slots at the end. */ |
| 263 | void argv_ensure(struct argv *av, size_t n) |
| 264 | { |
| 265 | size_t need = av->n + av->o + n, newsz; |
| 266 | |
| 267 | if (need <= av->sz) return; |
| 268 | newsz = av->sz ? 2*av->sz : 8; |
| 269 | while (newsz < need) newsz *= 2; |
| 270 | av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o; |
| 271 | av->sz = newsz; |
| 272 | } |
| 273 | |
| 274 | /* Ensure that AV has at least N unused slots at the /start/. */ |
| 275 | void argv_ensure_offset(struct argv *av, size_t n) |
| 276 | { |
| 277 | size_t newoff; |
| 278 | |
| 279 | /* Stupid version. We won't, in practice, be prepending lots of stuff, so |
| 280 | * avoid the extra bookkeeping involved in trying to make a double-ended |
| 281 | * extendable array asymptotically efficient. |
| 282 | */ |
| 283 | if (av->o >= n) return; |
| 284 | newoff = 16; |
| 285 | while (newoff < n) newoff *= 2; |
| 286 | argv_ensure(av, newoff - av->o); |
| 287 | memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *)); |
| 288 | av->v += newoff - av->o; av->o = newoff; |
| 289 | } |
| 290 | |
| 291 | /* Release the memory held by AV. |
| 292 | * |
| 293 | * It must be reinitialized (e.g., by `argv_init') before it can be used |
| 294 | * again. |
| 295 | */ |
| 296 | void argv_release(struct argv *av) { free(av->v - av->o); } |
| 297 | |
| 298 | /* Append the pointer P to AV. */ |
| 299 | void argv_append(struct argv *av, char *p) |
| 300 | { argv_ensure(av, 1); av->v[av->n++] = p; } |
| 301 | |
| 302 | /* Append a null pointer to AV, without extending the vactor length. |
| 303 | * |
| 304 | * The null pointer will be overwritten when the next string is appended. |
| 305 | */ |
| 306 | void argv_appendz(struct argv *av) |
| 307 | { argv_ensure(av, 1); av->v[av->n] = 0; } |
| 308 | |
| 309 | /* Append a N-element vector V of pointers to AV. */ |
| 310 | void argv_appendn(struct argv *av, char *const *v, size_t n) |
| 311 | { |
| 312 | argv_ensure(av, n); |
| 313 | memcpy(av->v + av->n, v, n*sizeof(const char *)); |
| 314 | av->n += n; |
| 315 | } |
| 316 | |
| 317 | /* Append the variable-length vector BV to AV. */ |
| 318 | void argv_appendav(struct argv *av, const struct argv *bv) |
| 319 | { argv_appendn(av, bv->v, bv->n); } |
| 320 | |
| 321 | /* Append the pointers from a variable-length argument list AP to AV. |
| 322 | * |
| 323 | * The list is terminated by a null pointer. |
| 324 | */ |
| 325 | void argv_appendv(struct argv *av, va_list ap) |
| 326 | { |
| 327 | char *p; |
| 328 | for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); } |
| 329 | } |
| 330 | |
| 331 | /* Append the argument pointers, terminated by a null pointer, to AV. */ |
| 332 | void argv_appendl(struct argv *av, ...) |
| 333 | { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); } |
| 334 | |
| 335 | /* Prepend the pointer P to AV. */ |
| 336 | void argv_prepend(struct argv *av, char *p) |
| 337 | { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; } |
| 338 | |
| 339 | /* Prepend a N-element vector V of pointers to AV. */ |
| 340 | void argv_prependn(struct argv *av, char *const *v, size_t n) |
| 341 | { |
| 342 | argv_ensure_offset(av, n); |
| 343 | av->o -= n; av->v -= n; av->n += n; |
| 344 | memcpy(av->v, v, n*sizeof(const char *)); |
| 345 | } |
| 346 | |
| 347 | /* Prepend the variable-length vector BV to AV. */ |
| 348 | void argv_prependav(struct argv *av, const struct argv *bv) |
| 349 | { argv_prependn(av, bv->v, bv->n); } |
| 350 | |
| 351 | /* Prepend the pointers from a variable-length argument list AP to AV. |
| 352 | * |
| 353 | * The list is terminated by a null pointer. |
| 354 | */ |
| 355 | void argv_prependv(struct argv *av, va_list ap) |
| 356 | { |
| 357 | char *p, **v; |
| 358 | size_t n = 0; |
| 359 | |
| 360 | for (;;) { |
| 361 | p = va_arg(ap, char *); if (!p) break; |
| 362 | argv_prepend(av, p); n++; |
| 363 | } |
| 364 | v = av->v; |
| 365 | while (n >= 2) { |
| 366 | p = v[0]; v[0] = v[n - 1]; v[n - 1] = p; |
| 367 | v++; n -= 2; |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | /* Prepend the argument pointers, terminated by a null pointer, to AV. */ |
| 372 | void argv_prependl(struct argv *av, ...) |
| 373 | { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); } |
| 374 | |
| 375 | /*----- Treaps ------------------------------------------------------------*/ |
| 376 | |
| 377 | /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte |
| 378 | * string B in a lexicographic ordering. |
| 379 | * |
| 380 | * All comparisons of keys is handled by this function. |
| 381 | */ |
| 382 | static int str_lt(const char *a, size_t an, const char *b, size_t bn) |
| 383 | { |
| 384 | /* This is a little subtle. We need only compare the first N bytes of the |
| 385 | * strings, where N is the length of the shorter string. If this |
| 386 | * distinguishes the two strings, then we're clearly done. Otherwise, if |
| 387 | * the prefixes are equal then the shorter string is the smaller one. If |
| 388 | * the two strings are the same length, then they're equal. |
| 389 | * |
| 390 | * Hence, if A is the strictly shorter string, then A precedes B if A |
| 391 | * precedes or matches the prefix of B; otherwise A only precedes B if A |
| 392 | * strictly precedes the prefix of B. |
| 393 | */ |
| 394 | if (an < bn) return (MEMCMP(a, <=, b, an)); |
| 395 | else return (MEMCMP(a, <, b, bn)); |
| 396 | } |
| 397 | |
| 398 | /* Initialize the treap T. |
| 399 | * |
| 400 | * Usually you'd use the static initializer `TREAP_INIT'. |
| 401 | */ |
| 402 | void treap_init(struct treap *t) { t->root = 0; } |
| 403 | |
| 404 | /* Look up the KN-byte key K in the treap T. |
| 405 | * |
| 406 | * Return a pointer to the matching node if one was found, or null otherwise. |
| 407 | */ |
| 408 | void *treap_lookup(const struct treap *t, const char *k, size_t kn) |
| 409 | { |
| 410 | struct treap_node *n = t->root, *candidate = 0; |
| 411 | |
| 412 | /* This is a simple prototype for some of the search loops we'll encounter |
| 413 | * later. Notice that we use a strict one-sided comparison, rather than |
| 414 | * the more conventional two-sided comparison. |
| 415 | * |
| 416 | * The main loop will find the largest key not greater than K. |
| 417 | */ |
| 418 | while (n) |
| 419 | /* Compare the node's key against our key. If the node is too large, |
| 420 | * then we ignore it and move left. Otherwise remember this node for |
| 421 | * later, and move right to see if we can find a better, larger node. |
| 422 | */ |
| 423 | |
| 424 | if (str_lt(k, kn, n->k, n->kn)) n = n->left; |
| 425 | else { candidate = n; n = n->right; } |
| 426 | |
| 427 | /* If the candidate node is less than our key then we failed. Otherwise, |
| 428 | * by trichotomy, we have found the correct node. |
| 429 | */ |
| 430 | if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0); |
| 431 | return (candidate); |
| 432 | } |
| 433 | |
| 434 | /* Look up the KN-byte K in the treap T, recording a path in P. |
| 435 | * |
| 436 | * This is similar to `treap_lookup', in that it returns the requested node |
| 437 | * if it already exists, or null otherwise, but it also records in P |
| 438 | * information to be used by `treap_insert' to insert a new node with the |
| 439 | * given key if it's not there already. |
| 440 | */ |
| 441 | void *treap_probe(struct treap *t, const char *k, size_t kn, |
| 442 | struct treap_path *p) |
| 443 | { |
| 444 | struct treap_node **nn = &t->root, *candidate = 0; |
| 445 | unsigned i = 0; |
| 446 | |
| 447 | /* This walk is similar to `treap_lookup' above, except that we also record |
| 448 | * the address of each node pointer we visit along the way. |
| 449 | */ |
| 450 | for (;;) { |
| 451 | assert(i < TREAP_PATHMAX); p->path[i++] = nn; |
| 452 | if (!*nn) break; |
| 453 | if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left; |
| 454 | else { candidate = *nn; nn = &(*nn)->right; } |
| 455 | } |
| 456 | p->nsteps = i; |
| 457 | |
| 458 | /* Check to see whether we found the right node. */ |
| 459 | if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0); |
| 460 | return (candidate); |
| 461 | } |
| 462 | |
| 463 | /* Insert a new node N into T, associating it with the KN-byte key K. |
| 464 | * |
| 465 | * Use the path data P, from `treap_probe', to help with insertion. |
| 466 | */ |
| 467 | void treap_insert(struct treap *t, const struct treap_path *p, |
| 468 | struct treap_node *n, const char *k, size_t kn) |
| 469 | { |
| 470 | size_t i = p->nsteps; |
| 471 | struct treap_node **nn, **uu, *u; |
| 472 | unsigned wt; |
| 473 | |
| 474 | /* Fill in the node structure. */ |
| 475 | n->k = xstrndup(k, kn); n->kn = kn; |
| 476 | n->wt = wt = rand(); n->left = n->right = 0; |
| 477 | |
| 478 | /* Prepare for the insertion. |
| 479 | * |
| 480 | * The path actually points to each of the links traversed when searching |
| 481 | * for the node, starting with the `root' pointer, then the `left' or |
| 482 | * `right' pointer of the root node, and so on; `nsteps' will always be |
| 483 | * nonzero, since the path will always pass through the root, and the final |
| 484 | * step, `path->path[path->nsteps - 1]' will always be the address of a |
| 485 | * null pointer onto which the freshly inserted node could be hooked in |
| 486 | * order to satisfy the binary-search-tree ordering. (Of course, this will |
| 487 | * likely /not/ satisfy the heap condition, so more work needs to be done.) |
| 488 | * |
| 489 | * Throughout, NN is our current candidate for where to attach the node N. |
| 490 | * As the loop progresses, NN will ascend to links further up the tree, and |
| 491 | * N will be adjusted to accumulate pieces of the existing tree structure. |
| 492 | * We'll stop when we find that the parent node's weight is larger than our |
| 493 | * new node's weight, at which point we can just set *NN = N; or if we run |
| 494 | * out of steps in the path, in which case *NN is the root pointer. |
| 495 | */ |
| 496 | assert(i); nn = p->path[--i]; |
| 497 | while (i--) { |
| 498 | |
| 499 | /* Collect the next step in the path, and get the pointer to the node. */ |
| 500 | uu = p->path[i]; u = *uu; |
| 501 | |
| 502 | /* If this node's weight is higher, then we've found the right level and |
| 503 | * we can stop. |
| 504 | */ |
| 505 | if (wt <= u->wt) break; |
| 506 | |
| 507 | /* The node U is lighter than our new node N, so we must rotate in order |
| 508 | * to fix things. If we were currently planning to hook N as the left |
| 509 | * subtree of U, then we rotate like this: |
| 510 | * |
| 511 | * | | |
| 512 | * U (N) |
| 513 | * / \ / \ |
| 514 | * (N) Z ---> X U |
| 515 | * / \ / \ |
| 516 | * X Y Y Z |
| 517 | * |
| 518 | * On the other hand, if we ere planning to hook N as the right subtree |
| 519 | * of U, then we do the opposite rotation: |
| 520 | * |
| 521 | * | | |
| 522 | * U (N) |
| 523 | * / \ / \ |
| 524 | * X (N) ---> U Z |
| 525 | * / \ / \ |
| 526 | * Y Z X Y |
| 527 | * |
| 528 | * These transformations clearly preserve the ordering of nodes in the |
| 529 | * binary search tree, and satisfy the heap condition in the subtree |
| 530 | * headed by N. |
| 531 | */ |
| 532 | if (nn == &u->left) { u->left = n->right; n->right = u; } |
| 533 | else { u->right = n->left; n->left = u; } |
| 534 | |
| 535 | /* And this arrangement must be attached to UU, or some higher attachment |
| 536 | * point. The subtree satisfies the heap condition, and can be attached |
| 537 | * safely at the selected place. |
| 538 | */ |
| 539 | nn = uu; |
| 540 | } |
| 541 | |
| 542 | /* We've found the right spot. Hook the accumulated subtree into place. */ |
| 543 | *nn = n; |
| 544 | } |
| 545 | |
| 546 | /* Remove the node with the KN-byte K from T. |
| 547 | * |
| 548 | * Return the address of the node we removed, or null if it couldn't be |
| 549 | * found. |
| 550 | */ |
| 551 | void *treap_remove(struct treap *t, const char *k, size_t kn) |
| 552 | { |
| 553 | struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r; |
| 554 | |
| 555 | /* Search for the matching node, but keep track of the address of the link |
| 556 | * which points to our target node. |
| 557 | */ |
| 558 | while (*nn) |
| 559 | if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left; |
| 560 | else { candidate = nn; nn = &(*nn)->right; } |
| 561 | |
| 562 | /* If this isn't the right node then give up. */ |
| 563 | if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn)) |
| 564 | return (0); |
| 565 | |
| 566 | /* Now we need to disentangle the node from the tree. This is essentially |
| 567 | * the reverse of insertion: we pretend that this node is suddenly very |
| 568 | * light, and mutate the tree so as to restore the heap condition until |
| 569 | * eventually our node is a leaf and can be cut off without trouble. |
| 570 | * |
| 571 | * Throughout, the link *NN notionally points to N, but we don't actually |
| 572 | * update it until we're certain what value it should finally take. |
| 573 | */ |
| 574 | nn = candidate; n = *nn; l = n->left; r = n->right; |
| 575 | for (;;) |
| 576 | |
| 577 | /* If its left subtree is empty then we can replace our node by its right |
| 578 | * subtree and be done. Similarly, if the right subtree is empty then we |
| 579 | * replace the node by its left subtree. |
| 580 | * |
| 581 | * | | | | |
| 582 | * (N) ---> R ; (N) ---> L |
| 583 | * / \ / \ |
| 584 | * * R L * |
| 585 | */ |
| 586 | if (!l) { *nn = r; break; } |
| 587 | else if (!r) { *nn = l; break; } |
| 588 | |
| 589 | /* Otherwise we need to rotate the pointers so that the heavier of the |
| 590 | * two children takes the place of our node; thus we have either |
| 591 | * |
| 592 | * | | |
| 593 | * (N) L |
| 594 | * / \ / \ |
| 595 | * L R ---> X (N) |
| 596 | * / \ / \ |
| 597 | * X Y Y R |
| 598 | * |
| 599 | * or |
| 600 | * |
| 601 | * | | |
| 602 | * (N) R |
| 603 | * / \ / \ |
| 604 | * L R ---> (N) Y |
| 605 | * / \ / \ |
| 606 | * X Y L X |
| 607 | * |
| 608 | * Again, these transformations clearly preserve the ordering of nodes in |
| 609 | * the binary search tree, and the heap condition. |
| 610 | */ |
| 611 | else if (l->wt > r->wt) |
| 612 | { *nn = l; nn = &l->right; l = n->left = l->right; } |
| 613 | else |
| 614 | { *nn = r; nn = &r->left; r = n->right = r->left; } |
| 615 | |
| 616 | /* Release the key buffer, and return the node that we've now detached. */ |
| 617 | free(n->k); return (n); |
| 618 | } |
| 619 | |
| 620 | /* Initialize an iterator I over T's nodes. */ |
| 621 | void treap_start_iter(struct treap *t, struct treap_iter *i) |
| 622 | { |
| 623 | struct treap_node *n = t->root; |
| 624 | unsigned sp = 0; |
| 625 | |
| 626 | /* The `stack' in the iterator structure is an empty ascending stack of |
| 627 | * nodes which have been encountered, and their left subtrees investigated, |
| 628 | * but not yet visited by the iteration. |
| 629 | * |
| 630 | * Iteration begins by stacking the root node, its left child, and so on, |
| 631 | * At the end of this, the topmost entry on the stack is the least node of |
| 632 | * the tree, followed by its parent, grandparent, and so on up to the root. |
| 633 | */ |
| 634 | while (n) { |
| 635 | assert(sp < TREAP_PATHMAX); |
| 636 | i->stack[sp++] = n; n = n->left; |
| 637 | } |
| 638 | i->sp = sp; |
| 639 | } |
| 640 | |
| 641 | /* Return the next node from I, in ascending order by key. |
| 642 | * |
| 643 | * If there are no more nodes, then return null. |
| 644 | */ |
| 645 | void *treap_next(struct treap_iter *i) |
| 646 | { |
| 647 | struct treap_node *n, *o; |
| 648 | unsigned sp = i->sp; |
| 649 | |
| 650 | /* We say that a node is /visited/ once it's been returned by this |
| 651 | * iterator. To traverse a tree in order, then, we traverse its left |
| 652 | * subtree, visit the tree root, and traverse its right subtree -- which is |
| 653 | * a fine recursive definition, but we need a nonrecursive implementation. |
| 654 | * |
| 655 | * As is usual in this kind of essential structural recursion, we maintain |
| 656 | * a stack. The invariant that we'll maintain is as follows. |
| 657 | * |
| 658 | * 1. If the stack is empty, then all nodes have been visited. |
| 659 | * |
| 660 | * 2, If the stack is nonempty then the topmost entry on the stack is the |
| 661 | * least node which has not yet been visited -- and therefore is the |
| 662 | * next node to visit. |
| 663 | * |
| 664 | * 3. The earlier entries in the stack are, in (top to bottom) order, |
| 665 | * those of the topmost node's parent, grandparent, etc., up to the |
| 666 | * root, which have not yet been visited. More specifically, a node |
| 667 | * appears in the stack if and only if some node in its left subtree |
| 668 | * is nearer the top of the stack. |
| 669 | * |
| 670 | * When we initialized the iterator state (in `treap_start_iter' above), we |
| 671 | * traced a path to the leftmost leaf, stacking the root, its left-hand |
| 672 | * child, and so on. The leftmost leaf is clearly the first node to be |
| 673 | * visited, and its entire ancestry is on the stack since none of these |
| 674 | * nodes has yet been visited. (If the tree is empty, then we have done |
| 675 | * nothing, the stack is empty, and there are no nodes to visit.) This |
| 676 | * establishes the base case for the induction. |
| 677 | */ |
| 678 | |
| 679 | /* So, if the stack is empty now, then (1) all of the nodes have been |
| 680 | * visited and there's nothing left to do. Return null. |
| 681 | */ |
| 682 | if (!sp) return (0); |
| 683 | |
| 684 | /* It's clear that, if we pop the topmost element of the stack, visit it, |
| 685 | * and arrange to reestablish the invariant, then we'll visit the nodes in |
| 686 | * the correct order, pretty much by definition. |
| 687 | * |
| 688 | * So, pop a node off the stack. This is the node we shall return. But |
| 689 | * before we can do that, we must reestablish the above invariant. |
| 690 | * Firstly, the current node is removed from the stack, because we're about |
| 691 | * to visit it, and visited nodes don't belong on the stack. Then there |
| 692 | * are two cases to consider. |
| 693 | * |
| 694 | * * If the current node's right subtree is not empty, then the next node |
| 695 | * to be visited is the leftmost node in that subtree. All of the |
| 696 | * nodes on the stack are ancestors of the current node, and the right |
| 697 | * subtree consists of its descendants, so none of them are already on |
| 698 | * the stack; and they're all greater than the current node, and |
| 699 | * therefore haven't been visited. Therefore, we must push the current |
| 700 | * node's right child, its /left/ child, and so on, proceeding |
| 701 | * leftwards until we fall off the bottom of the tree. |
| 702 | * |
| 703 | * * Otherwise, we've finished traversing some subtree. Either we are |
| 704 | * now done, or (3) we have just finished traversing the left subtree |
| 705 | * of the next topmost item on the stack. This must therefore be the |
| 706 | * next node to visit. The rest of the stack is already correct. |
| 707 | */ |
| 708 | n = i->stack[--sp]; |
| 709 | o = n->right; |
| 710 | while (o) { |
| 711 | assert(sp < TREAP_PATHMAX); |
| 712 | i->stack[sp++] = o; o = o->left; |
| 713 | } |
| 714 | i->sp = sp; |
| 715 | return (n); |
| 716 | } |
| 717 | |
| 718 | /* Recursively check the subtree headed by N. |
| 719 | * |
| 720 | * No node should have weight greater than MAXWT, to satisfy the heap |
| 721 | * condition; if LO is not null, then all node keys should be strictly |
| 722 | * greater than LO, and, similarly, if HI is not null, then all keys should |
| 723 | * be strictly smaller than HI. |
| 724 | */ |
| 725 | static void check_subtree(struct treap_node *n, unsigned maxwt, |
| 726 | const char *klo, const char *khi) |
| 727 | { |
| 728 | /* Check the heap condition. */ |
| 729 | assert(n->wt <= maxwt); |
| 730 | |
| 731 | /* Check that the key is in bounds. (Use `strcmp' here to ensure that our |
| 732 | * own `str_lt' is working correctly.) |
| 733 | */ |
| 734 | if (klo) assert(STRCMP(n->k, >, klo)); |
| 735 | if (khi) assert(STRCMP(n->k, <, khi)); |
| 736 | |
| 737 | /* Check the left subtree. Node weights must be bounded above by our own |
| 738 | * weight. And everykey in the left subtree must be smaller than our |
| 739 | * current key. We propagate the lower bound. |
| 740 | */ |
| 741 | if (n->left) check_subtree(n->left, n->wt, klo, n->k); |
| 742 | |
| 743 | /* Finally, check the right subtree. This time, every key must be larger |
| 744 | * than our key, and we propagate the upper bound. |
| 745 | */ |
| 746 | if (n->right) check_subtree(n->right, n->wt, n->k, khi); |
| 747 | } |
| 748 | |
| 749 | /* Check the treap structure rules for T. */ |
| 750 | void treap_check(struct treap *t) |
| 751 | { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); } |
| 752 | |
| 753 | /* Recursively dump the subtree headed by N, indenting the output lines by |
| 754 | * IND spaces. |
| 755 | */ |
| 756 | static void dump_node(struct treap_node *n, int ind) |
| 757 | { |
| 758 | if (n->left) dump_node(n->left, ind + 1); |
| 759 | printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k); |
| 760 | if (n->right) dump_node(n->right, ind + 1); |
| 761 | } |
| 762 | |
| 763 | /* Dump the treap T to standard output, for debugging purposes. */ |
| 764 | void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); } |
| 765 | |
| 766 | /*----- Configuration file parsing ----------------------------------------*/ |
| 767 | |
| 768 | #ifndef DECL_ENVIRON |
| 769 | extern char **environ; |
| 770 | #endif |
| 771 | |
| 772 | /* Advance P past a syntactically valid name, but no further than L. |
| 773 | * |
| 774 | * Return the new pointer. If no name is found, report an error, blaming |
| 775 | * FILE and LINE; WHAT is an adjective for the kind of name that was |
| 776 | * expected. |
| 777 | */ |
| 778 | static const char *scan_name(const char *what, |
| 779 | const char *p, const char *l, |
| 780 | const char *file, unsigned line) |
| 781 | { |
| 782 | const char *q = p; |
| 783 | |
| 784 | while (q < l && |
| 785 | (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' || |
| 786 | *q == '*' || *q == '+' || *q == '%' || *q == '@')) |
| 787 | q++; |
| 788 | if (q == p) lose("%s:%u: expected %s name", file, line, what); |
| 789 | return (q); |
| 790 | } |
| 791 | |
| 792 | /* Initialize the configuration state CONF. |
| 793 | * |
| 794 | * Usually you'd use the static initializer `CONFIG_INIT'. |
| 795 | */ |
| 796 | void config_init(struct config *conf) |
| 797 | { treap_init(&conf->sections); } |
| 798 | |
| 799 | /* Find and return the section with null-terminated NAME in CONF. |
| 800 | * |
| 801 | * If no section is found, the behaviour depends on whether `CF_CREAT' is set |
| 802 | * in F: if so, an empty section is created and returned; otherwise, a null |
| 803 | * pointer is returned. |
| 804 | */ |
| 805 | struct config_section *config_find_section(struct config *conf, unsigned f, |
| 806 | const char *name) |
| 807 | { return (config_find_section_n(conf, f, name, strlen(name))); } |
| 808 | |
| 809 | /* Find and return the section with the SZ-byte NAME in CONF. |
| 810 | * |
| 811 | * This works like `config_find_section', but with an explicit length for the |
| 812 | * NAME rather than null-termination. |
| 813 | */ |
| 814 | struct config_section *config_find_section_n(struct config *conf, unsigned f, |
| 815 | const char *name, size_t sz) |
| 816 | { |
| 817 | struct config_section *sect; |
| 818 | struct treap_path path; |
| 819 | |
| 820 | if (!(f&CF_CREAT)) |
| 821 | sect = treap_lookup(&conf->sections, name, sz); |
| 822 | else { |
| 823 | sect = treap_probe(&conf->sections, name, sz, &path); |
| 824 | if (!sect) { |
| 825 | sect = xmalloc(sizeof(*sect)); |
| 826 | if (!conf->head) conf->tail = &conf->head; |
| 827 | sect->next = 0; *conf->tail = sect; conf->tail = §->next; |
| 828 | sect->parents = 0; sect->nparents = SIZE_MAX; |
| 829 | treap_init(§->vars); treap_init(§->cache); |
| 830 | treap_insert(&conf->sections, &path, §->_node, name, sz); |
| 831 | config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz); |
| 832 | } |
| 833 | } |
| 834 | return (sect); |
| 835 | } |
| 836 | |
| 837 | /* Set the fallback section for CONF to be SECT. |
| 838 | * |
| 839 | * That is, if a section has no explicit parents, then by default it will |
| 840 | * have a single parent which is SECT. If SECT is null then there is no |
| 841 | * fallback section, and sections which don't have explicitly specified |
| 842 | * parents have no parents at all. (This is the default situation.) |
| 843 | */ |
| 844 | void config_set_fallback(struct config *conf, struct config_section *sect) |
| 845 | { conf->fallback = sect; } |
| 846 | |
| 847 | /* Arrange that SECT has PARENT as its single parent section. |
| 848 | * |
| 849 | * If PARENT is null, then arrange that SECT has no parents at all. In |
| 850 | * either case, any `@parents' setting will be ignored. |
| 851 | */ |
| 852 | void config_set_parent(struct config_section *sect, |
| 853 | struct config_section *parent) |
| 854 | { |
| 855 | if (!parent) |
| 856 | sect->nparents = 0; |
| 857 | else { |
| 858 | sect->parents = xmalloc(sizeof(*sect->parents)); |
| 859 | sect->parents[0] = parent; sect->nparents = 1; |
| 860 | } |
| 861 | } |
| 862 | |
| 863 | /* Initialize I to iterate over the sections defined in CONF. */ |
| 864 | void config_start_section_iter(struct config *conf, |
| 865 | struct config_section_iter *i) |
| 866 | { i->sect = conf->head; } |
| 867 | |
| 868 | /* Return the next section from I, in order of creation. |
| 869 | * |
| 870 | * If there are no more sections, then return null. |
| 871 | */ |
| 872 | struct config_section *config_next_section(struct config_section_iter *i) |
| 873 | { |
| 874 | struct config_section *sect; |
| 875 | |
| 876 | sect = i->sect; |
| 877 | if (sect) i->sect = sect->next; |
| 878 | return (sect); |
| 879 | } |
| 880 | |
| 881 | /* Initialize the `parents' links of SECT, if they aren't set up already. |
| 882 | * |
| 883 | * If SECT contains a `@parents' setting then parse it to determine the |
| 884 | * parents; otherwise use CONF's fallbeck section, as established by |
| 885 | * `config_set_fallback'. |
| 886 | */ |
| 887 | static void set_config_section_parents(struct config *conf, |
| 888 | struct config_section *sect) |
| 889 | { |
| 890 | struct config_section *parent; |
| 891 | struct config_var *var; |
| 892 | const char *file; unsigned line; |
| 893 | size_t i, n; |
| 894 | char *p, *q, *l; |
| 895 | struct argv av = ARGV_INIT; |
| 896 | |
| 897 | /* If the section already has parents established then there's nothing to |
| 898 | * do. |
| 899 | */ |
| 900 | if (sect->nparents != SIZE_MAX) return; |
| 901 | |
| 902 | /* Look up `@parents', without recursion! */ |
| 903 | var = treap_lookup(§->vars, "@parents", 8); |
| 904 | if (!var) { |
| 905 | /* No explicit setting: use the fallback setting. */ |
| 906 | |
| 907 | if (!conf->fallback || conf->fallback == sect) |
| 908 | sect->nparents = 0; |
| 909 | else { |
| 910 | sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1; |
| 911 | sect->parents[0] = conf->fallback; |
| 912 | } |
| 913 | } else { |
| 914 | /* Found a `@parents' list: parse it and set the parents list. */ |
| 915 | |
| 916 | file = var->file; line = var->line; if (!file) file = "<internal>"; |
| 917 | |
| 918 | /* We do this in two phases. First, we parse out the section names, and |
| 919 | * record start/limit pointer pairs in `av'. |
| 920 | */ |
| 921 | p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++; |
| 922 | while (*p) { |
| 923 | q = p; |
| 924 | p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line); |
| 925 | argv_append(&av, q); argv_append(&av, p); |
| 926 | while (p < l && ISSPACE(*p)) p++; |
| 927 | if (p >= l) break; |
| 928 | if (*p == ',') do p++; while (ISSPACE(*p)); |
| 929 | } |
| 930 | |
| 931 | /* Now that we've finished parsing, we know how many parents we're going |
| 932 | * to have, so we can allocate the `parents' vector and fill it in. |
| 933 | */ |
| 934 | sect->nparents = av.n/2; |
| 935 | sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents)); |
| 936 | for (i = 0; i < av.n; i += 2) { |
| 937 | n = av.v[i + 1] - av.v[i]; |
| 938 | parent = config_find_section_n(conf, 0, av.v[i], n); |
| 939 | if (!parent) |
| 940 | lose("%s:%u: unknown parent section `%.*s'", |
| 941 | file, line, (int)n, av.v[i]); |
| 942 | sect->parents[i/2] = parent; |
| 943 | } |
| 944 | } |
| 945 | |
| 946 | /* All done. */ |
| 947 | argv_release(&av); |
| 948 | } |
| 949 | |
| 950 | /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT. |
| 951 | * |
| 952 | * If successful, return a pointer to the variable; otherwise return null. |
| 953 | * Inheritance cycles and ambiguous inheritance are diagnosed as fatal |
| 954 | * errors. |
| 955 | */ |
| 956 | struct config_var *search_recursive(struct config *conf, |
| 957 | struct config_section *sect, |
| 958 | const char *name, size_t sz) |
| 959 | { |
| 960 | struct config_cache_entry *cache; |
| 961 | struct treap_path path; |
| 962 | struct config_var *var, *v; |
| 963 | size_t i, j = j; |
| 964 | |
| 965 | /* If the variable is defined locally then we can just return it. */ |
| 966 | var = treap_lookup(§->vars, name, sz); if (var) return (var); |
| 967 | |
| 968 | /* If we have no parents then there's no way we can find it. */ |
| 969 | set_config_section_parents(conf, sect); |
| 970 | if (!sect->parents) return (0); |
| 971 | |
| 972 | /* Otherwise we must visit the section's parents. We can avoid paying for |
| 973 | * this on every lookup by using a cache. If there's already an entry for |
| 974 | * this variable then we can return the result immediately (note that we |
| 975 | * cache both positive and negative outcomes). Otherwise we create a new |
| 976 | * cache entry, do the full recursive search, and fill in the result when |
| 977 | * we're done. |
| 978 | * |
| 979 | * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a |
| 980 | * new cache entry when it's first created, and clear it when we fill in |
| 981 | * the result: if we encounter an open cache entry again, we know that |
| 982 | * we've found a cycle. |
| 983 | */ |
| 984 | cache = treap_probe(§->cache, name, sz, &path); |
| 985 | if (!cache) { |
| 986 | cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN; |
| 987 | treap_insert(§->cache, &path, &cache->_node, name, sz); |
| 988 | } else if (cache->f&CF_OPEN) |
| 989 | lose("inheritance cycle through section `%s'", |
| 990 | CONFIG_SECTION_NAME(sect)); |
| 991 | else |
| 992 | return (cache->var); |
| 993 | |
| 994 | /* Recursively search in each parent. We insist that all parents that find |
| 995 | * a variable find the same binding; otherwise we declare ambiguous |
| 996 | * inheritance. |
| 997 | */ |
| 998 | for (i = 0; i < sect->nparents; i++) { |
| 999 | v = search_recursive(conf, sect->parents[i], name, sz); |
| 1000 | if (!v); |
| 1001 | else if (!var) { var = v; j = i; } |
| 1002 | else if (var != v) |
| 1003 | lose("section `%s' inherits variable `%s' ambiguously " |
| 1004 | "via `%s' and `%s'", |
| 1005 | CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var), |
| 1006 | CONFIG_SECTION_NAME(sect->parents[j]), |
| 1007 | CONFIG_SECTION_NAME(sect->parents[i])); |
| 1008 | } |
| 1009 | |
| 1010 | /* All done: fill the cache entry in, clear the open flag, and return the |
| 1011 | * result. |
| 1012 | */ |
| 1013 | cache->var = var; cache->f &= ~CF_OPEN; |
| 1014 | return (var); |
| 1015 | } |
| 1016 | |
| 1017 | /* Find and return the variable with null-terminated NAME in SECT. |
| 1018 | * |
| 1019 | * If `CF_INHERIT' is set in F, then the function searches the section's |
| 1020 | * parents recursively; otherwise, it only checks to see whether the variable |
| 1021 | * is set directly in SECT. |
| 1022 | * |
| 1023 | * If no variable is found, the behaviour depends on whether `CF_CREAT' is |
| 1024 | * set in F: if so, an empty variable is created and returned; otherwise, a |
| 1025 | * null pointer is returned. |
| 1026 | * |
| 1027 | * Setting both `CF_INHERIT' and `CF_CREAT' is not useful. |
| 1028 | */ |
| 1029 | struct config_var *config_find_var(struct config *conf, |
| 1030 | struct config_section *sect, |
| 1031 | unsigned f, const char *name) |
| 1032 | { return (config_find_var_n(conf, sect, f, name, strlen(name))); } |
| 1033 | |
| 1034 | /* Find and return the variable with the given SZ-byte NAME in SECT. |
| 1035 | * |
| 1036 | * This works like `config_find_var', but with an explicit length for the |
| 1037 | * NAME rather than null-termination. |
| 1038 | */ |
| 1039 | struct config_var *config_find_var_n(struct config *conf, |
| 1040 | struct config_section *sect, |
| 1041 | unsigned f, const char *name, size_t sz) |
| 1042 | { |
| 1043 | struct config_var *var; |
| 1044 | struct treap_path path; |
| 1045 | |
| 1046 | if (f&CF_INHERIT) |
| 1047 | var = search_recursive(conf, sect, name, sz); |
| 1048 | else if (!(f&CF_CREAT)) |
| 1049 | var = treap_lookup(§->vars, name, sz); |
| 1050 | else { |
| 1051 | var = treap_probe(§->vars, name, sz, &path); |
| 1052 | if (!var) { |
| 1053 | var = xmalloc(sizeof(*var)); |
| 1054 | var->val = 0; var->file = 0; var->f = 0; var->line = 1; |
| 1055 | treap_insert(§->vars, &path, &var->_node, name, sz); |
| 1056 | } |
| 1057 | } |
| 1058 | return (var); |
| 1059 | } |
| 1060 | |
| 1061 | /* Set variable NAME to VALUE in SECT, with associated flags F. |
| 1062 | * |
| 1063 | * The names are null-terminated. The flags are variable flags: see `struct |
| 1064 | * config_var' for details. Returns the variable. |
| 1065 | * |
| 1066 | * If the variable is already set and has the `CF_OVERRIDE' flag, then this |
| 1067 | * function does nothing unless `CF_OVERRIDE' is /also/ set in F. |
| 1068 | */ |
| 1069 | struct config_var *config_set_var(struct config *conf, |
| 1070 | struct config_section *sect, |
| 1071 | unsigned f, |
| 1072 | const char *name, const char *value) |
| 1073 | { |
| 1074 | return (config_set_var_n(conf, sect, f, |
| 1075 | name, strlen(name), |
| 1076 | value, strlen(value))); |
| 1077 | } |
| 1078 | |
| 1079 | /* As `config_set_var', except that the variable NAME and VALUE have explicit |
| 1080 | * lengths (NAMELEN and VALUELEN, respectively) rather than being null- |
| 1081 | * terminated. |
| 1082 | */ |
| 1083 | struct config_var *config_set_var_n(struct config *conf, |
| 1084 | struct config_section *sect, |
| 1085 | unsigned f, |
| 1086 | const char *name, size_t namelen, |
| 1087 | const char *value, size_t valuelen) |
| 1088 | { |
| 1089 | struct config_var *var = |
| 1090 | config_find_var_n(conf, sect, CF_CREAT, name, namelen); |
| 1091 | |
| 1092 | if (var->f&~f&CF_OVERRIDE) return (var); |
| 1093 | free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen; |
| 1094 | var->f = f; |
| 1095 | return (var); |
| 1096 | } |
| 1097 | |
| 1098 | /* Initialize I to iterate over the variables directly defined in SECT. */ |
| 1099 | void config_start_var_iter(struct config *conf, struct config_section *sect, |
| 1100 | struct config_var_iter *i) |
| 1101 | { treap_start_iter(§->vars, &i->i); } |
| 1102 | |
| 1103 | /* Return next variable from I, in ascending lexicographical order. |
| 1104 | * |
| 1105 | * If there are no more variables, then return null. |
| 1106 | */ |
| 1107 | struct config_var *config_next_var(struct config_var_iter *i) |
| 1108 | { return (treap_next(&i->i)); } |
| 1109 | |
| 1110 | /* Read and parse configuration FILE, applying its settings to CONF. |
| 1111 | * |
| 1112 | * If all goes well, the function returns 0. If the file is not found, then |
| 1113 | * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the |
| 1114 | * function simply returns -1. Otherwise, a fatal error is reported. Note |
| 1115 | * that this /only/ applies if the file does not exist (specifically, opening |
| 1116 | * it fails with `ENOENT') -- any other problems are reported as fatal |
| 1117 | * errors regardless of the flag setting. |
| 1118 | */ |
| 1119 | int config_read_file(struct config *conf, const char *file, unsigned f) |
| 1120 | { |
| 1121 | struct config_section *sect; |
| 1122 | struct config_var *var; |
| 1123 | struct dstr d = DSTR_INIT, dd = DSTR_INIT; |
| 1124 | unsigned line = 0; |
| 1125 | const char *p, *q, *r; |
| 1126 | FILE *fp; |
| 1127 | |
| 1128 | /* Try to open the file. */ |
| 1129 | fp = fopen(file, "r"); |
| 1130 | if (!fp) { |
| 1131 | if ((f&CF_NOENTOK) && errno == ENOENT) return (-1); |
| 1132 | lose("failed to open configuration file `%s': %s", |
| 1133 | file, strerror(errno)); |
| 1134 | } |
| 1135 | |
| 1136 | /* Find the initial section. */ |
| 1137 | sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0; |
| 1138 | |
| 1139 | /* Work through the file, line by line. */ |
| 1140 | for (;;) { |
| 1141 | dstr_reset(&d); if (dstr_readline(&d, fp)) break; |
| 1142 | line++; |
| 1143 | |
| 1144 | /* Trim trailing spaces from the line. The syntax is sensitive to |
| 1145 | * leading spaces, so we can't trim those yet. |
| 1146 | */ |
| 1147 | while (d.len && ISSPACE(d.p[d.len - 1])) d.len--; |
| 1148 | d.p[d.len] = 0; |
| 1149 | |
| 1150 | if (!*d.p || *d.p == ';') |
| 1151 | /* Ignore comments entirely. (In particular, a comment doesn't |
| 1152 | * interrupt a multiline variable value.) |
| 1153 | */ |
| 1154 | ; |
| 1155 | |
| 1156 | else if (ISSPACE(d.p[0])) { |
| 1157 | /* The line starts with whitespace, so it's a continuation line. */ |
| 1158 | |
| 1159 | /* Skip the initial whitespace. */ |
| 1160 | p = d.p; while (ISSPACE(*p)) p++; |
| 1161 | |
| 1162 | /* If we aren't collecting a variable value then this is an error. |
| 1163 | * Otherwise, accumulate it into the current value. |
| 1164 | */ |
| 1165 | if (!var) |
| 1166 | lose("%s:%u: continuation line, but no variable", file, line); |
| 1167 | if (dd.len) dstr_putc(&dd, ' '); |
| 1168 | dstr_putm(&dd, p, d.len - (p - d.p)); |
| 1169 | |
| 1170 | } else { |
| 1171 | /* The line starts in the first column. */ |
| 1172 | |
| 1173 | /* If there's a value value being collected then we must commit it to |
| 1174 | * its variable (unless there's already a setting there that says we |
| 1175 | * shouldn't). |
| 1176 | */ |
| 1177 | if (var) { |
| 1178 | if (!(var->f&CF_OVERRIDE)) |
| 1179 | { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; } |
| 1180 | var = 0; |
| 1181 | } |
| 1182 | |
| 1183 | /* Now decide what kind of line this is. */ |
| 1184 | if (d.p[0] == '[') { |
| 1185 | /* It's a section header. */ |
| 1186 | |
| 1187 | /* Parse the header. */ |
| 1188 | p = d.p + 1; while (ISSPACE(*p)) p++; |
| 1189 | q = scan_name("section", p, d.p + d.len, file, line); |
| 1190 | r = q; while (ISSPACE(*r)) r++; |
| 1191 | if (*r != ']') |
| 1192 | lose("%s:%u: expected `]' in section header", file, line); |
| 1193 | if (r[1]) |
| 1194 | lose("%s:%u: trailing junk after `]' in section header", |
| 1195 | file, line); |
| 1196 | |
| 1197 | /* Create the new section. */ |
| 1198 | sect = config_find_section_n(conf, CF_CREAT, p, q - p); |
| 1199 | |
| 1200 | } else { |
| 1201 | /* It's a variable assignment. Parse the name out. */ |
| 1202 | p = scan_name("variable", d.p, d.p + d.len, file, line); |
| 1203 | var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p); |
| 1204 | while (ISSPACE(*p)) p++; |
| 1205 | if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line); |
| 1206 | p++; while (ISSPACE(*p)) p++; |
| 1207 | |
| 1208 | /* Clear out the variable's initial value, unless we shouldn't |
| 1209 | * override it. |
| 1210 | */ |
| 1211 | if (!(var->f&CF_OVERRIDE)) { |
| 1212 | free(var->val); var->val = 0; var->f = 0; |
| 1213 | free(var->file); var->file = xstrdup(file); var->line = line; |
| 1214 | } |
| 1215 | dstr_reset(&dd); dstr_puts(&dd, p); |
| 1216 | } |
| 1217 | } |
| 1218 | } |
| 1219 | |
| 1220 | /* If there's a value under construction then commit the result. */ |
| 1221 | if (var && !(var->f&CF_OVERRIDE)) |
| 1222 | { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; } |
| 1223 | |
| 1224 | /* Close the file. */ |
| 1225 | if (fclose(fp)) |
| 1226 | lose("error reading configuration file `%s': %s", file, strerror(errno)); |
| 1227 | |
| 1228 | /* All done. */ |
| 1229 | dstr_release(&d); dstr_release(&dd); |
| 1230 | return (0); |
| 1231 | } |
| 1232 | |
| 1233 | /* Populate SECT with environment variables. |
| 1234 | * |
| 1235 | * Environment variables are always set with `CF_LITERAL'. |
| 1236 | */ |
| 1237 | void config_read_env(struct config *conf, struct config_section *sect) |
| 1238 | { |
| 1239 | const char *p, *v; |
| 1240 | size_t i; |
| 1241 | |
| 1242 | for (i = 0; (p = environ[i]) != 0; i++) { |
| 1243 | v = strchr(p, '='); if (!v) continue; |
| 1244 | config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1)); |
| 1245 | } |
| 1246 | } |
| 1247 | |
| 1248 | /*----- Substitution and quoting ------------------------------------------*/ |
| 1249 | |
| 1250 | /* The substitution and word-splitting state. |
| 1251 | * |
| 1252 | * This only keeps track of the immutable parameters for the substitution |
| 1253 | * task: stuff which changes (flags, filtering state, cursor position) is |
| 1254 | * maintained separately. |
| 1255 | */ |
| 1256 | struct subst { |
| 1257 | struct config *config; /* configuration state */ |
| 1258 | struct config_section *home; /* home section for lookups */ |
| 1259 | struct dstr *d; /* current word being constructed */ |
| 1260 | struct argv *av; /* output word list */ |
| 1261 | }; |
| 1262 | |
| 1263 | /* Flags for `subst' and related functions. */ |
| 1264 | #define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */ |
| 1265 | #define SF_QUOT 0x0002u /* currently within double quotes */ |
| 1266 | #define SF_SUBST 0x0004u /* apply `$-substitutions */ |
| 1267 | #define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */ |
| 1268 | #define SF_SPANMASK 0x00ffu /* mask for the above */ |
| 1269 | |
| 1270 | #define SF_WORD 0x0100u /* output word under construction */ |
| 1271 | #define SF_SKIP 0x0200u /* not producing output */ |
| 1272 | #define SF_LITERAL 0x0400u /* do not expand or substitute */ |
| 1273 | #define SF_UPCASE 0x0800u /* convert to uppercase */ |
| 1274 | #define SF_DOWNCASE 0x1000u /* convert to lowercase */ |
| 1275 | #define SF_CASEMASK 0x1800u /* mask for case conversions */ |
| 1276 | |
| 1277 | /* Apply filters encoded in QFILT and F to the text from P to L, and output. |
| 1278 | * |
| 1279 | * SB is the substitution state which, in particular, explains where the |
| 1280 | * output should go. |
| 1281 | * |
| 1282 | * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case |
| 1283 | * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is |
| 1284 | * encoded as the number of toothpicks to print: see `subst' for how this |
| 1285 | * determined.) |
| 1286 | */ |
| 1287 | static void filter_string(const char *p, const char *l, |
| 1288 | const struct subst *sb, unsigned qfilt, unsigned f) |
| 1289 | { |
| 1290 | size_t r, n; |
| 1291 | char *q; const char *pp, *ll; |
| 1292 | |
| 1293 | if (!qfilt && !(f&SF_CASEMASK)) |
| 1294 | /* Fast path: there's nothing to do: just write to the output. */ |
| 1295 | dstr_putm(sb->d, p, l - p); |
| 1296 | |
| 1297 | else for (;;) { |
| 1298 | /* We must be a bit more circumspect. */ |
| 1299 | |
| 1300 | /* Determine the length of the next span of characters which don't need |
| 1301 | * escaping. (If QFILT is zero then this is everything.) |
| 1302 | */ |
| 1303 | r = l - p; n = qfilt ? strcspn(p, "\"\\") : r; |
| 1304 | if (n > r) n = r; |
| 1305 | |
| 1306 | if (!(f&SF_CASEMASK)) |
| 1307 | /* No case conversion: we can just emit this chunk. */ |
| 1308 | |
| 1309 | dstr_putm(sb->d, p, n); |
| 1310 | |
| 1311 | else { |
| 1312 | /* Case conversion to do. Arrange enough space for the output, and |
| 1313 | * convert it character by character. |
| 1314 | */ |
| 1315 | |
| 1316 | dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n; |
| 1317 | if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++); |
| 1318 | else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++); |
| 1319 | sb->d->len += n; |
| 1320 | } |
| 1321 | |
| 1322 | /* If we've reached the end then stop. */ |
| 1323 | if (n >= r) break; |
| 1324 | |
| 1325 | /* Otherwise we must have found a character which requires escaping. |
| 1326 | * Emit enough toothpicks. |
| 1327 | */ |
| 1328 | dstr_putcn(sb->d, '\\', qfilt); |
| 1329 | |
| 1330 | /* This character is now done, so we can skip over and see if there's |
| 1331 | * another chunk of stuff we can do at high speed. |
| 1332 | */ |
| 1333 | dstr_putc(sb->d, p[n]); p += n + 1; |
| 1334 | } |
| 1335 | } |
| 1336 | |
| 1337 | /* Scan and resolve a `[SECT:]VAR' specifier at P. |
| 1338 | * |
| 1339 | * Return the address of the next character following the specifier. L is a |
| 1340 | * limit on the region of the buffer that we should process; SB is the |
| 1341 | * substitution state which provides the home section if none is given |
| 1342 | * explicitly; FILE and LINE are the source location to blame for problems. |
| 1343 | */ |
| 1344 | static const char *retrieve_varspec(const char *p, const char *l, |
| 1345 | const struct subst *sb, |
| 1346 | struct config_var **var_out, |
| 1347 | const char *file, unsigned line) |
| 1348 | { |
| 1349 | struct config_section *sect = sb->home; |
| 1350 | const char *t; |
| 1351 | |
| 1352 | t = scan_name("section or variable", p, l, file, line); |
| 1353 | if (t < l && *t == ':') { |
| 1354 | sect = config_find_section_n(sb->config, 0, p, t - p); |
| 1355 | p = t + 1; t = scan_name("variable", p, l, file, line); |
| 1356 | } |
| 1357 | |
| 1358 | if (!sect) *var_out = 0; |
| 1359 | else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p); |
| 1360 | return (t); |
| 1361 | } |
| 1362 | |
| 1363 | /* Substitute and/or word-split text. |
| 1364 | * |
| 1365 | * The input text starts at P, and continues to (just before) L. Context for |
| 1366 | * the task is provided by SB; the source location to blame is FILE and LINE |
| 1367 | * (FILE may be null so that this can be passed directly from a `config_var' |
| 1368 | * without further checking); QFILT is the nesting depth in toothpick- |
| 1369 | * escaping; and F holds a mask of `SF_...' flags. |
| 1370 | */ |
| 1371 | static const char *subst(const char *p, const char *l, |
| 1372 | const struct subst *sb, |
| 1373 | const char *file, unsigned line, |
| 1374 | unsigned qfilt, unsigned f) |
| 1375 | { |
| 1376 | struct config_var *var; |
| 1377 | const char *q0, *q1, *t; |
| 1378 | unsigned subqfilt, ff; |
| 1379 | size_t n; |
| 1380 | |
| 1381 | /* It would be best if we could process literal text at high speed. To |
| 1382 | * this end, we have a table, indexed by the low-order bits of F, to tell |
| 1383 | * us which special characters we need to stop at. This way, we can use |
| 1384 | * `strcspn' to skip over literal text and stop at the next character which |
| 1385 | * needs special handling. Entries in this table with a null pointer |
| 1386 | * correspond to impossible flag settings. |
| 1387 | */ |
| 1388 | static const char *const delimtab[] = { |
| 1389 | |
| 1390 | #define ESCAPE "\\" /* always watch for `\'-escapes */ |
| 1391 | #define SUBST "$" /* check for `$' if `SF_SUBST' set */ |
| 1392 | #define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but |
| 1393 | * not `SF_QUOT' */ |
| 1394 | #define QUOT "\"" /* only quotes if `SF_SPLIT' and |
| 1395 | * `SF_QUOT' */ |
| 1396 | #define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */ |
| 1397 | |
| 1398 | ESCAPE, |
| 1399 | ESCAPE WORDSEP, |
| 1400 | 0, |
| 1401 | ESCAPE QUOT, |
| 1402 | ESCAPE SUBST, |
| 1403 | ESCAPE SUBST WORDSEP, |
| 1404 | 0, |
| 1405 | ESCAPE SUBST QUOT, |
| 1406 | ESCAPE DELIM, |
| 1407 | ESCAPE DELIM WORDSEP, |
| 1408 | 0, |
| 1409 | ESCAPE DELIM QUOT, |
| 1410 | ESCAPE DELIM SUBST, |
| 1411 | ESCAPE DELIM SUBST WORDSEP, |
| 1412 | 0, |
| 1413 | ESCAPE DELIM SUBST QUOT |
| 1414 | |
| 1415 | #undef COMMON |
| 1416 | #undef WORDSEP |
| 1417 | #undef SQUOT |
| 1418 | #undef DELIM |
| 1419 | }; |
| 1420 | |
| 1421 | /* Set FILE to be useful if it was null on entry. */ |
| 1422 | if (!file) file = "<internal>"; |
| 1423 | |
| 1424 | /* If the text is literal then hand off to `filter_string'. This obviously |
| 1425 | * starts a word. |
| 1426 | */ |
| 1427 | if (f&SF_LITERAL) { |
| 1428 | filter_string(p, l, sb, qfilt, f); |
| 1429 | f |= SF_WORD; |
| 1430 | goto done; |
| 1431 | } |
| 1432 | |
| 1433 | /* Chew through the input until it's all gone. */ |
| 1434 | while (p < l) { |
| 1435 | |
| 1436 | if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) { |
| 1437 | /* This is whitespace, we're supposed to split, and we're not within |
| 1438 | * quotes, so we should split here. |
| 1439 | */ |
| 1440 | |
| 1441 | /* If there's a word in progress then we should commit it. */ |
| 1442 | if (f&SF_WORD) { |
| 1443 | if (!(f&SF_SKIP)) { |
| 1444 | argv_append(sb->av, xstrndup(sb->d->p, sb->d->len)); |
| 1445 | dstr_reset(sb->d); |
| 1446 | } |
| 1447 | f &= ~SF_WORD; |
| 1448 | } |
| 1449 | |
| 1450 | /* Skip over further whitespace at high speed. */ |
| 1451 | do p++; while (p < l && ISSPACE(*p)); |
| 1452 | |
| 1453 | } else if (*p == '\\') { |
| 1454 | /* This is a toothpick, so start a new word and add the next character |
| 1455 | * to it. |
| 1456 | */ |
| 1457 | |
| 1458 | /* If there's no next charact3er then we should be upset. */ |
| 1459 | p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line); |
| 1460 | |
| 1461 | if (!(f&SF_SKIP)) { |
| 1462 | |
| 1463 | /* If this is a double quote or backslash then check DFLT to see if |
| 1464 | * it needs escaping. |
| 1465 | */ |
| 1466 | if (qfilt && (*p == '"' || *p == '\\')) |
| 1467 | dstr_putcn(sb->d, '\\', qfilt); |
| 1468 | |
| 1469 | /* Output the character. */ |
| 1470 | if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p)); |
| 1471 | else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p)); |
| 1472 | else dstr_putc(sb->d, *p); |
| 1473 | } |
| 1474 | |
| 1475 | /* Move past the escaped character. Remember we started a word. */ |
| 1476 | p++; f |= SF_WORD; |
| 1477 | |
| 1478 | } else if ((f&SF_SPLIT) && *p == '"') { |
| 1479 | /* This is a double quote, and we're word splitting. We're definitely |
| 1480 | * in a word now. Toggle whether we're within quotes. |
| 1481 | */ |
| 1482 | |
| 1483 | f ^= SF_QUOT; f |= SF_WORD; p++; |
| 1484 | |
| 1485 | } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') { |
| 1486 | /* This is a single quote, and we're word splitting but not within |
| 1487 | * double quotes. Find the matching end quote, and just output |
| 1488 | * everything between literally. |
| 1489 | */ |
| 1490 | |
| 1491 | p++; t = strchr(p, '\''); |
| 1492 | if (!t || t >= l) lose("%s:%u: missing `''", file, line); |
| 1493 | if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f); |
| 1494 | p = t + 1; f |= SF_WORD; |
| 1495 | |
| 1496 | } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) { |
| 1497 | /* This is an end delimiter, and we're supposed to stop here. */ |
| 1498 | break; |
| 1499 | |
| 1500 | } else if ((f&SF_SUBST) && *p == '$') { |
| 1501 | /* This is a `$' and we're supposed to do substitution. */ |
| 1502 | |
| 1503 | /* The kind of substitution is determined by the next character. */ |
| 1504 | p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line); |
| 1505 | |
| 1506 | /* Prepare flags for a recursive substitution. |
| 1507 | * |
| 1508 | * Hide our quote state from the recursive call. If we're within a |
| 1509 | * word, then disable word-splitting. |
| 1510 | */ |
| 1511 | ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0)); |
| 1512 | |
| 1513 | /* Now dispatch based on the following character. */ |
| 1514 | switch (*p) { |
| 1515 | |
| 1516 | case '?': |
| 1517 | /* A conditional expression: $?VAR{CONSEQ[|ALT]} */ |
| 1518 | |
| 1519 | /* Skip initial space. */ |
| 1520 | p++; while (p < l && ISSPACE(*p)) p++; |
| 1521 | |
| 1522 | /* Find the variable. */ |
| 1523 | p = retrieve_varspec(p, l, sb, &var, file, line); |
| 1524 | |
| 1525 | /* Skip whitespace again. */ |
| 1526 | while (p < l && ISSPACE(*p)) p++; |
| 1527 | |
| 1528 | /* Expect the opening `{'. */ |
| 1529 | if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line); |
| 1530 | p++; |
| 1531 | |
| 1532 | /* We'll process the parts recursively, but we need to come back |
| 1533 | * when we hit the appropriate delimiters, so arrange for that. |
| 1534 | */ |
| 1535 | ff |= SF_SUBEXPR; |
| 1536 | |
| 1537 | /* Process the consequent (skip if the variable wasn't found). */ |
| 1538 | p = subst(p, l, sb, file, line, qfilt, |
| 1539 | ff | (var ? 0 : SF_SKIP)); |
| 1540 | |
| 1541 | /* If there's a `|' then process the alternative too (skip if the |
| 1542 | * variable /was/ found). |
| 1543 | */ |
| 1544 | if (p < l && *p == '|') |
| 1545 | p = subst(p + 1, l, sb, file, line, qfilt, |
| 1546 | ff | (var ? SF_SKIP : 0)); |
| 1547 | |
| 1548 | /* We should now be past the closing `}'. */ |
| 1549 | if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line); |
| 1550 | p++; |
| 1551 | break; |
| 1552 | |
| 1553 | case '{': |
| 1554 | /* A variable substitution: ${VAR[|FILT]...[?ALT]} */ |
| 1555 | |
| 1556 | /* Skip initial whitespace. */ |
| 1557 | p++; while (p < l && ISSPACE(*p)) p++; |
| 1558 | |
| 1559 | /* Find the variable. */ |
| 1560 | q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p; |
| 1561 | |
| 1562 | /* Determine the filters to apply when substituting the variable |
| 1563 | * value. |
| 1564 | */ |
| 1565 | subqfilt = qfilt; |
| 1566 | for (;;) { |
| 1567 | |
| 1568 | /* Skip spaces again. */ |
| 1569 | while (p < l && ISSPACE(*p)) p++; |
| 1570 | |
| 1571 | /* If there's no `|' then there are no more filters, so stop. */ |
| 1572 | if (p >= l || *p != '|') break; |
| 1573 | |
| 1574 | /* Skip the `|' and more spaces. */ |
| 1575 | p++; while (p < l && ISSPACE(*p)) p++; |
| 1576 | |
| 1577 | /* Collect the filter name. */ |
| 1578 | t = scan_name("filter", p, l, file, line); |
| 1579 | |
| 1580 | /* Dispatch on the filter name. */ |
| 1581 | if (t - p == 1 && *p == 'q') |
| 1582 | /* `q' -- quote for Lisp string. |
| 1583 | * |
| 1584 | * We're currently adding Q `\' characters before each naughty |
| 1585 | * character. But a backslash itself is naughty too, so that |
| 1586 | * makes Q + 1 naughty characters, each of which needs a |
| 1587 | * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks. |
| 1588 | * |
| 1589 | * Calculate this here rather than at each point toothpicks |
| 1590 | * needs to be deployed. |
| 1591 | */ |
| 1592 | |
| 1593 | subqfilt = 2*subqfilt + 1; |
| 1594 | |
| 1595 | else if (t - p == 1 && *p == 'l') |
| 1596 | /* `u' -- convert to uppercase. |
| 1597 | * |
| 1598 | * If a case conversion is already set, then that will override |
| 1599 | * whatever we do here, so don't bother. |
| 1600 | */ |
| 1601 | |
| 1602 | { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; } |
| 1603 | |
| 1604 | else if (t - p == 1 && *p == 'u') |
| 1605 | /* `u' -- convert to uppercase. |
| 1606 | * |
| 1607 | * If a case conversion is already set, then that will override |
| 1608 | * whatever we do here, so don't bother. |
| 1609 | */ |
| 1610 | { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; } |
| 1611 | |
| 1612 | else |
| 1613 | /* Something else we didn't understand. */ |
| 1614 | lose("%s:%u: unknown filter `%.*s'", |
| 1615 | file, line, (int)(t - p), p); |
| 1616 | |
| 1617 | /* Continue from after the filter name. */ |
| 1618 | p = t; |
| 1619 | } |
| 1620 | |
| 1621 | /* If we're not skipping, and we found a variable, then substitute |
| 1622 | * its value. This is the point where we need to be careful about |
| 1623 | * recursive expansion. |
| 1624 | */ |
| 1625 | if (!(f&SF_SKIP) && var) { |
| 1626 | if (var->f&CF_EXPAND) |
| 1627 | lose("%s:%u: recursive expansion of variable `%.*s'", |
| 1628 | file, line, (int)(q1 - q0), q0); |
| 1629 | var->f |= CF_EXPAND; |
| 1630 | subst(var->val, var->val + var->n, sb, |
| 1631 | var->file, var->line, subqfilt, |
| 1632 | ff | (var->f&CF_LITERAL ? SF_LITERAL : 0)); |
| 1633 | var->f &= ~CF_EXPAND; |
| 1634 | } |
| 1635 | |
| 1636 | /* If there's an alternative, then we need to process (or maybe |
| 1637 | * skip) it. Otherwise, we should complain if there was no |
| 1638 | * veriable, and we're not skipping. |
| 1639 | */ |
| 1640 | if (p < l && *p == '?') |
| 1641 | p = subst(p + 1, l, sb, file, line, subqfilt, |
| 1642 | ff | SF_SUBEXPR | (var ? SF_SKIP : 0)); |
| 1643 | else if (!var && !(f&SF_SKIP)) |
| 1644 | lose("%s:%u: unknown variable `%.*s'", |
| 1645 | file, line, (int)(q1 - q0), q0); |
| 1646 | |
| 1647 | /* Expect a `}' here. (No need to skip spaces: we already did that |
| 1648 | * after scanning for filters, and either there was no alternative, |
| 1649 | * or we advanced to a delimiter character anyway.) |
| 1650 | */ |
| 1651 | if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line); |
| 1652 | p++; |
| 1653 | break; |
| 1654 | |
| 1655 | default: |
| 1656 | /* Something else. That's a shame. */ |
| 1657 | lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p); |
| 1658 | } |
| 1659 | |
| 1660 | /* Complain if we started out in word-splitting state, and therefore |
| 1661 | * have added a whole number of words to the output, but there's a |
| 1662 | * word-fragment stuck onto the end of this substitution. |
| 1663 | */ |
| 1664 | if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) && |
| 1665 | !((f&SF_SUBEXPR) && (*p == '|' || *p == '}'))) |
| 1666 | lose("%s:%u: surprising word boundary " |
| 1667 | "after splicing substitution", |
| 1668 | file, line); |
| 1669 | } |
| 1670 | |
| 1671 | else { |
| 1672 | /* Something else. Try to skip over this at high speed. |
| 1673 | * |
| 1674 | * This makes use of the table we set up earlier. |
| 1675 | */ |
| 1676 | |
| 1677 | n = strcspn(p, delimtab[f&SF_SPANMASK]); |
| 1678 | if (n > l - p) n = l - p; |
| 1679 | if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f); |
| 1680 | p += n; f |= SF_WORD; |
| 1681 | } |
| 1682 | } |
| 1683 | |
| 1684 | done: |
| 1685 | /* Sort out the wreckage. */ |
| 1686 | |
| 1687 | /* If we're still within quotes then something has gone wrong. */ |
| 1688 | if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line); |
| 1689 | |
| 1690 | /* If we're within a word, and should be splitting, then commit the word to |
| 1691 | * the output list. |
| 1692 | */ |
| 1693 | if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) { |
| 1694 | argv_append(sb->av, xstrndup(sb->d->p, sb->d->len)); |
| 1695 | dstr_reset(sb->d); |
| 1696 | } |
| 1697 | |
| 1698 | /* And, with that, we're done. */ |
| 1699 | return (p); |
| 1700 | } |
| 1701 | |
| 1702 | /* Expand substitutions in a string. |
| 1703 | * |
| 1704 | * Expand the null-terminated string P relative to the HOME section, using |
| 1705 | * configuration CONFIG, and appending the result to dynamic string D. Blame |
| 1706 | * WHAT in any error messages. |
| 1707 | */ |
| 1708 | void config_subst_string(struct config *config, struct config_section *home, |
| 1709 | const char *what, const char *p, struct dstr *d) |
| 1710 | { |
| 1711 | struct subst sb; |
| 1712 | |
| 1713 | sb.config = config; sb.home = home; sb.d = d; |
| 1714 | subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST); |
| 1715 | dstr_putz(d); |
| 1716 | } |
| 1717 | |
| 1718 | /* Expand substitutions in a string. |
| 1719 | * |
| 1720 | * Expand the null-terminated string P relative to the HOME section, using |
| 1721 | * configuration CONFIG, returning the result as a freshly malloc(3)ed |
| 1722 | * string. Blame WHAT in any error messages. |
| 1723 | */ |
| 1724 | char *config_subst_string_alloc(struct config *config, |
| 1725 | struct config_section *home, |
| 1726 | const char *what, const char *p) |
| 1727 | { |
| 1728 | struct dstr d = DSTR_INIT; |
| 1729 | char *q; |
| 1730 | |
| 1731 | config_subst_string(config, home, what, p, &d); |
| 1732 | q = xstrndup(d.p, d.len); dstr_release(&d); return (q); |
| 1733 | } |
| 1734 | |
| 1735 | /* Expand substitutions in a variable. |
| 1736 | * |
| 1737 | * Expand the value of the variable VAR relative to the HOME section, using |
| 1738 | * configuration CONFIG, appending the result to dynamic string D. |
| 1739 | */ |
| 1740 | void config_subst_var(struct config *config, struct config_section *home, |
| 1741 | struct config_var *var, struct dstr *d) |
| 1742 | { |
| 1743 | struct subst sb; |
| 1744 | |
| 1745 | sb.config = config; sb.home = home; sb.d = d; |
| 1746 | var->f |= CF_EXPAND; |
| 1747 | subst(var->val, var->val + var->n, &sb, var->file, var->line, 0, |
| 1748 | SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0)); |
| 1749 | var->f &= ~CF_EXPAND; |
| 1750 | dstr_putz(d); |
| 1751 | } |
| 1752 | |
| 1753 | /* Expand substitutions in a variable. |
| 1754 | * |
| 1755 | * Expand the value of the variable VAR relative to the HOME section, using |
| 1756 | * configuration CONFIG, returning the result as a freshly malloc(3)ed |
| 1757 | * string. |
| 1758 | */ |
| 1759 | char *config_subst_var_alloc(struct config *config, |
| 1760 | struct config_section *home, |
| 1761 | struct config_var *var) |
| 1762 | { |
| 1763 | struct dstr d = DSTR_INIT; |
| 1764 | char *q; |
| 1765 | |
| 1766 | config_subst_var(config, home, var, &d); |
| 1767 | q = xstrndup(d.p, d.len); dstr_release(&d); return (q); |
| 1768 | } |
| 1769 | |
| 1770 | /* Expand substitutions in a variable and split into words. |
| 1771 | * |
| 1772 | * Expand and word-split the value of the variable VAR relative to the HOME |
| 1773 | * section, using configuration CONFIG, appending the resulting words into |
| 1774 | * the vector AV. |
| 1775 | */ |
| 1776 | void config_subst_split_var(struct config *config, |
| 1777 | struct config_section *home, |
| 1778 | struct config_var *var, struct argv *av) |
| 1779 | { |
| 1780 | struct dstr d = DSTR_INIT; |
| 1781 | struct subst sb; |
| 1782 | |
| 1783 | sb.config = config; sb.home = home; sb.av = av; sb.d = &d; |
| 1784 | var->f |= CF_EXPAND; |
| 1785 | subst(var->val, var->val + var->n, &sb, var->file, var->line, 0, |
| 1786 | SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0)); |
| 1787 | var->f &= ~CF_EXPAND; |
| 1788 | dstr_release(&d); |
| 1789 | } |
| 1790 | |
| 1791 | /*----- That's all, folks -------------------------------------------------*/ |