chiark - git - mdw - runlisp/blob - dump-runlisp-image.c

   1 /* -*-c-*-
   2  *
   3  * Dump custom Lisp images for faster script execution
   4  *
   5  * (c) 2020 Mark Wooding
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
  11  *
  12  * Runlisp is free software: you can redistribute it and/or modify it
  13  * under the terms of the GNU General Public License as published by the
  14  * Free Software Foundation; either version 3 of the License, or (at your
  15  * option) any later version.
  16  *
  17  * Runlisp is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  20  * for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with Runlisp.  If not, see <https://www.gnu.org/licenses/>.
  24  */
  25
  26 /*----- Header files ------------------------------------------------------*/
  27
  28 #include "config.h"
  29
  30 #include <assert.h>
  31 #include <ctype.h>
  32 #include <errno.h>
  33 #include <signal.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <time.h>
  38
  39 #include <dirent.h>
  40 #include <fcntl.h>
  41 #include <unistd.h>
  42
  43 #include <sys/select.h>
  44 #include <sys/stat.h>
  45 #include <sys/time.h>
  46 #include <sys/uio.h>
  47 #include <sys/wait.h>
  48
  49 #include "common.h"
  50 #include "lib.h"
  51 #include "mdwopt.h"
  52
  53 /*----- Static data -------------------------------------------------------*/
  54
  55 /* The state required to break an output stream from a subprocess into lines
  56  * so we can prefix them appropriately.  Once our process starts, the `buf'
  57  * points to a buffer of `MAXLINE' bytes.  This is arranged as a circular
  58  * buffer, containing `len' bytes starting at offset `off', and wrapping
  59  * around to the start of the buffer if it runs off the end.
  60  *
  61  * The descriptor `fd' is reset to -1 after it's seen end-of-file.
  62  */
  63 struct linebuf {
  64   int fd;                               /* our file descriptor (or -1) */
  65   char *buf;                            /* line buffer, or null */
  66   unsigned off, len;                    /* offset */
  67 };
  68 #define MAXLINE 16384u                  /* maximum acceptable line length */
  69
  70 /* Job-state constants. */
  71 enum {
  72   JST_READY,                            /* not yet started */
  73   JST_RUN,                              /* currently running */
  74   JST_DEAD,                             /* process exited */
  75   JST_NSTATE
  76 };
  77
  78 /* The state associated with an image-dumping job. */
  79 struct job {
  80   struct treap_node _node;              /* treap intrusion */
  81   struct job *next;                     /* next job in whichever list */
  82   struct argv av;                       /* argument vector to execute */
  83   char *imgnew, *imgout;                /* staging and final output files */
  84   unsigned st;                          /* job state (`JST_...') */
  85   FILE *log;                            /* log output file (`stdout'?) */
  86   pid_t kid;                            /* process id of child (or -1) */
  87   int exit;                             /* exit status from child */
  88   struct linebuf out, err;              /* line buffers for stdout, stderr */
  89 };
  90 #define JOB_NAME(job) TREAP_NODE_KEY(job)
  91 #define JOB_NAMELEN(job) TREAP_NODE_KEYLEN(job)
  92
  93 static struct treap jobs = TREAP_INIT;  /* Lisp systems scheduled to dump */
  94 static struct job *job_ready, *job_run, *job_dead; /* list jobs by state */
  95 static unsigned nrun, maxrun = 1;       /* running and maximum job counts */
  96 static int rc = 0;                      /* code that we should return */
  97 static int nullfd;                      /* file descriptor for `/dev/null' */
  98 static const char *tmpdir;              /* temporary directory path */
  99
 100 static int sig_pipe[2] = { -1, -1 };    /* pipe for reporting signals */
 101 static sigset_t caught, pending;        /* signals we catch; have caught */
 102 static int sigloss = -1;                /* signal that caused us to lose */
 103
 104 static unsigned flags = 0;              /* flags for the application */
 105 #define AF_BOGUS 0x0001u                /*   invalid comand-line syntax */
 106 #define AF_SETCONF 0x0002u              /*   explicit configuration */
 107 #define AF_DRYRUN 0x0004u               /*   don't actually do it */
 108 #define AF_ALL 0x0008u                  /*   dump all known Lisps */
 109 #define AF_FORCE 0x0010u                /*   dump even if images exist */
 110 #define AF_CHECKINST 0x0020u            /*   check Lisp exists before dump */
 111
 112 /*----- Miscellany --------------------------------------------------------*/
 113
 114 /* Report a (printf(3)-style) message MSG, and remember to fail later. */
 115 static PRINTF_LIKE(1, 2) void bad(const char *msg, ...)
 116   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); rc = 127; }
 117
 118 /*----- File utilities ----------------------------------------------------*/
 119
 120 /* Main recursive subroutine for `recursive_delete'.
 121  *
 122  * The string DD currently contains the pathname of a directory, without a
 123  * trailing `/' (though there is /space/ for a terminating zero or whatever).
 124  * Recursively delete all of the files and directories within it.  Appending
 125  * further text to DD is OK, but clobbering the characters which are there
 126  * already isn't allowed.
 127  */
 128 static void recursive_delete_(struct dstr *dd)
 129 {
 130   DIR *dir;
 131   struct dirent *d;
 132   size_t n = dd->len;
 133
 134   /* Open the directory. */
 135   dd->p[n] = 0; dir = opendir(dd->p);
 136   if (!dir)
 137     lose("failed to open directory `%s' for cleanup: %s",
 138          dd->p, strerror(errno));
 139
 140   /* We'll need to build pathnames for the files inside the directory, so add
 141    * the separating `/' character.  Remember the length of this prefix
 142    * because this is the point we'll be rewinding to for each filename we
 143    * find.
 144    */
 145   dd->p[n++] = '/';
 146
 147   /* Now go through each file in turn. */
 148   for (;;) {
 149
 150     /* Get a filename.  If we've run out then we're done.  Skip the special
 151      * `.' and `..' entries.
 152      */
 153     d = readdir(dir); if (!d) break;
 154     if (d->d_name[0] == '.' && (!d->d_name[1] ||
 155                                 (d->d_name[1] == '.' && !d->d_name[2])))
 156       continue;
 157
 158     /* Rewind the string offset and append the new filename. */
 159     dd->len = n; dstr_puts(dd, d->d_name);
 160
 161     /* Try to delete it the usual way.  If it was actually a directory then
 162      * recursively delete it instead.  (We could lstat(2) it first, but this
 163      * should be at least as quick to identify a directory, and it'll save a
 164      * lstat(2) call in the (common) case that it's not a directory.
 165      */
 166     if (!unlink(dd->p));
 167     else if (errno == EISDIR) recursive_delete_(dd);
 168     else lose("failed to delete file `%s': %s", dd->p, strerror(errno));
 169   }
 170
 171   /* We're done.  Try to delete the directory.  (It's possible that there was
 172    * some problem with enumerating the directory, but we'll ignore that: if
 173    * it matters then the directory won't be empty and the rmdir(2) will
 174    * fail.)
 175    */
 176   closedir(dir);
 177   dd->p[--n] = 0;
 178   if (rmdir(dd->p))
 179     lose("failed to delete directory `%s': %s", dd->p, strerror(errno));
 180 }
 181
 182 /* Recursively delete the thing named PATH. */
 183 static void recursive_delete(const char *path)
 184 {
 185   struct dstr d = DSTR_INIT;
 186   dstr_puts(&d, path); recursive_delete_(&d); dstr_release(&d);
 187 }
 188
 189 /* Configure a file descriptor FD.
 190  *
 191  * Set its nonblocking state to NONBLOCK and close-on-exec state to CLOEXEC.
 192  * In both cases, -1 means to leave it alone, zero means to turn it off, and
 193  * any other nonzero value means to turn it on.
 194  */
 195 static int configure_fd(const char *what, int fd, int nonblock, int cloexec)
 196 {
 197   int fl, nfl;
 198
 199   if (nonblock != -1) {
 200     fl = fcntl(fd, F_GETFL); if (fl < 0) goto fail;
 201     if (nonblock) nfl = fl | O_NONBLOCK;
 202     else nfl = fl&~O_NONBLOCK;
 203     if (fl != nfl && fcntl(fd, F_SETFL, nfl)) goto fail;
 204   }
 205
 206   if (cloexec != -1) {
 207     fl = fcntl(fd, F_GETFD); if (fl < 0) goto fail;
 208     if (cloexec) nfl = fl | FD_CLOEXEC;
 209     else nfl = fl&~FD_CLOEXEC;
 210     if (fl != nfl && fcntl(fd, F_SETFD, nfl)) goto fail;
 211   }
 212
 213   return (0);
 214
 215 fail:
 216   bad("failed to configure %s descriptor: %s", what, strerror(errno));
 217   return (-1);
 218 }
 219
 220 /* Create a temporary directory and remember where we put it. */
 221 static void set_tmpdir(void)
 222 {
 223   struct dstr d = DSTR_INIT;
 224   size_t n;
 225   unsigned i;
 226
 227   /* Start building the path name.  Remember the length: we'll rewind to
 228    * here and try again if our first attempt doesn't work.
 229    */
 230   dstr_putf(&d, "%s/runlisp.%d.", my_getenv("TMPDIR", "/tmp"), getpid());
 231   i = 0; n = d.len;
 232
 233   /* Keep trying until it works. */
 234   for (;;) {
 235
 236     /* Build a complete name. */
 237     d.len = n; dstr_putf(&d, "%d", rand());
 238
 239     /* Try to create the directory.  If it worked, we're done.  If it failed
 240      * with `EEXIST' then we'll try again for a while, but give up it it
 241      * doesn't look like we're making any progress.  If it failed for some
 242      * other reason then there's probably not much hope so give up.
 243      */
 244     if (!mkdir(d.p, 0700)) break;
 245     else if (errno != EEXIST)
 246       lose("failed to create temporary directory `%s': %s",
 247            d.p, strerror(errno));
 248     else if (++i >= 32) {
 249       d.len = n; dstr_puts(&d, "???");
 250       lose("failed to create temporary directory `%s': too many attempts",
 251            d.p);
 252     }
 253   }
 254
 255   /* Remember the directory name. */
 256   tmpdir = xstrndup(d.p, d.len); dstr_release(&d);
 257 }
 258
 259 /*----- Signal handling ---------------------------------------------------*/
 260
 261 /* Forward reference into job management. */
 262 static void reap_children(void);
 263
 264 /* Clean things up on exit.
 265  *
 266  * Currently this just means to delete the temporary directory if we've made
 267  * one.
 268  */
 269 static void cleanup(void)
 270   { if (tmpdir) { recursive_delete(tmpdir); tmpdir = 0; } }
 271
 272 /* Check to see whether any signals have arrived, and do the sensible thing
 273  * with them.
 274  */
 275 static void check_signals(void)
 276 {
 277   sigset_t old, pend;
 278   char buf[32];
 279   ssize_t n;
 280
 281   /* Ensure exclusive access to the signal-handling machinery, drain the
 282    * signal pipe, and take a copy of the set of caught signals.
 283    */
 284   sigprocmask(SIG_BLOCK, &caught, &old);
 285   pend = pending; sigemptyset(&pending);
 286   for (;;) {
 287     n = read(sig_pipe[0], buf, sizeof(buf));
 288     if (!n) lose("(internal) signal pipe closed!");
 289     if (n < 0) break;
 290   }
 291   if (errno != EAGAIN && errno != EWOULDBLOCK)
 292     lose("failed to read signal pipe: %s", strerror(errno));
 293   sigprocmask(SIG_SETMASK, &old, 0);
 294
 295   /* Check for each signal of interest to us.
 296    *
 297    * Interrupty signals just set `sigloss' -- the `run_jobs' loop will know
 298    * to unravel everything if this happens.  If `SIGCHLD' happened, then
 299    * check on job process status.
 300    */
 301   if (sigismember(&pend, SIGINT)) sigloss = SIGINT;
 302   else if (sigismember(&pend, SIGHUP)) sigloss = SIGHUP;
 303   else if (sigismember(&pend, SIGTERM)) sigloss = SIGTERM;
 304   if (sigismember(&pend, SIGCHLD)) reap_children();
 305 }
 306
 307 /* The actual signal handler.
 308  *
 309  * Set the appropriate signal bit in `pending', and a byte (of any value)
 310  * down the signal pipe to wake up the select(2) loop.
 311  */
 312 static void handle_signal(int sig)
 313 {
 314   sigset_t old;
 315   char x = '!';
 316
 317   /* Ensure exclusive access while we fiddle with the `caught' set. */
 318   sigprocmask(SIG_BLOCK, &caught, &old);
 319   sigaddset(&pending, sig);
 320   sigprocmask(SIG_SETMASK, &old, 0);
 321
 322   /* Wake up the select(2) loop.  If this fails, there's not a lot we can do
 323    * about it.
 324    */
 325   DISCARD(write(sig_pipe[1], &x, 1));
 326 }
 327
 328 /* Install our signal handler to catch SIG.
 329  *
 330  * If `SIGF_IGNOK' is set in F then don't trap the signal if it's currently
 331  * ignored.  (This is used for signals like `SIGINT', which usually should
 332  * interrupt us; but if the caller wants us to ignore them, we should do as
 333  * it wants.)
 334  *
 335  * WHAT describes the signal, for use in diagnostic messages.
 336  */
 337 #define SIGF_IGNOK 1u
 338 static void set_signal_handler(const char *what, int sig, unsigned f)
 339 {
 340   struct sigaction sa, sa_old;
 341
 342   sigaddset(&caught, sig);
 343
 344   if (f&SIGF_IGNOK) {
 345     if (sigaction(sig, 0, &sa_old)) goto fail;
 346     if (sa_old.sa_handler == SIG_IGN) return;
 347   }
 348
 349   sa.sa_handler = handle_signal;
 350   sigemptyset(&sa.sa_mask);
 351   sa.sa_flags = SA_NOCLDSTOP;
 352   if (sigaction(sig, &sa, 0)) goto fail;
 353
 354   return;
 355
 356 fail:
 357   lose("failed to set %s signal handler: %s", what, strerror(errno));
 358 }
 359
 360 /*----- Line buffering ----------------------------------------------------*/
 361
 362 /* Find the next newline in the line buffer BUF.
 363  *
 364  * The search starts at `BUF->off', and potentially covers the entire buffer
 365  * contents.  Set *LINESZ_OUT to the length of the line, in bytes.  (Callers
 366  * must beware that the text of the line may wrap around the ends of the
 367  * buffer.)  Return zero if we found a newline, or nonzero if the search
 368  * failed.
 369  */
 370 static int find_newline(struct linebuf *buf, size_t *linesz_out)
 371 {
 372   char *nl;
 373
 374   if (buf->off + buf->len <= MAXLINE) {
 375     /* The buffer contents is in one piece.  Just search it. */
 376
 377     nl = memchr(buf->buf + buf->off, '\n', buf->len);
 378     if (nl) { *linesz_out = (nl - buf->buf) - buf->off; return (0); }
 379
 380   } else {
 381     /* The buffer contents is in two pieces.  We must search both of them. */
 382
 383     nl = memchr(buf->buf + buf->off, '\n', MAXLINE - buf->off);
 384     if (nl) { *linesz_out = (nl - buf->buf) - buf->off; return (0); }
 385     nl = memchr(buf->buf, '\n', buf->len - (MAXLINE - buf->off));
 386     if (nl)
 387       { *linesz_out = (nl - buf->buf) + (MAXLINE - buf->off); return (0); }
 388   }
 389
 390   return (-1);
 391 }
 392
 393 /* Write a completed line out to the JOB's log file.
 394  *
 395  * The line starts at BUF->off, and continues for N bytes, not including the
 396  * newline (which, in fact, might not exist at all).  Precede the actual text
 397  * of the line with the JOB's name, and the MARKER character, and follow it
 398  * with the TAIL text (which should include an actual newline character).
 399  */
 400 static void write_line(struct job *job, struct linebuf *buf,
 401                        size_t n, char marker, const char *tail)
 402 {
 403   fprintf(job->log, "%-13s %c ", JOB_NAME(job), marker);
 404   if (buf->off + n <= MAXLINE)
 405     fwrite(buf->buf + buf->off, 1, n, job->log);
 406   else {
 407     fwrite(buf->buf + buf->off, 1, MAXLINE - buf->off, job->log);
 408     fwrite(buf->buf, 1, n - (MAXLINE - buf->off), job->log);
 409   }
 410   fputs(tail, job->log);
 411 }
 412
 413 /* Collect output lines from JOB's process and write them to the log.
 414  *
 415  * Read data from BUF's file descriptor.  Output complete (or overlong) lines
 416  * usng `write_line'.  On end-of-file, output any final incomplete line in
 417  * the same way, close the descriptor, and set it to -1.
 418  */
 419 static void prefix_lines(struct job *job, struct linebuf *buf, char marker)
 420 {
 421   struct iovec iov[2]; int niov;
 422   ssize_t n;
 423   size_t linesz;
 424
 425   /* Read data into the buffer.  This fancy dance with readv(2) is probably
 426    * overkill.
 427    *
 428    * We can't have BUF->len = MAXLINE because we'd have flushed out a
 429    * maximum-length buffer as an incomplete line last time.
 430    */
 431   assert(buf->len < MAXLINE);
 432   if (!buf->off) {
 433     iov[0].iov_base = buf->buf + buf->len;
 434     iov[0].iov_len = MAXLINE - buf->len;
 435     niov = 1;
 436   } else if (buf->off + buf->len >= MAXLINE) {
 437     iov[0].iov_base = buf->buf + buf->off + buf->len - MAXLINE;
 438     iov[0].iov_len = MAXLINE - buf->len;
 439     niov = 1;
 440   } else {
 441     iov[0].iov_base = buf->buf + buf->off + buf->len;
 442     iov[0].iov_len = MAXLINE - (buf->off + buf->len);
 443     iov[1].iov_base = buf->buf;
 444     iov[1].iov_len = buf->off;
 445     niov = 1;
 446   }
 447   n = readv(buf->fd, iov, niov);
 448
 449   if (n < 0) {
 450     /* If there's no data to read after all then just move on.  Otherwise we
 451      * have a problem.
 452      */
 453     if (errno == EAGAIN || errno == EWOULDBLOCK) return;
 454     lose("failed to read job `%s' output stream: %s",
 455          JOB_NAME(job), strerror(errno));
 456   }
 457
 458   /* Include the new material in the buffer length, and write out any
 459    * complete lines we find.
 460    */
 461   buf->len += n;
 462   while (!find_newline(buf, &linesz)) {
 463     write_line(job, buf, linesz, marker, "\n");
 464     buf->len -= linesz + 1;
 465     buf->off += linesz + 1; if (buf->off >= MAXLINE) buf->off -= MAXLINE;
 466   }
 467
 468   if (!buf->len)
 469     /* If there's nothing left then we might as well reset the buffer offset
 470      * to the start of the buffer.
 471      */
 472     buf->off = 0;
 473   else if (buf->len == MAXLINE) {
 474     /* We've filled the buffer with stuff that's not a whole line.  Flush it
 475      * out anyway.
 476      */
 477     write_line(job, buf, MAXLINE, marker, " [...]\n");
 478     buf->off = buf->len = 0;
 479   }
 480
 481   if (!n) {
 482     /* We've hit end-of-file.  Close the stream, and write out any
 483      * unterminated partial line.
 484      */
 485     close(buf->fd); buf->fd = -1;
 486     if (buf->len)
 487       write_line(job, buf, buf->len, marker, " [missing final newline]\n");
 488   }
 489 }
 490
 491 /*----- Job management ----------------------------------------------------*/
 492
 493 /* Add a new job to the `ready' queue.
 494  *
 495  * The job will be to dump the Lisp system with the given LEN-byte NAME.  On
 496  * entry, *TAIL_INOUT should point to the `next' link of the last node in the
 497  * list (or the list head pointer), and will be updated on exit.
 498  *
 499  * This function reports (fatal) errors for most kinds of problems.  If
 500  * `JF_QUIET' is set in F then silently ignore a well-described Lisp system
 501  * which nonetheless isn't suitable.  (This is specifically intended for the
 502  * case where we try to dump all known Lisp systems, but some don't have a
 503  * `dump-image' command.)
 504  */
 505 #define JF_QUIET 1u
 506 static void add_job(struct job ***tail_inout, unsigned f,
 507                     const char *name, size_t len)
 508 {
 509   struct job *job;
 510   struct treap_path path;
 511   struct config_section *sect;
 512   struct config_var *dumpvar, *cmdvar, *imgvar;
 513   struct dstr d = DSTR_INIT;
 514   struct argv av = ARGV_INIT;
 515   char *imgnew = 0, *imgout = 0;
 516   size_t i;
 517   unsigned fef;
 518
 519   /* Check to see whether this Lisp system is already queued up. */
 520   job = treap_probe(&jobs, name, len, &path);
 521   if (job) {
 522     if (verbose >= 2) {
 523       moan("ignoring duplicate Lisp `%s'", JOB_NAME(job));
 524       return;
 525     }
 526   }
 527
 528   /* Find the configuration for this Lisp system and check that it can be
 529    * dumped.
 530    */
 531   sect = config_find_section_n(&config, 0, name, len);
 532   if (!sect) lose("unknown Lisp implementation `%.*s'", (int)len, name);
 533   name = CONFIG_SECTION_NAME(sect);
 534   dumpvar = config_find_var(&config, sect, 0, "dump-image");
 535   if (!dumpvar) {
 536     if (!(f&JF_QUIET))
 537       lose("don't know how to dump images for Lisp implementation `%s'",
 538            name);
 539     goto end;
 540   }
 541
 542   /* Check that the other necessary variables are present. */
 543   imgvar = config_find_var(&config, sect, 0, "image-file");
 544   if (!imgvar) lose("variable `image-file' not defined for Lisp `%s'", name);
 545   cmdvar = config_find_var(&config, sect, 0, "command");
 546   if (!cmdvar) lose("variable `command' not defined for Lisp `%s'", name);
 547
 548   /* Build the job's command line. */
 549   config_subst_split_var(&config, sect, dumpvar, &av);
 550   if (!av.n)
 551     lose("empty `dump-image' command for Lisp implementation `%s'", name);
 552
 553   /* If we're supposed to check that the Lisp exists before proceeding then
 554    * do that.  There are /two/ commands to check: the basic Lisp command,
 555    * /and/ the command to actually do the dumping, which might not be the
 556    * same thing.  (Be careful not to check the same command twice, though,
 557    * because that would cause us to spam the user with redundant
 558    * diagnostics.)
 559    */
 560   if (flags&AF_CHECKINST) {
 561     dstr_reset(&d);
 562     fef = (verbose >= 2 ? FEF_VERBOSE : 0);
 563     config_subst_var(&config, sect, cmdvar, &d);
 564     if (!found_in_path_p(d.p, fef) ||
 565         (STRCMP(d.p, !=, av.v[0]) && !found_in_path_p(av.v[0], fef))) {
 566       if (verbose >= 2) moan("skipping Lisp implementation `%s'", name);
 567       goto end;
 568     }
 569   }
 570
 571   /* Collect the output image file names. */
 572   imgnew =
 573     config_subst_string_alloc(&config, sect, "<internal>", "${@image-new}");
 574   imgout =
 575     config_subst_string_alloc(&config, sect, "<internal>", "${@image-out}");
 576
 577   /* If we're supposed to check whether the image file exists, then we should
 578    * do that.
 579    */
 580   if (!(flags&AF_FORCE)) {
 581     if (!access(imgout, F_OK)) {
 582       if (verbose >= 2)
 583         moan("image `%s' already exists: skipping `%s'", d.p, name);
 584       goto end;
 585     }
 586   }
 587
 588   /* All preflight checks complete.  Build the job and hook it onto the end
 589    * of the list.  (Steal the command-line vector so that we don't try to
 590    * free it during cleanup.)
 591    */
 592   job = xmalloc(sizeof(*job));
 593   job->st = JST_READY;
 594   job->kid = -1;
 595   job->out.fd = -1; job->out.buf = 0;
 596   job->err.fd = -1; job->err.buf = 0;
 597   job->av = av; argv_init(&av);
 598   job->imgnew = imgnew; job->imgout = imgout; imgnew = imgout = 0;
 599   treap_insert(&jobs, &path, &job->_node, name, len);
 600   **tail_inout = job; *tail_inout = &job->next;
 601
 602 end:
 603   /* All done.  Cleanup time. */
 604   for (i = 0; i < av.n; i++) free(av.v[i]);
 605   free(imgnew); free(imgout);
 606   dstr_release(&d); argv_release(&av);
 607 }
 608
 609 /* Free the JOB and all the resources it holds.
 610  *
 611  * Close the pipes; kill the child process.  Everything must go.
 612  */
 613 static void release_job(struct job *job)
 614 {
 615   size_t i;
 616
 617   if (job->kid > 0) kill(job->kid, SIGKILL); /* ?? */
 618   if (job->log && job->log != stdout) fclose(job->log);
 619   free(job->imgnew); free(job->imgout);
 620   for (i = 0; i < job->av.n; i++) free(job->av.v[i]);
 621   argv_release(&job->av);
 622   free(job->out.buf); if (job->out.fd >= 0) close(job->out.fd);
 623   free(job->err.buf); if (job->err.fd >= 0) close(job->err.fd);
 624   free(job);
 625 }
 626
 627 /* Do all the necessary things when JOB finishes (successfully or not).
 628  *
 629  * Eventually the job is freed (using `release_job').
 630  */
 631 static void finish_job(struct job *job)
 632 {
 633   char buf[16483];
 634   size_t n;
 635   int ok = 0;
 636
 637   /* Start a final line to the job log describing its eventual fate.
 638    *
 639    * This is where we actually pick apart the exit status.  Set `ok' if it
 640    * actually succeeded, because that's all anything else cares about.
 641    */
 642   fprintf(job->log, "%-13s > ", JOB_NAME(job));
 643   if (WIFEXITED(job->exit)) {
 644     if (!WEXITSTATUS(job->exit))
 645       { fputs("completed successfully\n", job->log); ok = 1; }
 646     else
 647       fprintf(job->log, "failed with exit status %d\n",
 648               WEXITSTATUS(job->exit));
 649   } else if (WIFSIGNALED(job->exit))
 650     fprintf(job->log, "killed by signal %d (%s%s)", WTERMSIG(job->exit),
 651 #if defined(HAVE_STRSIGNAL)
 652         strsignal(WTERMSIG(job->exit)),
 653 #elif defined(HAVE_DECL_SYS_SIGLIST)
 654         sys_siglist[WTERMSIG(job->exit)],
 655 #else
 656         "unknown signal",
 657 #endif
 658 #ifdef WCOREDUMP
 659         WCOREDUMP(job->exit) ? "; core dumped" :
 660 #endif
 661         "");
 662   else
 663     fprintf(job->log, "exited with incomprehensible status %06o\n",
 664             job->exit);
 665
 666   /* If it succeeded, then try to rename the completed image file into place.
 667    *
 668    * If that caused trouble then mark the job as failed after all.
 669    */
 670   if (ok && rename(job->imgnew, job->imgout)) {
 671     fprintf(job->log, "%-13s > failed to rename Lisp `%s' "
 672                               "output image `%s' to `%s': %s",
 673             JOB_NAME(job), JOB_NAME(job),
 674             job->imgnew, job->imgout, strerror(errno));
 675     ok = 0;
 676   }
 677
 678   /* If the job failed and we're being quiet then write out the log that we
 679    * made.
 680    */
 681   if (!ok && verbose < 2) {
 682     rewind(job->log);
 683     for (;;) {
 684       n = fread(buf, 1, sizeof(buf), job->log);
 685       if (n) fwrite(buf, 1, n, stdout);
 686       if (n < sizeof(buf)) break;
 687     }
 688   }
 689
 690   /* Also make a node to stderr about what happened.  (Just to make sure
 691    * that we've gotten someone's attention.)
 692    */
 693   if (!ok) bad("failed to dump Lisp `%s'", JOB_NAME(job));
 694
 695   /* Finally free the job control block. */
 696   release_job(job);
 697 }
 698
 699 /* Called after `SIGCHLD': collect exit statuses and mark jobs as dead. */
 700 static void reap_children(void)
 701 {
 702   struct job *job, **link;
 703   pid_t kid;
 704   int st;
 705
 706   for (;;) {
 707
 708     /* Collect a child exit status.  If there aren't any more then we're
 709      * done.
 710      */
 711     kid = waitpid(0, &st, WNOHANG);
 712     if (kid <= 0) break;
 713
 714     /* Try to find a matching job.  If we can't, then we should just ignore
 715      * it.
 716      */
 717     for (link = &job_run; (job = *link); link = &job->next)
 718       if (job->kid == kid) goto found;
 719     continue;
 720
 721   found:
 722     /* Mark the job as dead, save its exit status, and move it into the dead
 723      * list.
 724      */
 725     job->exit = st; job->st = JST_DEAD; job->kid = -1; nrun--;
 726     *link = job->next; job->next = job_dead; job_dead = job;
 727   }
 728
 729   /* If there was a problem with waitpid(2) then report it. */
 730   if (kid < 0 && errno != ECHILD)
 731     lose("failed to collect child process exit status: %s", strerror(errno));
 732 }
 733
 734 /* Execute the handler for some JOB. */
 735 static NORETURN void job_child(struct job *job)
 736 {
 737   try_exec(&job->av,
 738            !(flags&AF_CHECKINST) && verbose >= 2 ? TEF_VERBOSE : 0);
 739   moan("failed to run `%s': %s", job->av.v[0], strerror(errno));
 740   _exit(127);
 741 }
 742
 743 /* Start up jobs while there are (a) jobs to run and (b) slots to run them
 744  * in.
 745  */
 746 static void start_jobs(void)
 747 {
 748   struct dstr d = DSTR_INIT;
 749   int p_out[2], p_err[2];
 750   struct job *job;
 751   pid_t kid;
 752
 753   /* Keep going until either we run out of jobs, or we've got enough running
 754    * already.
 755    */
 756   while (job_ready && nrun < maxrun) {
 757
 758     /* Set things up ready.  If things go wrong, we need to know what stuff
 759      * needs to be cleaned up.
 760      */
 761     job = job_ready; job_ready = job->next;
 762     p_out[0] = p_out[1] = p_err[0] = p_err[1] = -1;
 763
 764     /* Make a temporary subdirectory for this job to use. */
 765     dstr_reset(&d); dstr_putf(&d, "%s/%s", tmpdir, JOB_NAME(job));
 766     if (mkdir(d.p, 0700)) {
 767       bad("failed to create working directory for job `%s': %s",
 768           JOB_NAME(job), strerror(errno));
 769       goto fail;
 770     }
 771
 772     /* Create the job's log file.  If we're being verbose then that's just
 773      * our normal standard output -- /not/ stderr: it's likely that users
 774      * will want to pipe this stuff through a pager or something, and that'll
 775      * be easier if we use stdout.  Otherwise, make a file in the temporary
 776      * directory.
 777      */
 778     if (verbose >= 2)
 779       job->log = stdout;
 780     else {
 781       dstr_puts(&d, "/log"); job->log = fopen(d.p, "w+");
 782       if (!job->log)
 783         lose("failed to open log file `%s': %s", d.p, strerror(errno));
 784     }
 785
 786     /* Make the pipes to capture the child process's standard output and
 787      * error streams.
 788      */
 789     if (pipe(p_out) || pipe(p_err)) {
 790       bad("failed to create pipes for job `%s': %s",
 791           JOB_NAME(job), strerror(errno));
 792       goto fail;
 793     }
 794     if (configure_fd("job stdout pipe", p_out[0], 1, 1) ||
 795         configure_fd("job stdout pipe", p_out[1], 0, 1) ||
 796         configure_fd("job stderr pipe", p_err[0], 1, 1) ||
 797         configure_fd("job stderr pipe", p_err[1], 0, 1) ||
 798         configure_fd("log file", fileno(job->log), 1, 1))
 799       goto fail;
 800
 801     /* Initialize the line-buffer structures ready for use. */
 802     job->out.buf = xmalloc(MAXLINE); job->out.off = job->out.len = 0;
 803     job->out.fd = p_out[0]; p_out[0] = -1;
 804     job->err.buf = xmalloc(MAXLINE); job->err.off = job->err.len = 0;
 805     job->err.fd = p_err[0]; p_err[0] = -1;
 806     dstr_reset(&d); argv_string(&d, &job->av);
 807
 808     /* Print a note to the top of the log. */
 809     fprintf(job->log, "%-13s > starting %s\n", JOB_NAME(job), d.p);
 810
 811     /* Flush the standard output stream.  (Otherwise the child might try to
 812      * flush it too.)
 813      */
 814     fflush(stdout);
 815
 816     /* Spin up the child process. */
 817     kid = fork();
 818     if (kid < 0) {
 819       bad("failed to fork process for job `%s': %s",
 820           JOB_NAME(job), strerror(errno));
 821       goto fail;
 822     }
 823     if (!kid) {
 824       if (dup2(nullfd, 0) < 0 ||
 825           dup2(p_out[1], 1) < 0 ||
 826           dup2(p_err[1], 2) < 0)
 827         lose("failed to juggle job `%s' file descriptors: %s",
 828              JOB_NAME(job), strerror(errno));
 829       job_child(job);
 830     }
 831
 832     /* Close the ends of the pipes that we don't need.  Move the job into
 833      * the running list.
 834      */
 835     close(p_out[1]); close(p_err[1]);
 836     job->kid = kid;
 837     job->st = JST_RUN; job->next = job_run; job_run = job; nrun++;
 838     continue;
 839
 840   fail:
 841     /* Clean up the wreckage if it didn't work. */
 842     if (p_out[0] >= 0) close(p_out[0]);
 843     if (p_out[1] >= 0) close(p_out[1]);
 844     if (p_err[0] >= 0) close(p_err[0]);
 845     if (p_err[1] >= 0) close(p_err[1]);
 846     release_job(job);
 847   }
 848
 849   /* All done except for some final tidying up. */
 850   dstr_release(&d);
 851 }
 852
 853 /* Take care of all of the jobs until they're all done. */
 854 static void run_jobs(void)
 855 {
 856   struct job *job, *next, **link;
 857   int nfd;
 858   fd_set fd_in;
 859
 860   for (;;) {
 861
 862     /* If there are jobs still to be started and we have slots to spare then
 863      * start some more up.
 864      */
 865     start_jobs();
 866
 867     /* If the queues are now all empty then we're done.  (No need to check
 868      * `job_ready' here: `start_jobs' would have started them if `job_run'
 869      * was empty.
 870      */
 871     if (!job_run && !job_dead) break;
 872
 873
 874     /* Prepare for the select(2) call: watch for the signal pipe and all of
 875      * the job pipes.
 876      */
 877 #define SET_FD(dir, fd) do {                                            \
 878   int _fd = (fd);                                                       \
 879   FD_SET(_fd, &fd_##dir);                                               \
 880   if (_fd >= nfd) nfd = _fd + 1;                                        \
 881 } while (0)
 882
 883     FD_ZERO(&fd_in); nfd = 0;
 884     SET_FD(in, sig_pipe[0]);
 885     for (job = job_run; job; job = job->next) {
 886       if (job->out.fd >= 0) SET_FD(in, job->out.fd);
 887       if (job->err.fd >= 0) SET_FD(in, job->err.fd);
 888     }
 889     for (job = job_dead; job; job = job->next) {
 890       if (job->out.fd >= 0) SET_FD(in, job->out.fd);
 891       if (job->err.fd >= 0) SET_FD(in, job->err.fd);
 892     }
 893
 894 #undef SET_FD
 895
 896     /* Find out what's going on. */
 897     if (select(nfd, &fd_in, 0, 0, 0) < 0) {
 898       if (errno == EINTR) continue;
 899       else lose("select failed: %s", strerror(errno));
 900     }
 901
 902     /* If there were any signals then handle them. */
 903     if (FD_ISSET(sig_pipe[0], &fd_in)) {
 904       check_signals();
 905       if (sigloss >= 0) {
 906         /* We hit a fatal signal.  Kill off the remaining jobs and abort. */
 907         for (job = job_ready; job; job = next)
 908           { next = job->next; release_job(job); }
 909         for (job = job_run; job; job = next)
 910           { next = job->next; release_job(job); }
 911         for (job = job_dead; job; job = next)
 912           { next = job->next; release_job(job); }
 913         break;
 914       }
 915     }
 916
 917     /* Log any new output from the running jobs. */
 918     for (job = job_run; job; job = job->next) {
 919       if (job->out.fd >= 0 && FD_ISSET(job->out.fd, &fd_in))
 920         prefix_lines(job, &job->out, '|');
 921       if (job->err.fd >= 0 && FD_ISSET(job->err.fd, &fd_in))
 922         prefix_lines(job, &job->err, '*');
 923     }
 924
 925     /* Finally, clear away any dead jobs once we've collected all their
 926      * output.
 927      */
 928     for (link = &job_dead, job = *link; job; job = next) {
 929       next = job->next;
 930       if (job->out.fd >= 0 || job->err.fd >= 0) link = &job->next;
 931       else { *link = next; finish_job(job); }
 932     }
 933   }
 934 }
 935
 936 /*----- Main program ------------------------------------------------------*/
 937
 938 /* Help and related functions. */
 939 static void version(FILE *fp)
 940   { fprintf(fp, "%s, runlisp version %s\n", progname, PACKAGE_VERSION); }
 941
 942 static void usage(FILE *fp)
 943 {
 944   fprintf(fp, "\
 945 usage: %s [-afnqv] [-c CONF] [-o [SECT:]VAR=VAL]\n\
 946         [-O FILE|DIR] [-j NJOBS] [LISP ...]\n",
 947           progname);
 948 }
 949
 950 static void help(FILE *fp)
 951 {
 952   version(fp); fputc('\n', fp); usage(fp);
 953   fputs("\n\
 954 Help options:\n\
 955   -h, --help                    Show this help text and exit successfully.\n\
 956   -V, --version                 Show version number and exit successfully.\n\
 957 \n\
 958 Diagnostics:\n\
 959   -n, --dry-run                 Don't run run anything (useful with `-v').\n\
 960   -q, --quiet                   Don't print warning messages.\n\
 961   -v, --verbose                 Print informational messages (repeatable).\n\
 962 \n\
 963 Configuration:\n\
 964   -c, --config-file=CONF        Read configuration from CONF (repeatable).\n\
 965   -o, --set-option=[SECT:]VAR=VAL Set configuration variable (repeatable).\n\
 966 \n\
 967 Image dumping:\n\
 968   -O, --output=FILE|DIR         Store image(s) in FILE or DIR.\n\
 969   -a, --all-configured          Dump all implementations configured.\n\
 970   -f, --force                   Dump images even if they already exist.\n\
 971   -i, --check-installed         Check Lisp systems exist before invoking.\n\
 972   -j, --jobs=NJOBS              Run up to NJOBS jobs in parallel.\n",
 973         fp);
 974 }
 975
 976 /* Main program. */
 977 int main(int argc, char *argv[])
 978 {
 979   struct config_section_iter si;
 980   struct config_section *sect;
 981   struct config_var *var;
 982   const char *out = 0, *p, *q, *l;
 983   struct job *job, **tail;
 984   struct stat st;
 985   struct dstr d = DSTR_INIT;
 986   int i, fd, first;
 987
 988   /* Command-line options. */
 989   static const struct option opts[] = {
 990     { "help",                   0,              0,      'h' },
 991     { "version",                0,              0,      'V' },
 992     { "output",                 OPTF_ARGREQ,    0,      'O' },
 993     { "all-configured",         0,              0,      'a' },
 994     { "config-file",            OPTF_ARGREQ,    0,      'c' },
 995     { "force",                  OPTF_NEGATE,    0,      'f' },
 996     { "check-installed",        OPTF_NEGATE,    0,      'i' },
 997     { "jobs",                   OPTF_ARGREQ,    0,      'j' },
 998     { "dry-run",                OPTF_NEGATE,    0,      'n' },
 999     { "set-option",             OPTF_ARGREQ,    0,      'o' },
1000     { "quiet",                  0,              0,      'q' },
1001     { "verbose",                0,              0,      'v' },
1002     { 0,                        0,              0,      0 }
1003   };
1004
1005   /* Initial setup. */
1006   set_progname(argv[0]);
1007   init_config();
1008
1009   /* Parse the options. */
1010   optprog = (/*unconst*/ char *)progname;
1011   for (;;) {
1012     i = mdwopt(argc - 1, argv + 1, "hVO:ac:f+i+j:n+o:qv", opts, 0, 0,
1013                OPTF_NEGATION | OPTF_NOPROGNAME);
1014     if (i < 0) break;
1015     switch (i) {
1016       case 'h': help(stdout); exit(0);
1017       case 'V': version(stdout); exit(0);
1018       case 'O': out = optarg; break;
1019       case 'a': flags |= AF_ALL; break;
1020       case 'c': read_config_path(optarg, 0); flags |= AF_SETCONF; break;
1021       case 'f': flags |= AF_FORCE; break;
1022       case 'f' | OPTF_NEGATED: flags &= ~AF_FORCE; break;
1023       case 'i': flags |= AF_CHECKINST; break;
1024       case 'i' | OPTF_NEGATED: flags &= ~AF_CHECKINST; break;
1025       case 'j': maxrun = parse_int("number of jobs", optarg, 1, 65535); break;
1026       case 'n': flags |= AF_DRYRUN; break;
1027       case 'n' | OPTF_NEGATED: flags &= ~AF_DRYRUN; break;
1028       case 'o': if (set_config_var(optarg)) flags |= AF_BOGUS; break;
1029       case 'q': if (verbose) verbose--; break;
1030       case 'v': verbose++; break;
1031       default: flags |= AF_BOGUS; break;
1032     }
1033   }
1034
1035   /* CHeck that everything worked. */
1036   optind++;
1037   if ((flags&AF_ALL) ? optind < argc : optind >= argc) flags |= AF_BOGUS;
1038   if (flags&AF_BOGUS) { usage(stderr); exit(127); }
1039
1040   /* Load default configuration if no explicit files were requested. */
1041   if (!(flags&AF_SETCONF)) load_default_config();
1042
1043   /* OK, so we've probably got some work to do.  Let's set things up ready.
1044    * It'll be annoying if our standard descriptors aren't actually set up
1045    * properly, so we'll make sure those slots are populated.  We'll need a
1046    * `/dev/null' descriptor anyway (to be stdin for the jobs).  We'll also
1047    * need a temporary directory, and it'll be less temporary if we don't
1048    * arrange to delete it when we're done.  And finally we'll need to know
1049    * when a child process exits.
1050    */
1051   for (;;) {
1052     fd = open("/dev/null", O_RDWR);
1053     if (fd < 0) lose("failed to open `/dev/null': %s", strerror(errno));
1054     if (fd > 2) { nullfd = fd; break; }
1055   }
1056   configure_fd("null fd", nullfd, 0, 1);
1057   atexit(cleanup);
1058   if (pipe(sig_pipe))
1059     lose("failed to create signal pipe: %s", strerror(errno));
1060   configure_fd("signal pipe (read end)", sig_pipe[0], 1, 1);
1061   configure_fd("signal pipe (write end)", sig_pipe[1], 1, 1);
1062   sigemptyset(&caught); sigemptyset(&pending);
1063   set_signal_handler("SIGTERM", SIGTERM, SIGF_IGNOK);
1064   set_signal_handler("SIGINT", SIGINT, SIGF_IGNOK);
1065   set_signal_handler("SIGHUP", SIGHUP, SIGF_IGNOK);
1066   set_signal_handler("SIGCHLD", SIGCHLD, 0);
1067
1068   /* Create the temporary directory and export it into the configuration. */
1069   set_tmpdir();
1070   config_set_var(&config, builtin, CF_LITERAL, "@%tmp-dir", tmpdir);
1071   config_set_var(&config, builtin, 0,
1072                  "@tmp-dir", "${@BUILTIN:@%tmp-dir}/${@name}");
1073
1074   /* Work out where the image files are going to go.  If there's no `-O'
1075    * option then we use the main `image-dir'.  Otherwise what happens depends
1076    * on whether this is a file or a directory.
1077    */
1078   if (!out)
1079     config_set_var(&config, builtin, 0,
1080                    "@image-out", "${@image-dir}/${image-file}");
1081   else if (!stat(out, &st) && S_ISDIR(st.st_mode))  {
1082     config_set_var(&config, builtin, CF_LITERAL, "@%out-dir", out);
1083     config_set_var(&config, builtin, 0,
1084                    "@image-out", "${@BUILTIN:@%out-dir}/${image-file}");
1085   } else if (argc - optind != 1)
1086     lose("can't dump multiple Lisps to a single output file");
1087   else
1088     config_set_var(&config, builtin, CF_LITERAL, "@image-out", out);
1089
1090   /* Set the staging file. */
1091   config_set_var(&config, builtin, 0, "@image-new", "${@image-out}.new");
1092
1093   /* Dump the final configuration if we're being very verbose. */
1094   if (verbose >= 5) dump_config();
1095
1096   /* Create jobs for the Lisp systems we're supposed to be dumping. */
1097   tail = &job_ready;
1098   if (!(flags&AF_ALL))
1099     for (i = optind; i < argc; i++)
1100       add_job(&tail, 0, argv[i], strlen(argv[i]));
1101   else {
1102     /* So we're supposed to dump `all' of them.  If there's a `dump'
1103      * configuration setting then we need to parse that.  Otherwise we just
1104      * try all of them.
1105      */
1106     var = config_find_var(&config, toplevel, 0, "dump");
1107     if (!var) {
1108       /* No setting.  Just do all of the Lisps which look available. */
1109
1110       flags |= AF_CHECKINST;
1111       for (config_start_section_iter(&config, &si);
1112            (sect = config_next_section(&si)); )
1113         add_job(&tail, JF_QUIET,
1114                 CONFIG_SECTION_NAME(sect),
1115                 CONFIG_SECTION_NAMELEN(sect));
1116     } else {
1117       /* Parse the `dump' list. */
1118
1119       p = var->val; l = p + var->n;
1120       for (;;) {
1121         while (p < l && ISSPACE(*p)) p++;
1122         if (p >= l) break;
1123         q = p;
1124         while (p < l && !ISSPACE(*p) && *p != ',') p++;
1125         add_job(&tail, 0, q, p - q);
1126         while (p < l && ISSPACE(*p)) p++;
1127         if (p < l && *p == ',') p++;
1128       }
1129     }
1130   }
1131   *tail = 0;
1132
1133   /* Report on what it is we're about to do. */
1134   if (verbose >= 3) {
1135     dstr_reset(&d);
1136     first = 1;
1137     for (job = job_ready; job; job = job->next) {
1138       if (first) first = 0;
1139       else dstr_puts(&d, ", ");
1140       dstr_putf(&d, "`%s'", JOB_NAME(job));
1141     }
1142     if (first) dstr_puts(&d, "(none)");
1143     dstr_putz(&d);
1144     moan("dumping Lisps: %s", d.p);
1145   }
1146
1147   /* If we're not actually going to do anything after all then now's the time
1148    * to, err, not do that.
1149    */
1150   if (flags&AF_DRYRUN) {
1151     for (job = job_ready; job; job = job->next) {
1152       if (try_exec(&job->av,
1153                    TEF_DRYRUN |
1154                      (verbose >= 2 && !(flags&AF_CHECKINST) ?
1155                        TEF_VERBOSE : 0)))
1156         rc = 2;
1157       else if (verbose >= 2)
1158         printf("%-13s > (not dumping `%s': dry run)\n",
1159                 JOB_NAME(job), JOB_NAME(job));
1160     }
1161     return (rc);
1162   }
1163
1164   /* Run the jobs. */
1165   run_jobs();
1166
1167   /* Finally, check for any last signals.  If we hit any fatal signals then
1168    * we should kill ourselves so that the exit status will be right.
1169    */
1170   check_signals();
1171   if (sigloss) { cleanup(); signal(sigloss, SIG_DFL); raise(sigloss); }
1172
1173   /* All done! */
1174   return (rc);
1175 }
1176
1177 /*----- That's all, folks -------------------------------------------------*/