cprogs/watershed.c

   1 /*
   2  * watershed - an auxiliary verb for optimising away
   3  *             unnecessary runs of idempotent commands
   4  *
   5  * watershed is Copyright 2007 Canonical Ltd
   6  *
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  *
  22  * See the file XXXXX for a full list of credits information (often
  23  * installed as XXXXX).
  24  *
  25  */
  26 /*
  27  *
  28  * usage: watershed [<options>] <command> [<arg>...]
  29  *
  30  * options:
  31  *   -d|--state-dir <state-dir>
  32  *        default is /var/run/watershed for uid 0
  33  *                   $HOME/.watershed for others
  34  *   -i|--command-id <command-id>
  35  *
  36  * files used:
  37  *    <state-dir>/<command-id>.lock            lockfile
  38  *    <state-dir>/<command-id>.cohort          cohort
  39  *
  40  * default <command-id> is
  41  *    hex(sha256(argv[0]+'\0' + argv[1]+'\0' ... argv[argc-1]+'\0'))
  42  *    '='
  43  *    mangled argv[0] (all chars [^-+_0-9A-Za-z] replaced with ?
  44  *                     and max 32 chars)
  45  *
  46  * exit status:
  47  *  127      - something went wrong, or process died with some other signal
  48  *  SIGPIPE  - process died with SIGPIPE
  49  *  x        - process called _exit(x)
  50  *
  51  * stdin/stdout/stderr:
  52  *
  53  *  If watershed exits 127 due to some unexpected problem, a message
  54  *  is printed to stderr explaining why (obviously).
  55  *
  56  *  If a watershed invocation ends up running the process, the process
  57  *  simply inherits stdin/out/err.  Otherwise stdin/stdout are not used.
  58  *
  59  *  If the process run for us by another invocation of watershed exits
  60  *  zero, or watershed dies with the same signal as the process
  61  *  (currently just SIGPIPE), nothing is printed to stderr.  Otherwise
  62  *  (ie, failure of the actual process, in another invocation),
  63  *  watershed prints a description of the wait status to stderr, much
  64  *  as the shell might.
  65  *
  66  */
  67 /*
  68  * gcc -Wall -Wwrite-strings -Wmissing-prototypes watershed.c -o watershed /usr/lib/libnettle.a
  69  */
  70 /*
  71  *
  72  * Theory:
  73  *
  74  *  We consider only invocations with a specific command id (and state
  75  *  directory), since other invocations are completely independent by
  76  *  virtue of having different state file pathnames and thus different
  77  *  state files.  Normally, a command id corresponds to invocations
  78  *  with a particular set of command line arguments and a state
  79  *  directory corresponds to a particular euid; environment variable
  80  *  settings and other inherited process properties are disregarded.
  81  *
  82  *  A `cohort' is a set of invocations which can be coalesced into one
  83  *  run of the command.  For each cohort there is a file, the cohort
  84  *  file (which may not yet exist, may exist and have a name, or may
  85  *  be unlinked).
  86  *
  87  *  An `invocation' is an invocation of the `watershed' program.  A
  88  *  `process' is an invocation of the requested command.
  89  *
  90  *  There is always one current cohort, in one of the following
  91  *  two states:
  92  *
  93  *   * Empty
  94  *     No invocations are in this cohort yet.
  95  *     The cohort filename is ENOENT.
  96  *     This is the initial state for a cohort, and the legal next
  97  *     state is Accumulating.
  98  *
  99  *   * Accumulating
 100  *     The process for this run has not yet started, so that new
 101  *     invocations arriving would be satisfied if this cohort were to
 102  *     run.
 103  *     The cohort filename refers to this cohort's file.
 104  *     The legal next state for the cohort is Ready.
 105  *
 106  *  Additionally, there may be older cohorts in the following states:
 107  *
 108  *   * Ready
 109  *     The command for this cohort has not yet been run.
 110  *     The cohort file has no name and is empty.
 111  *     Only one cohort, the lockholder's, may be in this state.
 112  *     The next legal states are Running, or exceptionally Forgotten
 113  *     (if the lockholder crashes and is the only invocation in the
 114  *     cohort).
 115  *
 116  *   * Running
 117  *     The lockholder is running the command for this cohort.
 118  *     This state is identical to Ready from the point of view
 119  *     of all invocations except the lockholder.
 120  *     The legal next states are Done (the usual case), or (if the
 121  *     lockholder crashes) Ready or Forgotten.
 122  *
 123  *   * Done
 124  *     The main process for this run has finished.
 125  *     The cohort file has no name and contains sizeof(int)
 126  *     bytes, the `status' value from waitpid.
 127  *     The legal next state is Forgotten.
 128  *
 129  *   * Forgotten
 130  *     All invocations have finished and the cohort file no longer
 131  *     exists.  This is the final state.
 132  *
 133  *  Only the lockholder may move a cohort between states, except that
 134  *  any invocation may make the current Empty cohort become
 135  *  Accumulating, and that the kernel will automatically move a cohort
 136  *  from Running to Ready or from Done to Forgotten, when appropriate.
 137  *
 138  *
 139  * Algorithm:
 140  *
 141  *   1. Open the cohort file (O_CREAT|O_RDWR)   so our cohort is
 142  *                                                 Accumulating/Ready/
 143  *                                                    Running/Done
 144  *
 145  *   2. Acquire lock (see below)                so lockholder's cohort is
 146  *                                                 Accumulating/Ready/Done
 147  *   3. fstat the open cohort file
 148  *         If it is nonempty:                      Done
 149  *          Read status from it and exit.
 150  *         Otherwise, if nonzero link count:       Accumulating
 151  *          Unlink the cohort filename
 152  *         Otherwise:                              Ready
 153  *
 154  *   4. Fork and run the command                   Running
 155  *       and wait for it
 156  *
 157  *   5. Write the wait status to the cohort file   Done
 158  *
 159  *
 160  *   6. Release the lock                        so we are no longer lockholder
 161  *                                              but our cohort is still
 162  *                                                 Done
 163  *
 164  *   7. Exit                                       Done/Forgotten
 165  *
 166  *  If an invocation crashes (ie, if watershed itself fails, rather
 167  *  than if the command does) then that invocation's caller will be
 168  *  informed of the error.
 169  *
 170  *  If the lockholder crashes with the cohort in:
 171  *
 172  *     Accumulating:
 173  *       The cohort remains in Accumulating and another invocation can
 174  *       become the lockholder.  If there are never any other
 175  *       invocations then the lockfile and cohort file will not be
 176  *       cleaned up (see below).
 177  *
 178  *     Running/Ready:
 179  *       The cohort goes from Running back to Ready (see above) and
 180  *       another invocation in the same cohort will become the
 181  *       lockholder and run it.  If there is no other invocation in
 182  *       the cohort the cohort goes to Forgotten although the lockfile
 183  *       will not be cleaned up - see below.
 184  *
 185  *     Done:
 186  *       If there are no more invocations, the cohort is Forgotten but
 187  *       the lockfile is not cleaned up.
 188  *
 189  * Lockfile:
 190  *
 191  *  There is one lock for all cohorts.  The lockholder is the
 192  *  invocation which holds the fcntl lock on the file whose name is
 193  *  the lockfile.  The lockholder (and no-one else) may unlink the
 194  *  lockfile.
 195  *
 196  *  To acquire the lock:
 197  *
 198  *   1. Open the lockfile (O_CREAT|O_RDWR)
 199  *   2. Acquire fcntl lock (F_SETLKW)
 200  *   3. fstat the open lockfile and stat the lockfile filename
 201  *      If inode numbers disagree, close lockfile and start
 202  *      again from the beginning.
 203  *
 204  *  To release the lock, unlink the lockfile and then either close it
 205  *  or exit.  Crashing will also release the lock but leave the
 206  *  lockfile lying around (which is slightly untidy but not
 207  *  incorrect); if this is a problem a cleanup task could periodically
 208  *  acquire and release the lock for each lockfile found:
 209  *
 210  * Cleanup:
 211  *
 212  *  As described above and below, stale cohort files and lockfiles can
 213  *  result from invocations which crashed if the same command is never
 214  *  run again.  Such cohorts are always in Empty or Accumulating.
 215  *
 216  *  If it became necessary to clean up stale cohort files and
 217  *  lockfiles resulting from crashes, the following algorithm should
 218  *  be executed for each lockfile found, as a cleanup task:
 219  *
 220  *   1. Acquire the lock.
 221  *      This makes us the lockholder.           and the current cohort is in
 222  *                                                 Empty/Accumulating
 223  *
 224  *                                              so now that cohort is
 225  *   2. Unlink the cohort file, ignoring ENOENT.   Ready/Forgotten
 226  *   3. Release the lock.                          Ready/Forgotten
 227  *   4. Exit.                                      Ready/Forgotten
 228  *
 229  *  This consists only of legal transitions, so if current cohort
 230  *  wasn't stale, it will have been moved to Ready and some other
 231  *  invocation in this cohort will become the lockholder and proceed as
 232  *  normal from step 4 of the main algorithm.  If the cohort was stale it
 233  *  will go to Forgotten straight away.
 234  *
 235  *  A suitable cleanup script, on a system with with-lock-ex, is: */
 236  //     #!/bin/sh
 237  //     set -e
 238  //     if [ $# != 1 ]; then echo >&2 'usage: cleanup <statedir>'; exit 1; fi
 239  //     cd "$1"
 240  //     for f in ./*.lock; do
 241  //       with-lock-ex -w rm -f "${f%.lock}.cohort"
 242  //     done
 243 /*
 244  */
 245
 246 #define _GNU_SOURCE
 247
 248 #include <stdio.h>
 249 #include <stdlib.h>
 250 #include <string.h>
 251 #include <errno.h>
 252 #include <stdarg.h>
 253 #include <ctype.h>
 254 #include <assert.h>
 255
 256 #include <sys/types.h>
 257 #include <sys/stat.h>
 258 #include <sys/wait.h>
 259 #include <unistd.h>
 260 #include <fcntl.h>
 261 #include <getopt.h>
 262 #include <locale.h>
 263 #include <libintl.h>
 264
 265 #include <nettle/sha.h>
 266
 267 static const struct option os[]= {
 268   { "--state-dir", 1,0,'d' },
 269   { "--command-id",1,0,'i' },
 270   { 0 }
 271 };
 272
 273 static const char *state_dir, *command_id, *command;
 274 static const char *lock_path, *cohort_path;
 275
 276 static int cohort_fd, lock_fd;
 277
 278
 279 #define _(x) gettext(x)
 280
 281 #define NOEINTR_TYPED(type,assign) do{                  \
 282     while ((assign)==(type)-1 && errno==EINTR) {}       \
 283   }while(0)
 284
 285 #define NOEINTR(assign) \
 286     NOEINTR_TYPED(int,(assign))
 287
 288 #define CHECKED(value,what) do{                 \
 289     NOEINTR(r= (value));                        \
 290     if (r<0) diee((what));                      \
 291   }while(0)
 292
 293
 294 static void badusage(void) {
 295   fputs(_("usage: watershed [<options>] <command>...\n"
 296           "options: -d|--state-dir <directory>  -i|--command-id <id>\n"),
 297           stderr);
 298   exit(127);
 299 }
 300 static void die(const char *m) {
 301   fprintf(stderr,_("watershed: error: %s\n"), m);
 302   exit(127);
 303 }
 304 static void diee(const char *m) {
 305   fprintf(stderr,_("watershed: error: %s failed: %s\n"), m, strerror(errno));
 306   exit(127);
 307 }
 308 static void dieep(const char *action, const char *path) {
 309   fprintf(stderr,_("watershed: error: could not %s `%s': %s\n"),
 310           action, path, strerror(errno));
 311   exit(127);
 312 }
 313
 314 static char *m_vasprintf(const char *fmt, va_list al) {
 315   char *s;  int r;
 316   r= vasprintf(&s,fmt,al);
 317   if (r==-1) diee("vasprintf");
 318   return s;
 319 }
 320 static char *m_asprintf(const char *fmt, ...) {
 321   char *s;  va_list al;
 322   va_start(al,fmt); s= m_vasprintf(fmt,al); va_end(al);
 323   return s;
 324 }
 325
 326 static void parse_args(int argc, char *const *argv) {
 327   int o;
 328   for (;;) {
 329     o= getopt_long(argc, argv, "+d:i:", os,0);
 330     if (o==-1) break;
 331     switch (o) {
 332     case 'd': state_dir= optarg; break;
 333     case 'i': command_id= optarg; break;
 334     default: badusage();
 335     }
 336   }
 337   command= argv[optind];
 338   if (!command) badusage();
 339   if (!state_dir) state_dir= getenv("WATERSHED_STATEDIR");
 340   if (!state_dir) {
 341     uid_t u= geteuid();  if (u==(uid_t)-1) diee("getuid");
 342     if (u) {
 343       const char *home= getenv("HOME");
 344       if (!home) die(_("HOME not set, no --state-dir option"
 345                        " supplied, not root"));
 346       state_dir= m_asprintf("%s/.watershed", home);
 347     } else {
 348       state_dir= "/var/run/watershed";
 349     }
 350   }
 351   if (!command_id) {
 352     char *const *ap;
 353     struct sha256_ctx sc;
 354     unsigned char dbuf[SHA256_DIGEST_SIZE], *p;
 355     char *construct, *q;
 356     int i, c;
 357
 358     sha256_init(&sc);
 359     for (ap= argv+optind; *ap; ap++) sha256_update(&sc,strlen(*ap)+1,*ap);
 360     sha256_digest(&sc,sizeof(dbuf),dbuf);
 361
 362     construct= m_asprintf("%*s#%.32s", (int)sizeof(dbuf)*2,"", command);
 363     for (i=sizeof(dbuf), p=dbuf, q=construct; i; i--,p++,q+=2)
 364       sprintf(q,"%02x",*p);
 365     *q++= '=';
 366     while ((c=*q++)) {
 367       if (!(c=='-' || c=='+' || c=='_' || isalnum((unsigned char)c)))
 368         q[-1]= '?';
 369     }
 370     command_id= construct;
 371   }
 372
 373   lock_path= m_asprintf("%s/%s.lock", state_dir, command_id);
 374   cohort_path= m_asprintf("%s/%s.cohort", state_dir, command_id);
 375 }
 376
 377 static void acquire_lock(void) {
 378   struct stat current_stab, our_stab;
 379   struct flock fl;
 380   int r;
 381
 382   for (;;) {
 383     NOEINTR( lock_fd= open(lock_path, O_CREAT|O_RDWR, 0600) );
 384     if (lock_fd<0) diee("open lock");
 385
 386     memset(&fl,0,sizeof(fl));
 387     fl.l_type= F_WRLCK;
 388     fl.l_whence= SEEK_SET;
 389     CHECKED( fcntl(lock_fd, F_SETLKW, &fl), "acquire lock" );
 390
 391     CHECKED( fstat(lock_fd, &our_stab), "fstat our lock");
 392
 393     NOEINTR( r= stat(lock_path, &current_stab) );
 394     if (!r &&
 395         our_stab.st_ino == current_stab.st_ino &&
 396         our_stab.st_dev == current_stab.st_dev) break;
 397     if (r && errno!=ENOENT) diee("fstat current lock");
 398
 399     close(lock_fd);
 400   }
 401 }
 402 static void release_lock(void) {
 403   int r;
 404   CHECKED( unlink(lock_path), "unlink lock");
 405 }
 406
 407 static void report(int status) {
 408   int v;
 409   if (WIFEXITED(status)) {
 410     v= WEXITSTATUS(status);
 411     if (v) fprintf(stderr,_("watershed: `%s' failed with error exit status %d"
 412                             " (in another invocation)\n"), command, v);
 413     exit(status);
 414   }
 415   if (WIFSIGNALED(status)) {
 416     v= WTERMSIG(status); assert(v);
 417     if (v == SIGPIPE) raise(v);
 418     fprintf(stderr,
 419             WCOREDUMP(status)
 420             ? _("watershed: `%s' died due to fatal signal %s (core dumped)\n")
 421             : _("watershed: `%s' died due to fatal signal %s\n"),
 422             command, strsignal(v));
 423   } else {
 424     fprintf(stderr, _("watershed: `%s' failed with"
 425                       " crazy wait status 0x%x\n"), command, status);
 426   }
 427   exit(127);
 428 }
 429
 430 int main(int argc, char *const *argv) {
 431   int status, r, dir_created=0, l;
 432   unsigned char *p;
 433   struct stat cohort_stab;
 434   pid_t c, c2;
 435
 436   setlocale(LC_MESSAGES,""); /* not LC_ALL, see use of isalnum below */
 437   parse_args(argc,argv);
 438
 439   for (;;) {
 440     NOEINTR( cohort_fd= open(cohort_path, O_CREAT|O_RDWR, 0644) );
 441     if (cohort_fd>=0) break;
 442     if (errno!=ENOENT) dieep(_("open/create cohort state file"), cohort_path);
 443     if (dir_created++) die("open cohort state file still ENOENT after mkdir");
 444     NOEINTR( r= mkdir(state_dir,0700) );
 445     if (r && errno!=EEXIST) dieep(_("create state directory"), state_dir);
 446   }
 447
 448   acquire_lock();
 449
 450   CHECKED( fstat(cohort_fd, &cohort_stab), "fstat our cohort");
 451   if (cohort_stab.st_size) {
 452     if (cohort_stab.st_size < sizeof(status))
 453       die(_("cohort status file too short (disk full?)"));
 454     else if (cohort_stab.st_size != sizeof(status))
 455       die("cohort status file too long");
 456     NOEINTR( r= read(cohort_fd,&status,sizeof(status)) );
 457     if (r==-1) diee("read cohort");
 458     if (r!=sizeof(status)) die("cohort file read wrong length");
 459     release_lock(); report(status);
 460   }
 461
 462   if (cohort_stab.st_nlink)
 463     CHECKED( unlink(cohort_path), "unlink our cohort");
 464
 465   NOEINTR_TYPED(pid_t, c= fork() );  if (c==(pid_t)-1) diee("fork");
 466   if (!c) {
 467     close(cohort_fd); close(lock_fd);
 468     execvp(command, argv+optind);
 469     fprintf(stderr,_("watershed: failed to execute `%s': %s\n"),
 470             command, strerror(errno));
 471     exit(127);
 472   }
 473
 474   NOEINTR( c2= waitpid(c, &status, 0) );
 475   if (c2==(pid_t)-1) diee("waitpid");
 476   if (c2!=c) die("waitpid gave wrong pid");
 477
 478   for (l=sizeof(status), p=(void*)&status; l>0; l-=r, p+=r)
 479     CHECKED( write(cohort_fd,p,l), _("write result status"));
 480
 481   release_lock();
 482   if (!WIFEXITED(status)) report(status);
 483   exit(WEXITSTATUS(status));
 484 }