chiark - git - mdw - mLib/blob - sys/mdup.c

   1 /* -*-c-*-
   2  *
   3  * Duplicate multiple files
   4  *
   5  * (c) 2008 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This file is part of the mLib utilities library.
  11  *
  12  * mLib is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU Library General Public License as
  14  * published by the Free Software Foundation; either version 2 of the
  15  * License, or (at your option) any later version.
  16  *
  17  * mLib is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU Library General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Library General Public
  23  * License along with mLib; if not, write to the Free
  24  * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  25  * MA 02111-1307, USA.
  26  */
  27
  28 /*----- Header files ------------------------------------------------------*/
  29
  30 #include <errno.h>
  31 #include <stdlib.h>
  32
  33 #include <unistd.h>
  34
  35 #include "alloc.h"
  36 #include "mdup.h"
  37
  38 /*----- Data structures ---------------------------------------------------*/
  39
  40 typedef struct mdup_fdinfo {
  41
  42   mdup_fd *f;
  43   /* Each @fdinfo@ structure refers to one of the caller's @fd@ structures.
  44    * This is it.
  45    */
  46
  47   struct mdup_fdinfo *eqnext, *eqprev;
  48   /* The caller's request list can contain more than one entry with any given
  49    * @cur@ descriptor.  We group them together into an equivalence class,
  50    * which is doubly linked using these fields.
  51    */
  52
  53   struct mdup_fdinfo *up;
  54   /* We require that there be at most one node with any given @want@
  55    * descriptor (other than @-1@).  There is therefore at most one node whose
  56    * @want@ is equal to my @cur@.  If such a node exists, @up@ points to it;
  57    * otherwise @up@ is null.
  58    */
  59
  60   struct mdup_fdinfo *down;
  61   /* Obviously, @down@ links in the opposite direction from @up@.  However,
  62    * there may be several nodes whose @cur@ equals my @want@; therefore
  63    * @down@ simply links to one of the nodes in the equivalence class.
  64    *
  65    * Unsurprisingly, @down@ is the direction we move during the depth-first
  66    * traversal phase of the operation.
  67    */
  68
  69   struct mdup_fdinfo *dlink;
  70   /* Nodes with @want == -1@, and nodes where we've broken cycles, are
  71    * considered `dynamic': their @cur@ has been chosen by @dup@ to be
  72    * distinct from any existing descriptor, but may collide with a @want@.
  73    * We check each proposed move against the list of dynamic nodes, and move
  74    * them out of the way as necessary.  Note that this is really a list of
  75    * equivalence classes rather than single nodes.
  76    */
  77
  78   unsigned state;
  79   /* The current state of this node.  One of the @ST@ constants described
  80    * below.
  81    */
  82 } mdup_fdinfo;
  83
  84 enum {
  85   ST_READY,
  86   /* Node has not yet been processed.
  87    */
  88
  89   ST_MARK,
  90   /* Node has been reached by the depth-first traversal, but its descriptor
  91    * has not yet been moved.  This state is used to detect cycles using the
  92    * depth-first traversal.
  93    */
  94
  95   ST_DONE,
  96   /* Node has been processed completely.  We have @want == -1@ or
  97    * @want == cur@.
  98    */
  99
 100   ST_BROKEN,
 101   /* Node has been clobbered in order to break a cycle.  The node's
 102    * equivalence class has been remapped to a fresh descriptor which (we
 103    * hope) is not equal to any node's @want@.  All broken nodes are put on
 104    * the dynamic list: if our hope turns out to be misplaced we can remap the
 105    * class again.
 106    */
 107 };
 108
 109 /*----- Main code ---------------------------------------------------------*/
 110
 111 /* --- @DO_EQUIVS@ --- *
 112  *
 113  * Perform @body@ once for each @g@ in the equivalence class of @f@.
 114  */
 115
 116 #define DO_EQUIVS(g, f, body) do {                                      \
 117   mdup_fdinfo *f_ = (f), *g_ = f_;                                      \
 118   do { mdup_fdinfo *g = g_; g_ = g_->eqnext; body; } while (g_ != f_);  \
 119 } while (0)
 120
 121 /* --- @dump@ --- *
 122  *
 123  * Arguments:   @mdup_fdinfo *v@ = pointer to info vector
 124  *              @size_t n@ = size of vector
 125  *
 126  * Returns:     ---
 127  *
 128  * Use:         Dumps a scary-looking description of the state of @mdup@'s
 129  *              workings.
 130  */
 131
 132 #ifdef DEBUG
 133
 134 #include <stdarg.h>
 135 #include <stdio.h>
 136
 137 #include "macros.h"
 138
 139 #define D(x) x
 140
 141 static PRINTF_LIKE(4, 5) IGNORABLE
 142   void dump(mdup_fdinfo *v, size_t n, mdup_fdinfo *dhead,
 143             const char *fmt, ...)
 144 {
 145   int i;
 146   mdup_fdinfo *f, *g;
 147   static const char *state[] = { "READY", "MARK", "DONE", "BROKEN" };
 148   va_list ap;
 149
 150 #define INDEX(p) ((p) ? (int)((p) - (v)) : -1)
 151
 152   /* --- Dump the items, fairly raw --- */
 153
 154   va_start(ap, fmt);
 155   fputs("*** ", stdout);
 156   vprintf(fmt, ap);
 157   putchar('\n');
 158   for (i = 0; i < n; i++) {
 159     f = &v[i];
 160     printf("%3d: %-6s %3d -> %3d; "
 161            "equivs: %3d, %3d; up: %3d; down: %3d; dyn: %3d\n",
 162            i, state[f->state], f->f->cur, f->f->want,
 163            INDEX(f->eqprev), INDEX(f->eqnext),
 164            INDEX(f->up), INDEX(f->down), INDEX(f->dlink));
 165   }
 166   putchar('\n');
 167   va_end(ap);
 168
 169 #undef INDEX
 170 }
 171
 172 #else
 173
 174 #define D(x)
 175
 176 #endif
 177
 178 /* --- @dfs@ --- *
 179  *
 180  * Arguments:   @mdup_fdinfo *f@ = which node to process
 181  *              @mdup_fdinfo **dhead, ***dtail@ = the dynamic list
 182  *
 183  * Returns:     Zero on success, @-1@ on some OS failure.
 184  *
 185  * Use:         Recursive depth-first traversal of the descriptor graph.
 186  *
 187  *              On exit, the node @f@ will be in state @ST_DONE@ or
 188  *              @ST_BROKEN@.
 189  */
 190
 191 static int dfs(mdup_fdinfo *f, mdup_fdinfo **dhead, mdup_fdinfo ***dtail)
 192 {
 193   mdup_fdinfo *d;
 194   mdup_fd *ff;
 195   int can_close_p = 1;
 196   int fd, ofd;
 197   int e;
 198
 199   /* --- Null pointers need no processing --- *
 200    *
 201    * Null pointers mark the end of descending chains.
 202    */
 203
 204   if (!f)
 205     return (0);
 206
 207   /* --- Otherwise our behaviour depends on the node's state --- */
 208
 209   switch (f->state) {
 210
 211     /* --- The standard processing, in several phases --- */
 212
 213     case ST_READY:
 214
 215       /* --- Mark the class as being in-progress --- */
 216
 217       DO_EQUIVS(g, f, { g->state = ST_MARK; });
 218
 219       /* --- Ensure that the our proposed destination is clear --- *
 220        *
 221        * The depth-first traversal will leave the node in @ST_DONE@ or
 222        * @ST_BROKEN@ afterwards; either way, its @cur@ will not be same as
 223        * our @want@.
 224        *
 225        * Note that this can move @%\emph{us}@ to @ST_BROKEN@.  This is not a
 226        * significant problem.
 227        */
 228
 229       DO_EQUIVS(g, f, { if (dfs(g->down, dhead, dtail)) return (-1); });
 230
 231       /* --- Now the real work can begin --- *
 232        *
 233        * For each node in the class, copy the descriptor from @cur@ to
 234        * @want@.  Before doing this, we must move out of the way any (other)
 235        * dynamic nodes whose @cur@ matches our @want@.
 236        *
 237        * Interestingly, this is the only point in the function where we need
 238        * nontrivial error handling: if something goes wrong with one of the
 239        * @dup2@ calls, we must close the descriptors made so far this pass
 240        * before returning.
 241        */
 242
 243       ofd = f->f->cur;
 244       DO_EQUIVS(g, f, {
 245         ff = g->f;
 246         for (d = *dhead; d; d = d->dlink) {
 247           if (d != f && d->f->cur == ff->want) {
 248             if ((fd = dup(ff->want)) < 0)
 249               goto fail;
 250             DO_EQUIVS(dd, d, { dd->f->cur = fd; });
 251             close(ff->want);
 252           }
 253         }
 254         if (ff->cur == ff->want)
 255           can_close_p = 0;
 256         else if (dup2(ofd, ff->want) < 0)
 257           goto fail;
 258         goto ok;
 259       fail:
 260         e = errno;
 261         for (g = g->eqprev; g != f->eqprev; g = g->eqprev) {
 262           if (g->f->want != g->f->cur)
 263             close(g->f->want);
 264         }
 265         errno = e;
 266         return (-1);
 267       ok:;
 268       });
 269
 270       /* --- We're done --- *
 271        *
 272        * If the original descriptor isn't wanted by anyone we can (and must)
 273        * close it.  Nodes can now move to @ST_DONE@.
 274        */
 275
 276       if (can_close_p)
 277         close(ofd);
 278       DO_EQUIVS(g, f, {
 279         g->f->cur = g->f->want;
 280         g->state = ST_DONE;
 281       });
 282       break;
 283
 284     /* --- We have encoutered a cycle --- *
 285      *
 286      * The caller wants our descriptor.  We therefore shunt this entire
 287      * equivalence class to a new descriptor, and link it onto the dynamic
 288      * list.  Mark it as broken so that we don't try to do anything
 289      * complicated to it again.
 290      */
 291
 292     case ST_MARK:
 293       ofd = f->f->cur;
 294       if ((fd = dup(ofd)) < 0)
 295         return (-1);
 296       DO_EQUIVS(g, f, {
 297         g->f->cur = fd;
 298         g->state = ST_BROKEN;
 299       });
 300       f->dlink = **dtail;
 301       **dtail = f;
 302       close(ofd);
 303       break;
 304
 305     /* --- Nothing to be done here --- *
 306      *
 307      * @ST_DONE@ nodes have already been completely processed; @ST_BROKEN@
 308      * nodes will be fixed up after the main traversal.
 309      */
 310
 311     case ST_DONE:
 312     case ST_BROKEN:
 313       return (0);
 314
 315   }
 316   return (0);
 317 }
 318
 319 /* --- @mdup@ --- *
 320  *
 321  * Arguments:   @mdup_fd *v@ = pointer to @mdup_fd@ vector
 322  *              @size_t n@ = size of vector
 323  *
 324  * Returns:     Zero if successful, @-1@ on failure.
 325  *
 326  * Use:         Rearranges file descriptors.
 327  *
 328  *              The vector @v@ consists of a number of @mdup_fd@ structures.
 329  *              Each `slot' in the table represents a file.  The slot's @cur@
 330  *              member names the current file descriptor for this file; the
 331  *              @want@ member is the file descriptor we want to use for it.
 332  *              if you want to keep a file alive but don't care which
 333  *              descriptor it ends up with, set @want = -1@.  Several slots
 334  *              may specify the same @cur@ descriptor; but they all have to
 335  *              declare different @want@s (except that several slots may have
 336  *              @want = -1@.
 337  *
 338  *              On successful exit, the function will have rearranged the
 339  *              file descriptors as requested.  To reflect this, the @cur@
 340  *              members will all be set to match the (non-@-1@) @want@
 341  *              members.
 342  *
 343  *              If there is a failure, then some rearrangement may have been
 344  *              performed and some not; the @cur@ members are set to reflect
 345  *              which file descriptors are to be used.  The old file
 346  *              descriptors are closed.  (This is different from usual @dup@
 347  *              behaviour, of course, but essential for reliable error
 348  *              handling.)  If you want to keep a particular source file
 349  *              descriptor open as well as make a new copy then specify two
 350  *              slots with the same @cur@, one with @want = cur@ and one with
 351  *              the desired output descriptor.
 352  *
 353  *              This function works correctly even if the desired remappings
 354  *              contain cycles.
 355  */
 356
 357 int mdup(mdup_fd *v, size_t n)
 358 {
 359   size_t i, j;
 360   mdup_fdinfo *vv;
 361   mdup_fdinfo *f, *g, *dhead, **dtail;
 362   mdup_fd *ff;
 363   int rc = -1;
 364   int can_close_p;
 365   int ofd, fd;
 366
 367   /* --- Allocate and initialize the table of info nodes --- *
 368    *
 369    * Each entry @ff@ in the caller's @v@ array will have a corresponding node
 370    * @f@ in @vv@ with @f->f = ff@.  Initially each node's links are null, and
 371    * the node is in the @ST_READY@ state.
 372    *
 373    * We also initialize a list given by @dhead@ and @dtail@ containing the
 374    * entries with `dynamically-assigned' descriptors -- i.e., those whose
 375    * values we made up using @dup@.  The list lets us detect collisions with
 376    * explicitly requested descriptors and move the dynamic ones out of the
 377    * way.
 378    */
 379
 380   if (!NEWV_SAFE_P(vv, n) || (vv = malloc(n*sizeof(*vv))) == 0)
 381     return (-1);
 382
 383   dhead = 0;
 384   dtail = &dhead;
 385   for (i = 0; i < n; i++) {
 386     f = &vv[i];
 387     f->f = &v[i];
 388     f->up = f->down = 0;
 389     f->eqnext = f->eqprev = 0;
 390     f->state = ST_READY;
 391   }
 392
 393   /* --- Pass one: link the graph together --- *
 394    *
 395    * Once this pass is complete, the following properties will hold.
 396    *
 397    *   * The nodes which have the same @cur@ are linked together by their
 398    *     @eqnext@ and @eqprev@ fields into a doubly-linked circular list
 399    *     representing this equivalence class.
 400    *
 401    *   * @f->up == g@ if and only if @f->f->cur == g->f->want@.  (Note that
 402    *     @want@ fields are unique according to our interface.  We detect
 403    *     violations and exit with @errno == EINVAL@.)
 404    *
 405    *   * If @f->up == g@ then there exists a @ff@ in the same equivalence
 406    *     class (and therefore on @f@'s @eqnext@ list) as @f@ with
 407    *     @g->down == ff@.
 408    */
 409
 410   for (i = 0; i < n; i++) {
 411     f = &vv[i];
 412     if (!f->eqnext)
 413       f->eqnext = f->eqprev = f;
 414     for (j = 0; j < n; j++) {
 415       if (i == j)
 416         continue;
 417       g = &vv[j];
 418       if (f->f->cur == g->f->cur) {
 419         if (!g->eqnext) {
 420           g->eqnext = f->eqnext;
 421           g->eqprev = f;
 422           f->eqnext->eqprev = g;
 423           f->eqnext = g;
 424         }
 425       }
 426       if (g->f->want == -1)
 427         /* fine */;
 428       else if (f->f->want == g->f->want) {
 429         errno = EINVAL;
 430         goto fail;
 431       } else if (f->f->cur == g->f->want) {
 432         f->up = g;
 433         if (!g->down)
 434           g->down = f;
 435       }
 436     }
 437   }
 438
 439   /* --- Pass two: handle don't-care requests --- *
 440    *
 441    * By the end of this pass, we have the following properties.
 442    *
 443    *   * Every node will be marked @ST_DONE@.  This is a temporary abuse of
 444    *     the @ST_DONE@ state which will be rectified during the next pass.
 445    *
 446    *   * Every node with @want == -1@ will have @cur@ set to a freshly
 447    *     allocated file descriptor distinct from every previously open file.
 448    */
 449
 450   for (i = 0; i < n; i++) {
 451     f = &vv[i];
 452     switch (f->state) {
 453       case ST_DONE:
 454         break;
 455       case ST_READY:
 456         can_close_p = 1;
 457         DO_EQUIVS(g, f, {
 458           ff = g->f;
 459           ofd = ff->cur;
 460           if (ff->want != -1)
 461             can_close_p = 0;
 462           else {
 463             if ((fd = dup(ofd)) < 0)
 464               goto fail;
 465             ff->cur = fd;
 466           }
 467           g->state = ST_DONE;
 468         });
 469         if (can_close_p)
 470           close(ofd);
 471         break;
 472     }
 473   }
 474
 475   /* --- Pass three: restore equivalence classes and @down@ links --- *
 476    *
 477    * This pass re-establishes the properties from pass one.  Because we've
 478    * changed some @cur@ members, the equivalence classes will have changed,
 479    * so we must fix up the @eqnext@ lists and @down@ links.
 480    *
 481    * Nodes with @want == -1@ are now finished with (modulo tweaking
 482    * dynamically allocated descriptors as we process the others), so we leave
 483    * them in @ST_DONE@; other nodes are restored to @ST_READY@.
 484    */
 485
 486   for (i = 0; i < n; i++) {
 487     f = &vv[i];
 488     ff = f->f;
 489     if (ff->want == -1) {
 490       f->eqnext->eqprev = f->eqprev;
 491       f->eqprev->eqnext = f->eqnext;
 492       f->eqnext = f->eqprev = f;
 493       f->dlink = *dtail;
 494       *dtail = f;
 495     } else
 496       f->state = ST_READY;
 497   }
 498
 499   /* --- Pass four: main depth-first traversal --- *
 500    *
 501    * See the description of the function @dfs@ above.  After this pass, every
 502    * node is in state @ST_DONE@ or @ST_BROKEN@.
 503    */
 504
 505   for (i = 0; i < n; i++) {
 506     if (dfs(&vv[i], &dhead, &dtail))
 507       goto fail;
 508   }
 509
 510   /* --- Finished --- */
 511
 512   rc = 0;
 513 fail:
 514   free(vv);
 515   return (rc);
 516 }
 517
 518 /*----- That's all, folks -------------------------------------------------*/