chiark - git - mdw - mLib/blob - mdup.c

   1 /* -*-c-*-
   2  *
   3  * Duplicate multiple files
   4  *
   5  * (c) 2008 Straylight/Edgeware
   6  */
   7
   8 /*----- Licensing notice --------------------------------------------------*
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software Foundation,
  22  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  23  */
  24
  25 /*----- Header files ------------------------------------------------------*/
  26
  27 #include <errno.h>
  28 #include <stdlib.h>
  29
  30 #include <unistd.h>
  31
  32 #include "mdup.h"
  33
  34 /*----- Data structures ---------------------------------------------------*/
  35
  36 typedef struct mdup_fdinfo {
  37
  38   mdup_fd *f;
  39   /* Each @fdinfo@ structure refers to one of the caller's @fd@ structures.
  40    * This is it.
  41    */
  42
  43   struct mdup_fdinfo *eqnext, *eqprev;
  44   /* The caller's request list can contain more than one entry with any given
  45    * @cur@ descriptor.  We group them together into an equivalence class,
  46    * which is doubly linked using these fields.
  47    */
  48
  49   struct mdup_fdinfo *up;
  50   /* We require that there be at most one node with any given @want@
  51    * descriptor (other than @-1@).  There is therefore at most one node whose
  52    * @want@ is equal to my @cur@.  If such a node exists, @up@ points to it;
  53    * otherwise @up@ is null.
  54    */
  55
  56   struct mdup_fdinfo *down;
  57   /* Obviously, @down@ links in the opposite direction from @up@.  However,
  58    * there may be several nodes whose @cur@ equals my @want@; therefore
  59    * @down@ simply links to one of the nodes in the equivalence class.
  60    *
  61    * Unsurprisingly, @down@ is the direction we move during the depth-first
  62    * traversal phase of the operation.
  63    */
  64
  65   struct mdup_fdinfo *dlink;
  66   /* Nodes with @want == -1@, and nodes where we've broken cycles, are
  67    * considered `dynamic': their @cur@ has been chosen by @dup@ to be
  68    * distinct from any existing descriptor, but may collide with a @want@.
  69    * We check each proposed move against the list of dynamic nodes, and move
  70    * them out of the way as necessary.  Note that this is really a list of
  71    * equivalence classes rather than single nodes.
  72    */
  73
  74   unsigned state;
  75   /* The current state of this node.  One of the @ST@ constants described
  76    * below.
  77    */
  78 } mdup_fdinfo;
  79
  80 enum {
  81   ST_READY,
  82   /* Node has not yet been processed.
  83    */
  84
  85   ST_MARK,
  86   /* Node has been reached by the depth-first traversal, but its descriptor
  87    * has not yet been moved.  This state is used to detect cycles using the
  88    * depth-first traversal.
  89    */
  90
  91   ST_DONE,
  92   /* Node has been processed completely.  We have @want == -1@ or
  93    * @want == cur@.
  94    */
  95
  96   ST_BROKEN,
  97   /* Node has been clobbered in order to break a cycle.  The node's
  98    * equivalence class has been remapped to a fresh descriptor which (we
  99    * hope) is not equal to any node's @want@.  All broken nodes are put on
 100    * the dynamic list: if our hope turns out to be misplaced we can remap the
 101    * class again.
 102    */
 103 };
 104
 105 /*----- Main code ---------------------------------------------------------*/
 106
 107 /* --- @DO_EQUIVS@ --- *
 108  *
 109  * Perform @body@ once for each @g@ in the equivalence class of @f@.
 110  */
 111
 112 #define DO_EQUIVS(g, f, body) do {                                      \
 113   mdup_fdinfo *f_ = (f), *g_ = f_;                                      \
 114   do { mdup_fdinfo *g = g_; g_ = g_->eqnext; body; } while (g_ != f_);  \
 115 } while (0)
 116
 117 /* --- @dump@ --- *
 118  *
 119  * Arguments:   @mdup_fdinfo *v@ = pointer to info vector
 120  *              @size_t n@ = size of vector
 121  *
 122  * Returns:     ---
 123  *
 124  * Use:         Dumps a scary-looking description of the state of @mdup@'s
 125  *              workings.
 126  */
 127
 128 #ifdef DEBUG
 129
 130 #include <stdarg.h>
 131 #include <stdio.h>
 132
 133 #define D(x) x
 134
 135 static void dump(mdup_fdinfo *v, size_t n, mdup_fdinfo *dhead,
 136                  const char *fmt, ...)
 137 {
 138   int i;
 139   mdup_fdinfo *f, *g;
 140   static const char *state[] = { "READY", "MARK", "DONE", "BROKEN" };
 141   va_list ap;
 142
 143 #define INDEX(p) ((p) ? (int)((p) - (v)) : -1)
 144
 145   /* --- Dump the items, fairly raw --- */
 146
 147   va_start(ap, fmt);
 148   fputs("*** ", stdout);
 149   vprintf(fmt, ap);
 150   putchar('\n');
 151   for (i = 0; i < n; i++) {
 152     f = &v[i];
 153     printf("%3d: %-6s %3d -> %3d; "
 154            "equivs: %3d, %3d; up: %3d; down: %3d; dyn: %3d\n",
 155            i, state[f->state], f->f->cur, f->f->want,
 156            INDEX(f->eqprev), INDEX(f->eqnext),
 157            INDEX(f->up), INDEX(f->down), INDEX(f->dlink));
 158   }
 159   putchar('\n');
 160   va_end(ap);
 161
 162 #undef INDEX
 163 }
 164
 165 #else
 166
 167 #define D(x)
 168
 169 #endif
 170
 171 /* --- @dfs@ --- *
 172  *
 173  * Arguments:   @mdup_fdinfo *f@ = which node to process
 174  *              @mdup_fdinfo **dhead, ***dtail@ = the dynamic list
 175  *
 176  * Returns:     Zero on success, @-1@ on some OS failure.
 177  *
 178  * Use:         Recursive depth-first traversal of the descriptor graph.
 179  *
 180  *              On exit, the node @f@ will be in state @ST_DONE@ or
 181  *              @ST_BROKEN@.
 182  */
 183
 184 static int dfs(mdup_fdinfo *f, mdup_fdinfo **dhead, mdup_fdinfo ***dtail)
 185 {
 186   mdup_fdinfo *d;
 187   mdup_fd *ff;
 188   int can_close_p = 1;
 189   int fd, ofd;
 190   int e;
 191
 192   /* --- Null pointers need no processing --- *
 193    *
 194    * Null pointers mark the end of descending chains.
 195    */
 196
 197   if (!f)
 198     return (0);
 199
 200   /* --- Otherwise our behaviour depends on the node's state --- */
 201
 202   switch (f->state) {
 203
 204     /* --- The standard processing, in several phases --- */
 205
 206     case ST_READY:
 207
 208       /* --- Mark the class as being in-progress --- */
 209
 210       DO_EQUIVS(g, f, { g->state = ST_MARK; });
 211
 212       /* --- Ensure that the our proposed destination is clear --- *
 213        *
 214        * The depth-first traversal will leave the node in @ST_DONE@ or
 215        * @ST_BROKEN@ afterwards; either way, its @cur@ will not be same as
 216        * our @want@.
 217        *
 218        * Note that this can move @%\emph{us}@ to @ST_BROKEN@.  This is not a
 219        * significant problem.
 220        */
 221
 222       DO_EQUIVS(g, f, { if (dfs(g->down, dhead, dtail)) return (-1); });
 223
 224       /* --- Now the real work can begin --- *
 225        *
 226        * For each node in the class, copy the descriptor from @cur@ to
 227        * @want@.  Before doing this, we must move out of the way any (other)
 228        * dynamic nodes whose @cur@ matches our @want@.
 229        *
 230        * Interestingly, this is the only point in the function where we need
 231        * nontrivial error handling: if something goes wrong with one of the
 232        * @dup2@ calls, we must close the descriptors made so far this pass
 233        * before returning.
 234        */
 235
 236       ofd = f->f->cur;
 237       DO_EQUIVS(g, f, {
 238         ff = g->f;
 239         for (d = *dhead; d; d = d->dlink) {
 240           if (d != f && d->f->cur == ff->want) {
 241             if ((fd = dup(ff->want)) < 0)
 242               goto fail;
 243             DO_EQUIVS(dd, d, { dd->f->cur = fd; });
 244             close(ff->want);
 245           }
 246         }
 247         if (ff->cur == ff->want)
 248           can_close_p = 0;
 249         else if (dup2(ofd, ff->want) < 0)
 250           goto fail;
 251         goto ok;
 252       fail:
 253         e = errno;
 254         for (g = g->eqprev; g != f->eqprev; g = g->eqprev) {
 255           if (g->f->want != g->f->cur)
 256             close(g->f->want);
 257         }
 258         errno = e;
 259         return (-1);
 260       ok:;
 261       });
 262
 263       /* --- We're done --- *
 264        *
 265        * If the original descriptor isn't wanted by anyone we can (and must)
 266        * close it.  Nodes can now move to @ST_DONE@.
 267        */
 268
 269       if (can_close_p)
 270         close(ofd);
 271       DO_EQUIVS(g, f, {
 272         g->f->cur = g->f->want;
 273         g->state = ST_DONE;
 274       });
 275       break;
 276
 277     /* --- We have encoutered a cycle --- *
 278      *
 279      * The caller wants our descriptor.  We therefore shunt this entire
 280      * equivalence class to a new descriptor, and link it onto the dynamic
 281      * list.  Mark it as broken so that we don't try to do anything
 282      * complicated to it again.
 283      */
 284
 285     case ST_MARK:
 286       ofd = f->f->cur;
 287       if ((fd = dup(ofd)) < 0)
 288         return (-1);
 289       DO_EQUIVS(g, f, {
 290         g->f->cur = fd;
 291         g->state = ST_BROKEN;
 292       });
 293       f->dlink = **dtail;
 294       **dtail = f;
 295       close(ofd);
 296       break;
 297
 298     /* --- Nothing to be done here --- *
 299      *
 300      * @ST_DONE@ nodes have already been completely processed; @ST_BROKEN@
 301      * nodes will be fixed up after the main traversal.
 302      */
 303
 304     case ST_DONE:
 305     case ST_BROKEN:
 306       return (0);
 307
 308   }
 309   return (0);
 310 }
 311
 312 /* --- @mdup@ --- *
 313  *
 314  * Arguments:   @mdup_fd *v@ = pointer to @mdup_fd@ vector
 315  *              @size_t n@ = size of vector
 316  *
 317  * Returns:     Zero if successful, @-1@ on failure.
 318  *
 319  * Use:         Rearranges file descriptors.
 320  *
 321  *              The vector @v@ consists of a number of @mdup_fd@ structures.
 322  *              Each `slot' in the table represents a file.  The slot's @cur@
 323  *              member names the current file descriptor for this file; the
 324  *              @want@ member is the file descriptor we want to use for it.
 325  *              if you want to keep a file alive but don't care which
 326  *              descriptor it ends up with, set @want = -1@.  Several slots
 327  *              may specify the same @cur@ descriptor; but they all have to
 328  *              declare different @want@s (except that several slots may have
 329  *              @want = -1@.
 330  *
 331  *              On successful exit, the function will have rearranged the
 332  *              file descriptors as requested.  To reflect this, the @cur@
 333  *              members will all be set to match the (non-@-1@) @want@
 334  *              members.
 335  *
 336  *              If there is a failure, then some rearrangement may have been
 337  *              performed and some not; the @cur@ members are set to reflect
 338  *              which file descriptors are to be used.  The old file
 339  *              descriptors are closed.  (This is different from usual @dup@
 340  *              behaviour, of course, but essential for reliable error
 341  *              handling.)  If you want to keep a particular source file
 342  *              descriptor open as well as make a new copy then specify two
 343  *              slots with the same @cur@, one with @want = cur@ and one with
 344  *              the desired output descriptor.
 345  *
 346  *              This function works correctly even if the desired remappings
 347  *              contain cycles.
 348  */
 349
 350 int mdup(mdup_fd *v, size_t n)
 351 {
 352   size_t i, j;
 353   mdup_fdinfo *vv;
 354   mdup_fdinfo *f, *g, *dhead, **dtail;
 355   mdup_fd *ff;
 356   int rc = -1;
 357   int can_close_p;
 358   int ofd, fd;
 359
 360   /* --- Allocate and initialize the table of info nodes --- *
 361    *
 362    * Each entry @ff@ in the caller's @v@ array will have a corresponding node
 363    * @f@ in @vv@ with @f->f = ff@.  Initially each node's links are null, and
 364    * the node is in the @ST_READY@ state.
 365    *
 366    * We also initialize a list given by @dhead@ and @dtail@ containing the
 367    * entries with `dynamically-assigned' descriptors -- i.e., those whose
 368    * values we made up using @dup@.  The list lets us detect collisions with
 369    * explicitly requested descriptors and move the dynamic ones out of the
 370    * way.
 371    */
 372
 373   if ((vv = malloc(sizeof(*vv) * n)) == 0)
 374     return (-1);
 375
 376   dhead = 0;
 377   dtail = &dhead;
 378   for (i = 0; i < n; i++) {
 379     f = &vv[i];
 380     f->f = &v[i];
 381     f->up = f->down = 0;
 382     f->eqnext = f->eqprev = 0;
 383     f->state = ST_READY;
 384   }
 385
 386   /* --- Pass one: link the graph together --- *
 387    *
 388    * Once this pass is complete, the following properties will hold.
 389    *
 390    *   * The nodes which have the same @cur@ are linked together by their
 391    *     @eqnext@ and @eqprev@ fields into a doubly-linked circular list
 392    *     representing this equivalence class.
 393    *
 394    *   * @f->up == g@ if and only if @f->f->cur == g->f->want@.  (Note that
 395    *     @want@ fields are unique according to our interface.  We detect
 396    *     violations and exit with @errno == EINVAL@.)
 397    *
 398    *   * If @f->up == g@ then there exists a @ff@ in the same equivalence
 399    *     class (and therefore on @f@'s @eqnext@ list) as @f@ with
 400    *     @g->down == ff@.
 401    */
 402
 403   for (i = 0; i < n; i++) {
 404     f = &vv[i];
 405     if (!f->eqnext)
 406       f->eqnext = f->eqprev = f;
 407     for (j = 0; j < n; j++) {
 408       if (i == j)
 409         continue;
 410       g = &vv[j];
 411       if (f->f->cur == g->f->cur) {
 412         if (!g->eqnext) {
 413           g->eqnext = f->eqnext;
 414           g->eqprev = f;
 415           f->eqnext->eqprev = g;
 416           f->eqnext = g;
 417         }
 418       }
 419       if (g->f->want == -1)
 420         /* fine */;
 421       else if (f->f->want == g->f->want) {
 422         errno = EINVAL;
 423         goto fail;
 424       } else if (f->f->cur == g->f->want) {
 425         f->up = g;
 426         if (!g->down)
 427           g->down = f;
 428       }
 429     }
 430   }
 431
 432   /* --- Pass two: handle don't-care requests --- *
 433    *
 434    * By the end of this pass, we have the following properties.
 435    *
 436    *   * Every node will be marked @ST_DONE@.  This is a temporary abuse of
 437    *     the @ST_DONE@ state which will be rectified during the next pass.
 438    *
 439    *   * Every node with @want == -1@ will have @cur@ set to a freshly
 440    *     allocated file descriptor distinct from every previously open file.
 441    */
 442
 443   for (i = 0; i < n; i++) {
 444     f = &vv[i];
 445     switch (f->state) {
 446       case ST_DONE:
 447         break;
 448       case ST_READY:
 449         can_close_p = 1;
 450         DO_EQUIVS(g, f, {
 451           ff = g->f;
 452           ofd = ff->cur;
 453           if (ff->want != -1)
 454             can_close_p = 0;
 455           else {
 456             if ((fd = dup(ofd)) < 0)
 457               goto fail;
 458             ff->cur = fd;
 459           }
 460           g->state = ST_DONE;
 461         });
 462         if (can_close_p)
 463           close(ofd);
 464         break;
 465     }
 466   }
 467
 468   /* --- Pass three: restore equivalence classes and @down@ links --- *
 469    *
 470    * This pass re-establishes the properties from pass one.  Because we've
 471    * changed some @cur@ members, the equivalence classes will have changed,
 472    * so we must fix up the @eqnext@ lists and @down@ links.
 473    *
 474    * Nodes with @want == -1@ are now finished with (modulo tweaking
 475    * dynamically allocated descriptors as we process the others), so we leave
 476    * them in @ST_DONE@; other nodes are restored to @ST_READY@.
 477    */
 478
 479   for (i = 0; i < n; i++) {
 480     f = &vv[i];
 481     ff = f->f;
 482     if (ff->want == -1) {
 483       f->eqnext->eqprev = f->eqprev;
 484       f->eqprev->eqnext = f->eqnext;
 485       f->eqnext = f->eqprev = f;
 486       f->dlink = *dtail;
 487       *dtail = f;
 488     } else
 489       f->state = ST_READY;
 490   }
 491
 492   /* --- Pass four: main depth-first traversal --- *
 493    *
 494    * See the description of the function @dfs@ above.  After this pass, every
 495    * node is in state @ST_DONE@ or @ST_BROKEN@.
 496    */
 497
 498   for (i = 0; i < n; i++) {
 499     if (dfs(&vv[i], &dhead, &dtail))
 500       goto fail;
 501   }
 502
 503   /* --- Finished --- */
 504
 505   rc = 0;
 506 fail:
 507   free(vv);
 508   return (rc);
 509 }
 510
 511 /*----- That's all, folks -------------------------------------------------*/