chiark - git - mdw - disorder/blob - lib/event.c

   1 /*
   2  * This file is part of DisOrder.
   3  * Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
   4  *
   5  * This program is free software: you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation, either version 3 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17  */
  18 /** @file lib/event.c
  19  * @brief DisOrder event loop
  20  */
  21
  22 #include "common.h"
  23
  24 #include <unistd.h>
  25 #include <fcntl.h>
  26 #include <sys/time.h>
  27 #include <sys/types.h>
  28 #include <sys/resource.h>
  29 #include <sys/wait.h>
  30 #include <sys/stat.h>
  31 #include <unistd.h>
  32 #include <signal.h>
  33 #include <errno.h>
  34 #include <sys/socket.h>
  35 #include <netinet/in.h>
  36 #include <sys/un.h>
  37 #include "event.h"
  38 #include "mem.h"
  39 #include "log.h"
  40 #include "syscalls.h"
  41 #include "printf.h"
  42 #include "sink.h"
  43 #include "vector.h"
  44 #include "timeval.h"
  45 #include "heap.h"
  46
/** @brief A timeout
 *
 * Timeouts are kept in the event loop's timeout heap, ordered by @c when.
 */
struct timeout {
  /** @brief Next entry in the list of triggered timeouts built by ev_run() */
  struct timeout *next;
  /** @brief Trigger time; the callback is run once this has been reached */
  struct timeval when;
  /** @brief Function called from ev_run() when the timeout triggers */
  ev_timeout_callback *callback;
  /** @brief Passed to @c callback */
  void *u;
  /** @brief Set to 1 on registration, cleared by ev_timeout_cancel() */
  int active;
};
  55
  56 /** @brief Comparison function for timeouts */
  57 static int timeout_lt(const struct timeout *a,
  58                       const struct timeout *b) {
  59   return tvlt(&a->when, &b->when);
  60 }
  61
/* Declare, then define, the timeout_heap container used by the event loop.
 * Elements are struct timeout pointers compared with timeout_lt().
 * NOTE(review): the exact set of generated functions comes from heap.h; this
 * file uses the _init, _count, _first, _remove and _insert operations. */
HEAP_TYPE(timeout_heap, struct timeout *, timeout_lt);
HEAP_DEFINE(timeout_heap, struct timeout *, timeout_lt);
  64
/** @brief A file descriptor in one mode */
struct fd {
  /** @brief The file descriptor itself */
  int fd;
  /** @brief Called from ev_run() when @c select() reports @c fd */
  ev_fd_callback *callback;
  /** @brief Passed to @c callback */
  void *u;
  /** @brief Text description, used in diagnostics */
  const char *what;
};
  72
/** @brief All the file descriptors in a given mode */
struct fdmode {
  /** @brief Mask of active file descriptors passed to @c select()
   *
   * Bits are set by ev_fd() and ev_fd_enable() and cleared by ev_fd_cancel()
   * and ev_fd_disable().
   */
  fd_set enabled;

  /** @brief File descriptor mask returned from @c select()
   *
   * ev_run() copies @c enabled here before each call to @c select().
   */
  fd_set tripped;

  /** @brief Number of file descriptors in @p fds */
  int nfds;

  /** @brief Number of slots in @p fds */
  int fdslots;

  /** @brief Array of all active file descriptors */
  struct fd *fds;

  /** @brief Highest-numbered file descriptor or 0 */
  int maxfd;
};
  93
/** @brief A signal handler */
struct signal {
  /** @brief Disposition saved by ev_signal(), restored when the handler is
   * cancelled */
  struct sigaction oldsa;
  /** @brief Called from signal_read() when the signal has been delivered;
   * null if the signal is not handled */
  ev_signal_callback *callback;
  /** @brief Passed to @c callback */
  void *u;
};
 100
/** @brief A child process */
struct child {
  /** @brief Process ID of the child */
  pid_t pid;
  /** @brief Options passed to @c wait4() (in addition to @c WNOHANG) */
  int options;
  /** @brief Called when @c wait4() reports a status for @c pid */
  ev_child_callback *callback;
  /** @brief Passed to @c callback */
  void *u;
};
 108
/** @brief An event loop */
struct ev_source {
  /** @brief File descriptors, per mode */
  struct fdmode mode[ev_nmodes];

  /** @brief Heap of timeouts */
  struct timeout_heap timeouts[1];

  /** @brief Array of handled signals, indexed by signal number */
  struct signal signals[NSIG];

  /** @brief Mask of handled signals
   *
   * These signals are blocked except while ev_run() is waiting in
   * @c select().
   */
  sigset_t sigmask;

  /** @brief Escape early from handling of @c select() results
   *
   * This is set if any of the file descriptor arrays are invalidated, since
   * it's then not safe for processing of them to continue.
   */
  int escape;

  /** @brief Signal handling pipe
   *
   * The signal handler writes signal numbers down this pipe.  Both elements
   * are -1 until the pipe has been created.
   */
  int sigpipe[2];

  /** @brief Number of child processes in @p children */
  int nchildren;

  /** @brief Number of slots in @p children */
  int nchildslots;

  /** @brief Array of child processes */
  struct child *children;
};
 145
 146 /** @brief Names of file descriptor modes */
 147 static const char *modenames[] = { "read", "write", "except" };
 148
 149 /* utilities ******************************************************************/
 150
 151 /* creation *******************************************************************/
 152
 153 /** @brief Create a new event loop */
 154 ev_source *ev_new(void) {
 155   ev_source *ev = xmalloc(sizeof *ev);
 156   int n;
 157
 158   memset(ev, 0, sizeof *ev);
 159   for(n = 0; n < ev_nmodes; ++n)
 160     FD_ZERO(&ev->mode[n].enabled);
 161   ev->sigpipe[0] = ev->sigpipe[1] = -1;
 162   sigemptyset(&ev->sigmask);
 163   timeout_heap_init(ev->timeouts);
 164   return ev;
 165 }
 166
 167 /* event loop *****************************************************************/
 168
 169 /** @brief Run the event loop
 170  * @return -1 on error, non-0 if any callback returned non-0
 171  */
 172 int ev_run(ev_source *ev) {
 173   for(;;) {
 174     struct timeval now;
 175     struct timeval delta;
 176     int n, mode;
 177     int ret;
 178     int maxfd;
 179     struct timeout *timeouts, *t, **tt;
 180     struct stat sb;
 181
 182     xgettimeofday(&now, 0);
 183     /* Handle timeouts.  We don't want to handle any timeouts that are added
 184      * while we're handling them (otherwise we'd have to break out of infinite
 185      * loops, preferrably without starving better-behaved subsystems).  Hence
 186      * the slightly complicated two-phase approach here. */
 187     /* First we read those timeouts that have triggered out of the heap.  We
 188      * keep them in the same order they came out of the heap in. */
 189     tt = &timeouts;
 190     while(timeout_heap_count(ev->timeouts)
 191           && tvle(&timeout_heap_first(ev->timeouts)->when, &now)) {
 192       /* This timeout has reached its trigger time; provided it has not been
 193        * cancelled we add it to the timeouts list. */
 194       t = timeout_heap_remove(ev->timeouts);
 195       if(t->active) {
 196         *tt = t;
 197         tt = &t->next;
 198       }
 199     }
 200     *tt = 0;
 201     /* Now we can run the callbacks for those timeouts.  They might add further
 202      * timeouts that are already in the past but they won't trigger until the
 203      * next time round the event loop. */
 204     for(t = timeouts; t; t = t->next) {
 205       D(("calling timeout for %ld.%ld callback %p %p",
 206          (long)t->when.tv_sec, (long)t->when.tv_usec,
 207          (void *)t->callback, t->u));
 208       ret = t->callback(ev, &now, t->u);
 209       if(ret)
 210         return ret;
 211     }
 212     maxfd = 0;
 213     for(mode = 0; mode < ev_nmodes; ++mode) {
 214       ev->mode[mode].tripped = ev->mode[mode].enabled;
 215       if(ev->mode[mode].maxfd > maxfd)
 216         maxfd = ev->mode[mode].maxfd;
 217     }
 218     xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
 219     do {
 220       if(timeout_heap_count(ev->timeouts)) {
 221         t = timeout_heap_first(ev->timeouts);
 222         xgettimeofday(&now, 0);
 223         delta.tv_sec = t->when.tv_sec - now.tv_sec;
 224         delta.tv_usec = t->when.tv_usec - now.tv_usec;
 225         if(delta.tv_usec < 0) {
 226           delta.tv_usec += 1000000;
 227           --delta.tv_sec;
 228         }
 229         if(delta.tv_sec < 0)
 230           delta.tv_sec = delta.tv_usec = 0;
 231         n = select(maxfd + 1,
 232                    &ev->mode[ev_read].tripped,
 233                    &ev->mode[ev_write].tripped,
 234                    &ev->mode[ev_except].tripped,
 235                    &delta);
 236       } else {
 237         n = select(maxfd + 1,
 238                    &ev->mode[ev_read].tripped,
 239                    &ev->mode[ev_write].tripped,
 240                    &ev->mode[ev_except].tripped,
 241                    0);
 242       }
 243     } while(n < 0 && errno == EINTR);
 244     xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
 245     if(n < 0) {
 246       disorder_error(errno, "error calling select");
 247       if(errno == EBADF) {
 248         /* If there's a bad FD in the mix then check them all and log what we
 249          * find, to ease debugging */
 250         for(mode = 0; mode < ev_nmodes; ++mode) {
 251           for(n = 0; n < ev->mode[mode].nfds; ++n) {
 252             const int fd = ev->mode[mode].fds[n].fd;
 253
 254             if(FD_ISSET(fd, &ev->mode[mode].enabled)
 255                && fstat(fd, &sb) < 0)
 256               disorder_error(errno, "mode %s fstat %d (%s)",
 257                              modenames[mode], fd, ev->mode[mode].fds[n].what);
 258           }
 259           for(n = 0; n <= maxfd; ++n)
 260             if(FD_ISSET(n, &ev->mode[mode].enabled)
 261                && fstat(n, &sb) < 0)
 262               disorder_error(errno, "mode %s fstat %d", modenames[mode], n);
 263         }
 264       }
 265       return -1;
 266     }
 267     if(n > 0) {
 268       /* if anything deranges the meaning of an fd, or re-orders the
 269        * fds[] tables, we'd better give up; such operations will
 270        * therefore set @escape@. */
 271       ev->escape = 0;
 272       for(mode = 0; mode < ev_nmodes && !ev->escape; ++mode)
 273         for(n = 0; n < ev->mode[mode].nfds && !ev->escape; ++n) {
 274           int fd = ev->mode[mode].fds[n].fd;
 275           if(FD_ISSET(fd, &ev->mode[mode].tripped)) {
 276             D(("calling %s fd %d callback %p %p", modenames[mode], fd,
 277                (void *)ev->mode[mode].fds[n].callback,
 278                ev->mode[mode].fds[n].u));
 279             ret = ev->mode[mode].fds[n].callback(ev, fd,
 280                                                  ev->mode[mode].fds[n].u);
 281             if(ret)
 282               return ret;
 283           }
 284         }
 285     }
 286     /* we'll pick up timeouts back round the loop */
 287   }
 288 }
 289
 290 /* file descriptors ***********************************************************/
 291
 292 /** @brief Register a file descriptor
 293  * @param ev Event loop
 294  * @param mode @c ev_read or @c ev_write
 295  * @param fd File descriptor
 296  * @param callback Called when @p is readable/writable
 297  * @param u Passed to @p callback
 298  * @param what Text description
 299  * @return 0 on success, non-0 on error
 300  *
 301  * Sets @ref ev_source::escape, so no further processing of file descriptors
 302  * will occur this time round the event loop.
 303  */
 304 int ev_fd(ev_source *ev,
 305           ev_fdmode mode,
 306           int fd,
 307           ev_fd_callback *callback,
 308           void *u,
 309           const char *what) {
 310   int n;
 311
 312   D(("registering %s fd %d callback %p %p", modenames[mode], fd,
 313      (void *)callback, u));
 314   /* FreeBSD defines FD_SETSIZE as 1024u for some reason */
 315   if((unsigned)fd >= FD_SETSIZE)
 316     return -1;
 317   assert(mode < ev_nmodes);
 318   if(ev->mode[mode].nfds >= ev->mode[mode].fdslots) {
 319     ev->mode[mode].fdslots = (ev->mode[mode].fdslots
 320                                ? 2 * ev->mode[mode].fdslots : 16);
 321     D(("expanding %s fd table to %d entries", modenames[mode],
 322        ev->mode[mode].fdslots));
 323     ev->mode[mode].fds = xrealloc(ev->mode[mode].fds,
 324                                   ev->mode[mode].fdslots * sizeof (struct fd));
 325   }
 326   n = ev->mode[mode].nfds++;
 327   FD_SET(fd, &ev->mode[mode].enabled);
 328   ev->mode[mode].fds[n].fd = fd;
 329   ev->mode[mode].fds[n].callback = callback;
 330   ev->mode[mode].fds[n].u = u;
 331   ev->mode[mode].fds[n].what = what;
 332   if(fd > ev->mode[mode].maxfd)
 333     ev->mode[mode].maxfd = fd;
 334   ev->escape = 1;
 335   return 0;
 336 }
 337
 338 /** @brief Cancel a file descriptor
 339  * @param ev Event loop
 340  * @param mode @c ev_read or @c ev_write
 341  * @param fd File descriptor
 342  * @return 0 on success, non-0 on error
 343  *
 344  * Sets @ref ev_source::escape, so no further processing of file descriptors
 345  * will occur this time round the event loop.
 346  */
 347 int ev_fd_cancel(ev_source *ev, ev_fdmode mode, int fd) {
 348   int n;
 349   int maxfd;
 350
 351   D(("cancelling mode %s fd %d", modenames[mode], fd));
 352   /* find the right struct fd */
 353   for(n = 0; n < ev->mode[mode].nfds && fd != ev->mode[mode].fds[n].fd; ++n)
 354     ;
 355   assert(n < ev->mode[mode].nfds);
 356   /* swap in the last fd and reduce the count */
 357   if(n != ev->mode[mode].nfds - 1)
 358     ev->mode[mode].fds[n] = ev->mode[mode].fds[ev->mode[mode].nfds - 1];
 359   --ev->mode[mode].nfds;
 360   /* if that was the biggest fd, find the new biggest one */
 361   if(fd == ev->mode[mode].maxfd) {
 362     maxfd = 0;
 363     for(n = 0; n < ev->mode[mode].nfds; ++n)
 364       if(ev->mode[mode].fds[n].fd > maxfd)
 365         maxfd = ev->mode[mode].fds[n].fd;
 366     ev->mode[mode].maxfd = maxfd;
 367   }
 368   /* don't tell select about this fd any more */
 369   FD_CLR(fd, &ev->mode[mode].enabled);
 370   ev->escape = 1;
 371   return 0;
 372 }
 373
 374 /** @brief Re-enable a file descriptor
 375  * @param ev Event loop
 376  * @param mode @c ev_read or @c ev_write
 377  * @param fd File descriptor
 378  * @return 0 on success, non-0 on error
 379  *
 380  * It is harmless if @p fd is currently disabled, but it must not have been
 381  * cancelled.
 382  */
 383 int ev_fd_enable(ev_source *ev, ev_fdmode mode, int fd) {
 384   assert(fd >= 0);
 385   D(("enabling mode %s fd %d", modenames[mode], fd));
 386   FD_SET(fd, &ev->mode[mode].enabled);
 387   return 0;
 388 }
 389
 390 /** @brief Temporarily disable a file descriptor
 391  * @param ev Event loop
 392  * @param mode @c ev_read or @c ev_write
 393  * @param fd File descriptor
 394  * @return 0 on success, non-0 on error
 395  *
 396  * Re-enable with ev_fd_enable().  It is harmless if @p fd is already disabled,
 397  * but it must not have been cancelled.
 398  */
 399 int ev_fd_disable(ev_source *ev, ev_fdmode mode, int fd) {
 400   D(("disabling mode %s fd %d", modenames[mode], fd));
 401   FD_CLR(fd, &ev->mode[mode].enabled);
 402   FD_CLR(fd, &ev->mode[mode].tripped);
 403   /* Suppress any pending callbacks */
 404   ev->escape = 1;
 405   return 0;
 406 }
 407
 408 /** @brief Log a report of file descriptor state */
 409 void ev_report(ev_source *ev) {
 410   int n, fd;
 411   ev_fdmode mode;
 412   struct dynstr d[1];
 413   char b[4096];
 414
 415   if(!debugging)
 416     return;
 417   dynstr_init(d);
 418   for(mode = 0; mode < ev_nmodes; ++mode) {
 419     D(("mode %s maxfd %d", modenames[mode], ev->mode[mode].maxfd));
 420     for(n = 0; n < ev->mode[mode].nfds; ++n) {
 421       fd = ev->mode[mode].fds[n].fd;
 422       D(("fd %s %d%s%s (%s)", modenames[mode], fd,
 423          FD_ISSET(fd, &ev->mode[mode].enabled) ? " enabled" : "",
 424          FD_ISSET(fd, &ev->mode[mode].tripped) ? " tripped" : "",
 425          ev->mode[mode].fds[n].what));
 426     }
 427     d->nvec = 0;
 428     for(fd = 0; fd <= ev->mode[mode].maxfd; ++fd) {
 429       if(!FD_ISSET(fd, &ev->mode[mode].enabled))
 430         continue;
 431       for(n = 0; n < ev->mode[mode].nfds; ++n) {
 432         if(ev->mode[mode].fds[n].fd == fd)
 433           break;
 434       }
 435       if(n < ev->mode[mode].nfds)
 436         snprintf(b, sizeof b, "%d(%s)", fd, ev->mode[mode].fds[n].what);
 437       else
 438         snprintf(b, sizeof b, "%d", fd);
 439       dynstr_append(d, ' ');
 440       dynstr_append_string(d, b);
 441     }
 442     dynstr_terminate(d);
 443     D(("%s enabled:%s", modenames[mode], d->vec));
 444   }
 445 }
 446
 447 /* timeouts *******************************************************************/
 448
 449 /** @brief Register a timeout
 450  * @param ev Event source
 451  * @param handlep Where to store timeout handle, or @c NULL
 452  * @param when Earliest time to call @p callback, or @c NULL
 453  * @param callback Function to call at or after @p when
 454  * @param u Passed to @p callback
 455  * @return 0 on success, non-0 on error
 456  *
 457  * If @p when is a null pointer then a time of 0 is assumed.  The effect is to
 458  * call the timeout handler from ev_run() next time around the event loop.
 459  * This is used internally to schedule various operations if it is not
 460  * convenient to call them from the current place in the call stack, or
 461  * externally to ensure that other clients of the event loop get a look in when
 462  * performing some lengthy operation.
 463  */
 464 int ev_timeout(ev_source *ev,
 465                ev_timeout_handle *handlep,
 466                const struct timeval *when,
 467                ev_timeout_callback *callback,
 468                void *u) {
 469   struct timeout *t;
 470
 471   D(("registering timeout at %ld.%ld callback %p %p",
 472      when ? (long)when->tv_sec : 0, when ? (long)when->tv_usec : 0,
 473      (void *)callback, u));
 474   t = xmalloc(sizeof *t);
 475   if(when)
 476     t->when = *when;
 477   t->callback = callback;
 478   t->u = u;
 479   t->active = 1;
 480   timeout_heap_insert(ev->timeouts, t);
 481   if(handlep)
 482     *handlep = t;
 483   return 0;
 484 }
 485
 486 /** @brief Cancel a timeout
 487  * @param ev Event loop
 488  * @param handle Handle returned from ev_timeout(), or 0
 489  * @return 0 on success, non-0 on error
 490  *
 491  * If @p handle is 0 then this is a no-op.
 492  */
 493 int ev_timeout_cancel(ev_source attribute((unused)) *ev,
 494                       ev_timeout_handle handle) {
 495   struct timeout *t = handle;
 496
 497   if(t)
 498     t->active = 0;
 499   return 0;
 500 }
 501
 502 /* signals ********************************************************************/
 503
/** @brief Mapping of signals to pipe write ends
 *
 * Indexed by signal number.  ev_signal() stores the write end of the event
 * loop's signal pipe here and sighandler() writes to it.
 *
 * The pipes are per-event loop, it's possible in theory for there to be
 * multiple event loops (e.g. in different threads), although in fact DisOrder
 * does not do this.
 */
static int sigfd[NSIG];
 511
 512 /** @brief The signal handler
 513  * @param s Signal number
 514  *
 515  * Writes to @c sigfd[s].
 516  */
 517 static void sighandler(int s) {
 518   unsigned char sc = s;
 519   static const char errmsg[] = "error writing to signal pipe";
 520
 521   /* probably the reader has stopped listening for some reason */
 522   if(write(sigfd[s], &sc, 1) < 0) {
 523         /* do the best we can as we're about to abort; shut _up_, gcc */
 524         int _ignore = write(2, errmsg, sizeof errmsg - 1);
 525         (void)_ignore;
 526     abort();
 527   }
 528 }
 529
 530 /** @brief Read callback for signals */
 531 static int signal_read(ev_source *ev,
 532                        int attribute((unused)) fd,
 533                        void attribute((unused)) *u) {
 534   unsigned char s;
 535   int n;
 536   int ret;
 537
 538   if((n = read(ev->sigpipe[0], &s, 1)) == 1)
 539     if((ret = ev->signals[s].callback(ev, s, ev->signals[s].u)))
 540       return ret;
 541   assert(n != 0);
 542   if(n < 0 && (errno != EINTR && errno != EAGAIN)) {
 543     disorder_error(errno, "error reading from signal pipe %d", ev->sigpipe[0]);
 544     return -1;
 545   }
 546   return 0;
 547 }
 548
 549 /** @brief Close the signal pipe */
 550 static void close_sigpipe(ev_source *ev) {
 551   int save_errno = errno;
 552
 553   xclose(ev->sigpipe[0]);
 554   xclose(ev->sigpipe[1]);
 555   ev->sigpipe[0] = ev->sigpipe[1] = -1;
 556   errno = save_errno;
 557 }
 558
 559 /** @brief Register a signal handler
 560  * @param ev Event loop
 561  * @param sig Signal to handle
 562  * @param callback Called when signal is delivered
 563  * @param u Passed to @p callback
 564  * @return 0 on success, non-0 on error
 565  *
 566  * Note that @p callback is called from inside ev_run(), not from inside the
 567  * signal handler, so the usual restrictions on signal handlers do not apply.
 568  */
 569 int ev_signal(ev_source *ev,
 570               int sig,
 571               ev_signal_callback *callback,
 572               void *u) {
 573   int n;
 574   struct sigaction sa;
 575
 576   D(("registering signal %d handler callback %p %p", sig, (void *)callback, u));
 577   assert(sig > 0);
 578   assert(sig < NSIG);
 579   assert(sig <= UCHAR_MAX);
 580   if(ev->sigpipe[0] == -1) {
 581     D(("creating signal pipe"));
 582     xpipe(ev->sigpipe);
 583     D(("signal pipe is %d, %d", ev->sigpipe[0], ev->sigpipe[1]));
 584     for(n = 0; n < 2; ++n) {
 585       nonblock(ev->sigpipe[n]);
 586       cloexec(ev->sigpipe[n]);
 587     }
 588     if(ev_fd(ev, ev_read, ev->sigpipe[0], signal_read, 0, "sigpipe read")) {
 589       close_sigpipe(ev);
 590       return -1;
 591     }
 592   }
 593   sigaddset(&ev->sigmask, sig);
 594   xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
 595   sigfd[sig] = ev->sigpipe[1];
 596   ev->signals[sig].callback = callback;
 597   ev->signals[sig].u = u;
 598   sa.sa_handler = sighandler;
 599   sigfillset(&sa.sa_mask);
 600   sa.sa_flags = SA_RESTART;
 601   xsigaction(sig, &sa, &ev->signals[sig].oldsa);
 602   ev->escape = 1;
 603   return 0;
 604 }
 605
 606 /** @brief Cancel a signal handler
 607  * @param ev Event loop
 608  * @param sig Signal to cancel
 609  * @return 0 on success, non-0 on error
 610  */
 611 int ev_signal_cancel(ev_source *ev,
 612                      int sig) {
 613   sigset_t ss;
 614
 615   xsigaction(sig, &ev->signals[sig].oldsa, 0);
 616   ev->signals[sig].callback = 0;
 617   ev->escape = 1;
 618   sigdelset(&ev->sigmask, sig);
 619   sigemptyset(&ss);
 620   sigaddset(&ss, sig);
 621   xsigprocmask(SIG_UNBLOCK, &ss, 0);
 622   return 0;
 623 }
 624
 625 /** @brief Clean up signal handling
 626  * @param ev Event loop
 627  *
 628  * This function can be called from inside a fork.  It restores signal
 629  * handlers, unblocks the signals, and closes the signal pipe for @p ev.
 630  */
 631 void ev_signal_atfork(ev_source *ev) {
 632   int sig;
 633
 634   if(ev->sigpipe[0] != -1) {
 635     /* revert any handled signals to their original state */
 636     for(sig = 1; sig < NSIG; ++sig) {
 637       if(ev->signals[sig].callback != 0)
 638         xsigaction(sig, &ev->signals[sig].oldsa, 0);
 639     }
 640     /* and then unblock them */
 641     xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
 642     /* don't want a copy of the signal pipe open inside the fork */
 643     xclose(ev->sigpipe[0]);
 644     xclose(ev->sigpipe[1]);
 645   }
 646 }
 647
 648 /* child processes ************************************************************/
 649
 650 /** @brief Called on SIGCHLD */
 651 static int sigchld_callback(ev_source *ev,
 652                             int attribute((unused)) sig,
 653                             void attribute((unused)) *u) {
 654   struct rusage ru;
 655   pid_t r;
 656   int status, n, ret, revisit;
 657
 658   do {
 659     revisit = 0;
 660     for(n = 0; n < ev->nchildren; ++n) {
 661       r = wait4(ev->children[n].pid,
 662                 &status,
 663                 ev->children[n].options | WNOHANG,
 664                 &ru);
 665       if(r > 0) {
 666         ev_child_callback *c = ev->children[n].callback;
 667         void *cu = ev->children[n].u;
 668
 669         if(WIFEXITED(status) || WIFSIGNALED(status))
 670           ev_child_cancel(ev, r);
 671         revisit = 1;
 672         if((ret = c(ev, r, status, &ru, cu)))
 673           return ret;
 674       } else if(r < 0) {
 675         /* We should "never" get an ECHILD but it can in fact happen.  For
 676          * instance on Linux 2.4.31, and probably other versions, if someone
 677          * straces a child process and then a different child process
 678          * terminates, when we wait4() the trace process we will get ECHILD
 679          * because it has been reparented to strace.  Obviously this is a
 680          * hopeless design flaw in the tracing infrastructure, but we don't
 681          * want the disorder server to bomb out because of it.  So we just log
 682          * the problem and ignore it.
 683          */
 684         disorder_error(errno, "error calling wait4 for PID %lu (broken ptrace?)",
 685                        (unsigned long)ev->children[n].pid);
 686         if(errno != ECHILD)
 687           return -1;
 688       }
 689     }
 690   } while(revisit);
 691   return 0;
 692 }
 693
 694 /** @brief Configure event loop for child process handling
 695  * @return 0 on success, non-0 on error
 696  *
 697  * Currently at most one event loop can handle child processes and it must be
 698  * distinguished from others by calling this function on it.  This could be
 699  * fixed but since no process ever makes use of more than one event loop there
 700  * is no need.
 701  */
 702 int ev_child_setup(ev_source *ev) {
 703   D(("installing SIGCHLD handler"));
 704   return ev_signal(ev, SIGCHLD, sigchld_callback, 0);
 705 }
 706
 707 /** @brief Wait for a child process to terminate
 708  * @param ev Event loop
 709  * @param pid Process ID of child
 710  * @param options Options to pass to @c wait4()
 711  * @param callback Called when child terminates (or possibly when it stops)
 712  * @param u Passed to @p callback
 713  * @return 0 on success, non-0 on error
 714  *
 715  * You must have called ev_child_setup() on @p ev once first.
 716  */
 717 int ev_child(ev_source *ev,
 718              pid_t pid,
 719              int options,
 720              ev_child_callback *callback,
 721              void *u) {
 722   int n;
 723
 724   D(("registering child handling %ld options %d callback %p %p",
 725      (long)pid, options, (void *)callback, u));
 726   assert(ev->signals[SIGCHLD].callback == sigchld_callback);
 727   if(ev->nchildren >= ev->nchildslots) {
 728     ev->nchildslots = ev->nchildslots ? 2 * ev->nchildslots : 16;
 729     ev->children = xrealloc(ev->children,
 730                             ev->nchildslots * sizeof (struct child));
 731   }
 732   n = ev->nchildren++;
 733   ev->children[n].pid = pid;
 734   ev->children[n].options = options;
 735   ev->children[n].callback = callback;
 736   ev->children[n].u = u;
 737   return 0;
 738 }
 739
 740 /** @brief Stop waiting for a child process
 741  * @param ev Event loop
 742  * @param pid Child process ID
 743  * @return 0 on success, non-0 on error
 744  */
 745 int ev_child_cancel(ev_source *ev,
 746                     pid_t pid) {
 747   int n;
 748
 749   for(n = 0; n < ev->nchildren && ev->children[n].pid != pid; ++n)
 750     ;
 751   assert(n < ev->nchildren);
 752   if(n != ev->nchildren - 1)
 753     ev->children[n] = ev->children[ev->nchildren - 1];
 754   --ev->nchildren;
 755   return 0;
 756 }
 757
 758 /** @brief Terminate and wait for all child processes
 759  * @param ev Event loop
 760  *
 761  * Does *not* call the completion callbacks.  Only used during teardown.
 762  */
 763 void ev_child_killall(ev_source *ev) {
 764   int n, rc, w;
 765
 766   for(n = 0; n < ev->nchildren; ++n) {
 767     if(kill(ev->children[n].pid, SIGTERM) < 0) {
 768       disorder_error(errno, "sending SIGTERM to pid %lu",
 769                      (unsigned long)ev->children[n].pid);
 770       ev->children[n].pid = -1;
 771     }
 772   }
 773   for(n = 0; n < ev->nchildren; ++n) {
 774     if(ev->children[n].pid == -1)
 775       continue;
 776     do {
 777       rc = waitpid(ev->children[n].pid, &w, 0);
 778     } while(rc < 0 && errno == EINTR);
 779     if(rc < 0) {
 780       disorder_error(errno, "waiting for pid %lu",
 781                      (unsigned long)ev->children[n].pid);
 782       continue;
 783     }
 784   }
 785   ev->nchildren = 0;
 786 }
 787
 788 /* socket listeners ***********************************************************/
 789
/** @brief State for a socket listener */
struct listen_state {
  /** @brief Called for each connection accepted on the listening socket */
  ev_listen_callback *callback;
  /** @brief Passed to @c callback */
  void *u;
};
 795
 796 /** @brief Called when a listenign socket is readable */
 797 static int listen_callback(ev_source *ev, int fd, void *u) {
 798   const struct listen_state *l = u;
 799   int newfd;
 800   union {
 801     struct sockaddr_in in;
 802 #if HAVE_STRUCT_SOCKADDR_IN6
 803     struct sockaddr_in6 in6;
 804 #endif
 805     struct sockaddr_un un;
 806     struct sockaddr sa;
 807   } addr;
 808   socklen_t addrlen;
 809   int ret;
 810
 811   D(("callback for listener fd %d", fd));
 812   while((addrlen = sizeof addr),
 813         (newfd = accept(fd, &addr.sa, &addrlen)) >= 0) {
 814     if((ret = l->callback(ev, newfd, &addr.sa, addrlen, l->u)))
 815       return ret;
 816   }
 817   switch(errno) {
 818   case EINTR:
 819   case EAGAIN:
 820     break;
 821 #ifdef ECONNABORTED
 822   case ECONNABORTED:
 823     disorder_error(errno, "error calling accept");
 824     break;
 825 #endif
 826 #ifdef EPROTO
 827   case EPROTO:
 828     /* XXX on some systems EPROTO should be fatal, but we don't know if
 829      * we're running on one of them */
 830     disorder_error(errno, "error calling accept");
 831     break;
 832 #endif
 833   default:
 834     disorder_fatal(errno, "error calling accept");
 835     break;
 836   }
 837   if(errno != EINTR && errno != EAGAIN)
 838     disorder_error(errno, "error calling accept");
 839   return 0;
 840 }
 841
 842 /** @brief Listen on a socket for inbound stream connections
 843  * @param ev Event source
 844  * @param fd File descriptor of socket
 845  * @param callback Called when a new connection arrives
 846  * @param u Passed to @p callback
 847  * @param what Text description of socket
 848  * @return 0 on success, non-0 on error
 849  */
 850 int ev_listen(ev_source *ev,
 851               int fd,
 852               ev_listen_callback *callback,
 853               void *u,
 854               const char *what) {
 855   struct listen_state *l = xmalloc(sizeof *l);
 856
 857   D(("registering listener fd %d callback %p %p", fd, (void *)callback, u));
 858   l->callback = callback;
 859   l->u = u;
 860   return ev_fd(ev, ev_read, fd, listen_callback, l, what);
 861 }
 862
 863 /** @brief Stop listening on a socket
 864  * @param ev Event loop
 865  * @param fd File descriptor of socket
 866  * @return 0 on success, non-0 on error
 867  */
 868 int ev_listen_cancel(ev_source *ev, int fd) {
 869   D(("cancelling listener fd %d", fd));
 870   return ev_fd_cancel(ev, ev_read, fd);
 871 }
 872
 873 /* buffer *********************************************************************/
 874
 875 /** @brief Buffer structure */
 876 struct buffer {
 877   char *base, *start, *end, *top;
 878 };
 879
 880 /* @brief Make sure there is @p bytes available at @c b->end */
 881 static void buffer_space(struct buffer *b, size_t bytes) {
 882   D(("buffer_space %p %p %p %p want %lu",
 883      (void *)b->base, (void *)b->start, (void *)b->end, (void *)b->top,
 884      (unsigned long)bytes));
 885   if(b->start == b->end)
 886     b->start = b->end = b->base;
 887   if((size_t)(b->top - b->end) < bytes) {
 888     if((size_t)((b->top - b->end) + (b->start - b->base)) < bytes) {
 889       size_t newspace = b->end - b->start + bytes, n;
 890       char *newbase;
 891
 892       for(n = 16; n < newspace; n *= 2)
 893         ;
 894       newbase = xmalloc_noptr(n);
 895       memcpy(newbase, b->start, b->end - b->start);
 896       b->base = newbase;
 897       b->end = newbase + (b->end - b->start);
 898       b->top = newbase + n;
 899       b->start = newbase;               /* must be last */
 900     } else {
 901       memmove(b->base, b->start, b->end - b->start);
 902       b->end = b->base + (b->end - b->start);
 903       b->start = b->base;
 904     }
 905   }
 906   D(("result %p %p %p %p",
 907      (void *)b->base, (void *)b->start, (void *)b->end, (void *)b->top));
 908 }
 909
 910 /* readers and writers *******************************************************/
 911
 912 /** @brief State structure for a buffered writer */
 913 struct ev_writer {
 914   /** @brief Sink feeding the buffer; must stay first (ev_writer_write() casts it back) */
 915   struct sink s;
 916
 917   /** @brief Output buffer holding bytes not yet written to @p fd */
 918   struct buffer b;
 919
 920   /** @brief File descriptor to write to, or -1 once shut down */
 921   int fd;
 922
 923   /** @brief Set by ev_writer_close(): there'll be no more output */
 924   int eof;
 925
 926   /** @brief Error/termination callback, invoked from writer_shutdown() */
 927   ev_error_callback *callback;
 928
 929   /** @brief Passed to @p callback */
 930   void *u;
 931
 932   /** @brief Parent event source */
 933   ev_source *ev;
 934
 935   /** @brief Maximum time in seconds between successful writes, 0 = don't care */
 936   int timebound;
 937   /** @brief Maximum number of bytes to buffer, 0 = don't care */
 938   int spacebound;
 939   /** @brief Error code to pass to @p callback (see writer_shutdown()) */
 940   int error;
 941   /** @brief Timeout handle for @p timebound (or 0 if none is pending) */
 942   ev_timeout_handle timeout;
 943
 944   /** @brief Description of this writer, used in log messages */
 945   const char *what;
 946
 947   /** @brief Tied reader or 0 */
 948   ev_reader *reader;
 949
 950   /** @brief Set once the writer is abandoned (time or space bound exceeded) */
 951   int abandoned;
 952 };
 953
 954 /** @brief State structure for a buffered reader */
 955 struct ev_reader {
 956   /** @brief Input buffer holding bytes read but not yet consumed */
 957   struct buffer b;
 958   /** @brief File descriptor read from, or -1 once shut down */
 959   int fd;
 960   /** @brief Called when new data is available, and with the EOF flag at end of input */
 961   ev_reader_callback *callback;
 962   /** @brief Called from reader_shutdown() when @p error is nonzero */
 963   ev_error_callback *error_callback;
 964   /** @brief Passed to @p callback and @p error_callback */
 965   void *u;
 966   /** @brief Parent event loop */
 967   ev_source *ev;
 968   /** @brief Set by reader_shutdown(); stops the FD being re-enabled */
 969   int eof;
 970   /** @brief Error code to pass to error callback (0 = no error, no call) */
 971   int error;
 972   /** @brief Tied writer or NULL */
 973   ev_writer *writer;
 974 };
 975
 976 /* buffered writer ************************************************************/
 977
 978 /** @brief Shut down the writer
 979  *
 980  * This is called to shut down a writer.  The error callback is not called
 981  * through any other path.  Also we do not cancel @p fd from anywhere else,
 982  * though we might disable it.
 983  *
 984  * It has the signature of a timeout callback so that it can be called from a
 985  * time=0 timeout.
 986  *
 987  * Calls @p callback with @p w->syntherr as the error code (which might be 0).
 988  */
 989 static int writer_shutdown(ev_source *ev,
 990                            const attribute((unused)) struct timeval *now,
 991                            void *u) {
 992   ev_writer *w = u;
 993
 994   if(w->fd == -1)
 995     return 0;                           /* already shut down */
 996   D(("writer_shutdown fd=%d error=%d", w->fd, w->error));
 997   ev_timeout_cancel(ev, w->timeout);
 998   ev_fd_cancel(ev, ev_write, w->fd);
 999   w->timeout = 0;
1000   if(w->reader) {
1001     D(("found a tied reader"));
1002     /* If there is a reader still around we just untie it */
1003     w->reader->writer = 0;
1004     shutdown(w->fd, SHUT_WR);           /* there'll be no more writes */
1005   } else {
1006     D(("no tied reader"));
1007     /* There's no reader so we are free to close the FD */
1008     xclose(w->fd);
1009   }
1010   w->fd = -1;
1011   return w->callback(ev, w->error, w->u);
1012 }
1013
1014 /** @brief Called when a writer's @p timebound expires */
1015 static int writer_timebound_exceeded(ev_source *ev,
1016                                      const struct timeval *now,
1017                                      void *u) {
1018   ev_writer *const w = u;
1019
1020   if(!w->abandoned) {
1021     w->abandoned = 1;
1022     disorder_error(0, "abandoning writer '%s' because no writes within %ds",
1023                    w->what, w->timebound);
1024     w->error = ETIMEDOUT;
1025   }
1026   return writer_shutdown(ev, now, u);
1027 }
1028
1029 /** @brief Set the time bound callback (if not set already) */
1030 static void writer_set_timebound(ev_writer *w) {
1031   if(w->timebound && !w->timeout) {
1032     struct timeval when;
1033     ev_source *const ev = w->ev;
1034
1035     xgettimeofday(&when, 0);
1036     when.tv_sec += w->timebound;
1037     ev_timeout(ev, &w->timeout, &when, writer_timebound_exceeded, w);
1038   }
1039 }
1040
1041 /** @brief Called when a writer's file descriptor is writable */
1042 static int writer_callback(ev_source *ev, int fd, void *u) {
1043   ev_writer *const w = u;
1044   int n;
1045
1046   n = write(fd, w->b.start, w->b.end - w->b.start);
1047   D(("callback for writer fd %d, %ld bytes, n=%d, errno=%d",
1048      fd, (long)(w->b.end - w->b.start), n, errno));
1049   if(n >= 0) {
1050     /* Consume bytes from the buffer */
1051     w->b.start += n;
1052     /* Suppress any outstanding timeout */
1053     ev_timeout_cancel(ev, w->timeout);
1054     w->timeout = 0;
1055     if(w->b.start == w->b.end) {
1056       /* The buffer is empty */
1057       if(w->eof) {
1058         /* We're done, we can shut down this writer */
1059         w->error = 0;
1060         return writer_shutdown(ev, 0, w);
1061       } else
1062         /* There might be more to come but we don't need writer_callback() to
1063          * be called for the time being */
1064         ev_fd_disable(ev, ev_write, fd);
1065     } else
1066       /* The buffer isn't empty, set a timeout so we give up if we don't manage
1067        * to write some more within a reasonable time */
1068       writer_set_timebound(w);
1069   } else {
1070     switch(errno) {
1071     case EINTR:
1072     case EAGAIN:
1073       break;
1074     default:
1075       w->error = errno;
1076       return writer_shutdown(ev, 0, w);
1077     }
1078   }
1079   return 0;
1080 }
1081
1082 /** @brief Write bytes to a writer's buffer
1083  *
1084  * This is the sink write callback.
1085  *
1086  * Calls ev_fd_enable() if necessary (i.e. if the buffer was empty but
1087  * now is not).
1088  */
1089 static int ev_writer_write(struct sink *sk, const void *s, int n) {
1090   ev_writer *w = (ev_writer *)sk;
1091
1092   if(!n)
1093     return 0;                           /* avoid silliness */
1094   if(w->fd == -1)
1095     disorder_error(0, "ev_writer_write on %s after shutdown", w->what);
1096   if(w->spacebound && w->b.end - w->b.start + n > w->spacebound) {
1097     /* The new buffer contents will exceed the space bound.  We assume that the
1098      * remote client has gone away and TCP hasn't noticed yet, or that it's got
1099      * hopelessly stuck. */
1100     if(!w->abandoned) {
1101       w->abandoned = 1;
1102       disorder_error(0, "abandoning writer '%s' because buffer has reached %td bytes",
1103                      w->what, w->b.end - w->b.start);
1104       ev_fd_disable(w->ev, ev_write, w->fd);
1105       w->error = EPIPE;
1106       return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
1107     } else
1108       return 0;
1109   }
1110   /* Make sure there is space */
1111   buffer_space(&w->b, n);
1112   /* If the buffer was formerly empty then we'll need to re-enable the FD */
1113   if(w->b.start == w->b.end)
1114     ev_fd_enable(w->ev, ev_write, w->fd);
1115   memcpy(w->b.end, s, n);
1116   w->b.end += n;
1117   /* Arrange a timeout if there wasn't one set already */
1118   writer_set_timebound(w);
1119   return 0;
1120 }
1121
1122 /** @brief Create a new buffered writer
1123  * @param ev Event loop
1124  * @param fd File descriptor to write to
1125  * @param callback Called if an error occurs and when finished
1126  * @param u Passed to @p callback
1127  * @param what Text description
1128  * @return New writer or @c NULL
1129  *
1130  * Writers own their file descriptor and close it when they have finished with
1131  * it.
1132  *
1133  * If you pass the same fd to a reader and writer, you must tie them together
1134  * with ev_tie().
1135  */
1136 ev_writer *ev_writer_new(ev_source *ev,
1137                          int fd,
1138                          ev_error_callback *callback,
1139                          void *u,
1140                          const char *what) {
1141   ev_writer *w = xmalloc(sizeof *w);
1142
1143   D(("registering writer fd %d callback %p %p", fd, (void *)callback, u));
1144   w->s.write = ev_writer_write;
1145   w->fd = fd;
1146   w->callback = callback;
1147   w->u = u;
1148   w->ev = ev;
1149   w->timebound = 10 * 60;
1150   w->spacebound = 512 * 1024;
1151   w->what = what;
1152   if(ev_fd(ev, ev_write, fd, writer_callback, w, what))
1153     return 0;
1154   /* Buffer is initially empty so we don't want a callback */
1155   ev_fd_disable(ev, ev_write, fd);
1156   return w;
1157 }
1158
1159 /** @brief Get/set the time bound
1160  * @param w Writer
1161  * @param new_time_bound New bound or -1 for no change
1162  * @return Latest time bound
1163  *
1164  * If @p new_time_bound is negative then the current time bound is returned.
1165  * Otherwise it is set and the new value returned.
1166  *
1167  * The time bound is the number of seconds allowed between writes.  If it takes
1168  * longer than this to flush a buffer then the peer will be assumed to be dead
1169  * and an error will be synthesized.  0 means "don't care".  The default time
1170  * bound is 10 minutes.
1171  *
1172  * Note that this value does not take into account kernel buffering and
1173  * timeouts.
1174  */
1175 int ev_writer_time_bound(ev_writer *w,
1176                          int new_time_bound) {
1177   if(new_time_bound >= 0)
1178     w->timebound = new_time_bound;
1179   return w->timebound;
1180 }
1181
1182 /** @brief Get/set the space bound
1183  * @param w Writer
1184  * @param new_space_bound New bound or -1 for no change
1185  * @return Latest space bound
1186  *
1187  * If @p new_space_bound is negative then the current space bound is returned.
1188  * Otherwise it is set and the new value returned.
1189  *
1190  * The space bound is the number of bytes allowed between in the buffer.  If
1191  * the buffer exceeds this size an error will be synthesized.  0 means "don't
1192  * care".  The default space bound is 512Kbyte.
1193  *
1194  * Note that this value does not take into account kernel buffering.
1195  */
1196 int ev_writer_space_bound(ev_writer *w,
1197                           int new_space_bound) {
1198   if(new_space_bound >= 0)
1199     w->spacebound = new_space_bound;
1200   return w->spacebound;
1201 }
1202
1203 /** @brief Return the sink associated with a writer
1204  * @param w Writer
1205  * @return Pointer to sink
1206  *
1207  * Writing to the sink will arrange for those bytes to be written to the file
1208  * descriptor as and when it is writable.
1209  */
1210 struct sink *ev_writer_sink(ev_writer *w) {
1211   if(!w)
1212     disorder_fatal(0, "ev_write_sink called with null writer");
1213   return &w->s;
1214 }
1215
1216 /** @brief Close a writer
1217  * @param w Writer to close
1218  * @return 0 on success, non-0 on error
1219  *
1220  * Close a writer.  No more bytes should be written to its sink.
1221  *
1222  * When the last byte has been written the callback will be called with an
1223  * error code of 0.  It is guaranteed that this will NOT happen before
1224  * ev_writer_close() returns (although the file descriptor for the writer might
1225  * be cancelled by the time it returns).
1226  */
1227 int ev_writer_close(ev_writer *w) {
1228   D(("close writer fd %d", w->fd));
1229   if(w->eof)
1230     return 0;                           /* already closed */
1231   w->eof = 1;
1232   if(w->b.start == w->b.end) {
1233     /* We're already finished */
1234     w->error = 0;                       /* no error */
1235     return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
1236   }
1237   return 0;
1238 }
1239
1240 /** @brief Attempt to flush a writer
1241  * @param w Writer to flush
1242  * @return 0 on success, non-0 on error
1243  *
1244  * Does a speculative write of any buffered data.  Does not block if it cannot
1245  * be written.
1246  */
1247 int ev_writer_flush(ev_writer *w) {
1248   return writer_callback(w->ev, w->fd, w);
1249 }
1250
1251 /* buffered reader ************************************************************/
1252
1253 /** @brief Shut down a reader
1254  *
1255  * This is the only path through which we cancel and close the file descriptor.
1256  * As with the writer case it is given timeout signature to allow it be
1257  * deferred to the next iteration of the event loop.
1258  *
1259  * We only call @p error_callback if @p error is nonzero (unlike the writer
1260  * case).
1261  */
1262 static int reader_shutdown(ev_source *ev,
1263                            const attribute((unused)) struct timeval *now,
1264                            void *u) {
1265   ev_reader *const r = u;
1266
1267   if(r->fd == -1)
1268     return 0;                           /* already shut down */
1269   D(("reader_shutdown fd=%d", r->fd));
1270   ev_fd_cancel(ev, ev_read, r->fd);
1271   r->eof = 1;
1272   if(r->writer) {
1273     D(("found a tied writer"));
1274     /* If there is a writer still around we just untie it */
1275     r->writer->reader = 0;
1276     shutdown(r->fd, SHUT_RD);           /* there'll be no more reads */
1277   } else {
1278     D(("no tied writer found"));
1279     /* There's no writer so we are free to close the FD */
1280     xclose(r->fd);
1281   }
1282   r->fd = -1;
1283   if(r->error)
1284     return r->error_callback(ev, r->error, r->u);
1285   else
1286     return 0;
1287 }
1288
1289 /** @brief Called when a reader's @p fd is readable */
1290 static int reader_callback(ev_source *ev, int fd, void *u) {
1291   ev_reader *r = u;
1292   int n;
1293
1294   buffer_space(&r->b, 1);
1295   n = read(fd, r->b.end, r->b.top - r->b.end);
1296   D(("read fd %d buffer %d returned %d errno %d",
1297      fd, (int)(r->b.top - r->b.end), n, errno));
1298   if(n > 0) {
1299     r->b.end += n;
1300     return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 0, r->u);
1301   } else if(n == 0) {
1302     /* No more read callbacks needed */
1303     ev_fd_disable(r->ev, ev_read, r->fd);
1304     ev_timeout(r->ev, 0, 0, reader_shutdown, r);
1305     /* Pass the remaining data and an eof indicator to the user */
1306     return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 1, r->u);
1307   } else {
1308     switch(errno) {
1309     case EINTR:
1310     case EAGAIN:
1311       break;
1312     default:
1313       /* Fatal error, kill the reader now */
1314       r->error = errno;
1315       return reader_shutdown(ev, 0, r);
1316     }
1317   }
1318   return 0;
1319 }
1320
1321 /** @brief Create a new buffered reader
1322  * @param ev Event loop
1323  * @param fd File descriptor to read from
1324  * @param callback Called when new data is available
1325  * @param error_callback Called if an error occurs
1326  * @param u Passed to callbacks
1327  * @param what Text description
1328  * @return New reader or @c NULL
1329  *
1330  * Readers own their fd and close it when they are finished with it.
1331  *
1332  * If you pass the same fd to a reader and writer, you must tie them together
1333  * with ev_tie().
1334  */
1335 ev_reader *ev_reader_new(ev_source *ev,
1336                          int fd,
1337                          ev_reader_callback *callback,
1338                          ev_error_callback *error_callback,
1339                          void *u,
1340                          const char *what) {
1341   ev_reader *r = xmalloc(sizeof *r);
1342
1343   D(("registering reader fd %d callback %p %p %p",
1344      fd, (void *)callback, (void *)error_callback, u));
1345   r->fd = fd;
1346   r->callback = callback;
1347   r->error_callback = error_callback;
1348   r->u = u;
1349   r->ev = ev;
1350   if(ev_fd(ev, ev_read, fd, reader_callback, r, what))
1351     return 0;
1352   return r;
1353 }
1354
1355 void ev_reader_buffer(ev_reader *r, size_t nbytes) {
1356   buffer_space(&r->b, nbytes - (r->b.end - r->b.start));
1357 }
1358
1359 /** @brief Consume @p n bytes from the reader's buffer
1360  * @param r Reader
1361  * @param n Number of bytes to consume
1362  *
1363  * Tells the reader than the next @p n bytes have been dealt with and can now
1364  * be discarded.
1365  */
1366 void ev_reader_consume(ev_reader *r, size_t n) {
1367   r->b.start += n;
1368 }
1369
1370 /** @brief Cancel a reader
1371  * @param r Reader
1372  * @return 0 on success, non-0 on error
1373  *
1374  * No further callbacks will be made, and the FD will be closed (in a later
1375  * iteration of the event loop).
1376  */
1377 int ev_reader_cancel(ev_reader *r) {
1378   D(("cancel reader fd %d", r->fd));
1379   if(r->fd == -1)
1380     return 0;                           /* already thoroughly cancelled */
1381   ev_fd_disable(r->ev, ev_read, r->fd);
1382   return ev_timeout(r->ev, 0, 0, reader_shutdown, r);
1383 }
1384
1385 /** @brief Temporarily disable a reader
1386  * @param r Reader
1387  * @return 0 on success, non-0 on error
1388  *
1389  * No further callbacks for this reader will be made.  Re-enable with
1390  * ev_reader_enable().
1391  */
1392 int ev_reader_disable(ev_reader *r) {
1393   D(("disable reader fd %d", r->fd));
1394   return ev_fd_disable(r->ev, ev_read, r->fd);
1395 }
1396
1397 /** @brief Called from ev_run() for ev_reader_incomplete() */
1398 static int reader_continuation(ev_source attribute((unused)) *ev,
1399                                const attribute((unused)) struct timeval *now,
1400                                void *u) {
1401   ev_reader *r = u;
1402
1403   D(("reader continuation callback fd %d", r->fd));
1404   /* If not at EOF turn the FD back on */
1405   if(!r->eof)
1406     if(ev_fd_enable(r->ev, ev_read, r->fd))
1407       return -1;
1408   /* We're already in a timeout callback so there's no reason we can't call the
1409    * user callback directly (compare ev_reader_enable()). */
1410   return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
1411 }
1412
1413 /** @brief Arrange another callback
1414  * @param r reader
1415  * @return 0 on success, non-0 on error
1416  *
1417  * Indicates that the reader can process more input but would like to yield to
1418  * other clients of the event loop.  Input will be disabled but it will be
1419  * re-enabled on the next iteration of the event loop and the read callback
1420  * will be called again (even if no further bytes are available).
1421  */
1422 int ev_reader_incomplete(ev_reader *r) {
1423   if(ev_fd_disable(r->ev, ev_read, r->fd)) return -1;
1424   return ev_timeout(r->ev, 0, 0, reader_continuation, r);
1425 }
1426
1427 static int reader_enabled(ev_source *ev,
1428                           const attribute((unused)) struct timeval *now,
1429                           void *u) {
1430   ev_reader *r = u;
1431
1432   D(("reader enabled callback fd %d", r->fd));
1433   return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
1434 }
1435
1436 /** @brief Re-enable reading
1437  * @param r reader
1438  * @return 0 on success, non-0 on error
1439  *
1440  * If there is unconsumed data then you get a callback next time round the
1441  * event loop even if nothing new has been read.
1442  *
1443  * The idea is in your read callback you come across a line (or whatever) that
1444  * can't be processed immediately.  So you set up processing and disable
1445  * reading with ev_reader_disable().  Later when you finish processing you
1446  * re-enable.  You'll automatically get another callback directly from the
1447  * event loop (i.e. not from inside ev_reader_enable()) so you can handle the
1448  * next line (or whatever) if the whole thing has in fact already arrived.
1449  *
1450  * The difference between this process and calling ev_reader_incomplete() is
1451  * ev_reader_incomplete() deals with the case where you can process now but
1452  * would rather yield to other clients of the event loop, while using
1453  * ev_reader_disable() and ev_reader_enable() deals with the case where you
1454  * cannot process input yet because some other process is actually not
1455  * complete.
1456  */
1457 int ev_reader_enable(ev_reader *r) {
1458   D(("enable reader fd %d", r->fd));
1459
1460   /* First if we're not at EOF then we re-enable reading */
1461   if(!r->eof)
1462     if(ev_fd_enable(r->ev, ev_read, r->fd))
1463       return -1;
1464   /* Arrange another callback next time round the event loop */
1465   return ev_timeout(r->ev, 0, 0, reader_enabled, r);
1466 }
1467
1468 /** @brief Tie a reader and a writer together
1469  * @param r Reader
1470  * @param w Writer
1471  * @return 0 on success, non-0 on error
1472  *
1473  * This function must be called if @p r and @p w share a file descritptor.
1474  */
1475 int ev_tie(ev_reader *r, ev_writer *w) {
1476   assert(r->writer == 0);
1477   assert(w->reader == 0);
1478   r->writer = w;
1479   w->reader = r;
1480   return 0;
1481 }
1482
1483 /*
1484 Local Variables:
1485 c-basic-offset:2
1486 comment-column:40
1487 fill-column:79
1488 End:
1489 */