2 * This file is part of DisOrder.
3 * Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21 * @brief DisOrder event loop
29 #include <sys/types.h>
30 #include <sys/resource.h>
39 #include <sys/socket.h>
40 #include <netinet/in.h>
53 /** @brief A timeout */
57 ev_timeout_callback *callback;
62 /** @brief Comparison function for timeouts */
63 static int timeout_lt(const struct timeout *a,
64 const struct timeout *b) {
65 return tvlt(&a->when, &b->when);
68 HEAP_TYPE(timeout_heap, struct timeout *, timeout_lt);
69 HEAP_DEFINE(timeout_heap, struct timeout *, timeout_lt);
71 /** @brief A file descriptor in one mode */
74 ev_fd_callback *callback;
79 /** @brief All the file descriptors in a given mode */
81 /** @brief Mask of active file descriptors passed to @c select() */
84 /** @brief File descriptor mask returned from @c select() */
87 /** @brief Number of file descriptors in @p fds */
90 /** @brief Number of slots in @p fds */
93 /** @brief Array of all active file descriptors */
96 /** @brief Highest-numbered file descriptor or 0 */
100 /** @brief A signal handler */
102 struct sigaction oldsa;
103 ev_signal_callback *callback;
107 /** @brief A child process */
111 ev_child_callback *callback;
115 /** @brief An event loop */
117 /** @brief File descriptors, per mode */
118 struct fdmode mode[ev_nmodes];
120 /** @brief Heap of timeouts */
121 struct timeout_heap timeouts[1];
123 /** @brief Array of handled signals */
124 struct signal signals[NSIG];
126 /** @brief Mask of handled signals */
129 /** @brief Escape early from handling of @c select() results
131 * This is set if any of the file descriptor arrays are invalidated, since
132 * it's then not safe for processing of them to continue.
136 /** @brief Signal handling pipe
138 * The signal handler writes signal numbers down this pipe.
142 /** @brief Number of child processes in @p children */
145 /** @brief Number of slots in @p children */
148 /** @brief Array of child processes */
149 struct child *children;
152 /** @brief Names of file descriptor modes */
153 static const char *modenames[] = { "read", "write", "except" };
155 /* utilities ******************************************************************/
157 /* creation *******************************************************************/
159 /** @brief Create a new event loop */
160 ev_source *ev_new(void) {
161 ev_source *ev = xmalloc(sizeof *ev);
164 memset(ev, 0, sizeof *ev);
165 for(n = 0; n < ev_nmodes; ++n)
166 FD_ZERO(&ev->mode[n].enabled);
167 ev->sigpipe[0] = ev->sigpipe[1] = -1;
168 sigemptyset(&ev->sigmask);
169 timeout_heap_init(ev->timeouts);
173 /* event loop *****************************************************************/
175 /** @brief Run the event loop
176 * @return -1 on error, non-0 if any callback returned non-0
178 int ev_run(ev_source *ev) {
181 struct timeval delta;
185 struct timeout *timeouts, *t, **tt;
188 xgettimeofday(&now, 0);
189 /* Handle timeouts. We don't want to handle any timeouts that are added
190 * while we're handling them (otherwise we'd have to break out of infinite
191 * loops, preferably without starving better-behaved subsystems). Hence
192 * the slightly complicated two-phase approach here. */
193 /* First we read those timeouts that have triggered out of the heap. We
194 * keep them in the same order they came out of the heap in. */
196 while(timeout_heap_count(ev->timeouts)
197 && tvle(&timeout_heap_first(ev->timeouts)->when, &now)) {
198 /* This timeout has reached its trigger time; provided it has not been
199 * cancelled we add it to the timeouts list. */
200 t = timeout_heap_remove(ev->timeouts);
207 /* Now we can run the callbacks for those timeouts. They might add further
208 * timeouts that are already in the past but they won't trigger until the
209 * next time round the event loop. */
210 for(t = timeouts; t; t = t->next) {
211 D(("calling timeout for %ld.%ld callback %p %p",
212 (long)t->when.tv_sec, (long)t->when.tv_usec,
213 (void *)t->callback, t->u));
214 ret = t->callback(ev, &now, t->u);
219 for(mode = 0; mode < ev_nmodes; ++mode) {
220 ev->mode[mode].tripped = ev->mode[mode].enabled;
221 if(ev->mode[mode].maxfd > maxfd)
222 maxfd = ev->mode[mode].maxfd;
224 xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
226 if(timeout_heap_count(ev->timeouts)) {
227 t = timeout_heap_first(ev->timeouts);
228 xgettimeofday(&now, 0);
229 delta.tv_sec = t->when.tv_sec - now.tv_sec;
230 delta.tv_usec = t->when.tv_usec - now.tv_usec;
231 if(delta.tv_usec < 0) {
232 delta.tv_usec += 1000000;
236 delta.tv_sec = delta.tv_usec = 0;
237 n = select(maxfd + 1,
238 &ev->mode[ev_read].tripped,
239 &ev->mode[ev_write].tripped,
240 &ev->mode[ev_except].tripped,
243 n = select(maxfd + 1,
244 &ev->mode[ev_read].tripped,
245 &ev->mode[ev_write].tripped,
246 &ev->mode[ev_except].tripped,
249 } while(n < 0 && errno == EINTR);
250 xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
252 error(errno, "error calling select");
254 /* If there's a bad FD in the mix then check them all and log what we
255 * find, to ease debugging */
256 for(mode = 0; mode < ev_nmodes; ++mode) {
257 for(n = 0; n < ev->mode[mode].nfds; ++n) {
258 const int fd = ev->mode[mode].fds[n].fd;
260 if(FD_ISSET(fd, &ev->mode[mode].enabled)
261 && fstat(fd, &sb) < 0)
262 error(errno, "mode %s fstat %d (%s)",
263 modenames[mode], fd, ev->mode[mode].fds[n].what);
265 for(n = 0; n <= maxfd; ++n)
266 if(FD_ISSET(n, &ev->mode[mode].enabled)
267 && fstat(n, &sb) < 0)
268 error(errno, "mode %s fstat %d", modenames[mode], n);
274 /* if anything deranges the meaning of an fd, or re-orders the
275 * fds[] tables, we'd better give up; such operations will
276 * therefore set @escape@. */
278 for(mode = 0; mode < ev_nmodes && !ev->escape; ++mode)
279 for(n = 0; n < ev->mode[mode].nfds && !ev->escape; ++n) {
280 int fd = ev->mode[mode].fds[n].fd;
281 if(FD_ISSET(fd, &ev->mode[mode].tripped)) {
282 D(("calling %s fd %d callback %p %p", modenames[mode], fd,
283 (void *)ev->mode[mode].fds[n].callback,
284 ev->mode[mode].fds[n].u));
285 ret = ev->mode[mode].fds[n].callback(ev, fd,
286 ev->mode[mode].fds[n].u);
292 /* we'll pick up timeouts back round the loop */
296 /* file descriptors ***********************************************************/
298 /** @brief Register a file descriptor
299 * @param ev Event loop
300 * @param mode @c ev_read or @c ev_write
301 * @param fd File descriptor
302 * @param callback Called when @p fd is readable/writable
303 * @param u Passed to @p callback
304 * @param what Text description
305 * @return 0 on success, non-0 on error
307 * Sets @ref ev_source::escape, so no further processing of file descriptors
308 * will occur this time round the event loop.
310 int ev_fd(ev_source *ev,
313 ev_fd_callback *callback,
318 D(("registering %s fd %d callback %p %p", modenames[mode], fd,
319 (void *)callback, u));
320 assert(mode < ev_nmodes);
321 if(ev->mode[mode].nfds >= ev->mode[mode].fdslots) {
322 ev->mode[mode].fdslots = (ev->mode[mode].fdslots
323 ? 2 * ev->mode[mode].fdslots : 16);
324 D(("expanding %s fd table to %d entries", modenames[mode],
325 ev->mode[mode].fdslots));
326 ev->mode[mode].fds = xrealloc(ev->mode[mode].fds,
327 ev->mode[mode].fdslots * sizeof (struct fd));
329 n = ev->mode[mode].nfds++;
330 FD_SET(fd, &ev->mode[mode].enabled);
331 ev->mode[mode].fds[n].fd = fd;
332 ev->mode[mode].fds[n].callback = callback;
333 ev->mode[mode].fds[n].u = u;
334 ev->mode[mode].fds[n].what = what;
335 if(fd > ev->mode[mode].maxfd)
336 ev->mode[mode].maxfd = fd;
341 /** @brief Cancel a file descriptor
342 * @param ev Event loop
343 * @param mode @c ev_read or @c ev_write
344 * @param fd File descriptor
345 * @return 0 on success, non-0 on error
347 * Sets @ref ev_source::escape, so no further processing of file descriptors
348 * will occur this time round the event loop.
350 int ev_fd_cancel(ev_source *ev, ev_fdmode mode, int fd) {
354 D(("cancelling mode %s fd %d", modenames[mode], fd));
355 /* find the right struct fd */
356 for(n = 0; n < ev->mode[mode].nfds && fd != ev->mode[mode].fds[n].fd; ++n)
358 assert(n < ev->mode[mode].nfds);
359 /* swap in the last fd and reduce the count */
360 if(n != ev->mode[mode].nfds - 1)
361 ev->mode[mode].fds[n] = ev->mode[mode].fds[ev->mode[mode].nfds - 1];
362 --ev->mode[mode].nfds;
363 /* if that was the biggest fd, find the new biggest one */
364 if(fd == ev->mode[mode].maxfd) {
366 for(n = 0; n < ev->mode[mode].nfds; ++n)
367 if(ev->mode[mode].fds[n].fd > maxfd)
368 maxfd = ev->mode[mode].fds[n].fd;
369 ev->mode[mode].maxfd = maxfd;
371 /* don't tell select about this fd any more */
372 FD_CLR(fd, &ev->mode[mode].enabled);
377 /** @brief Re-enable a file descriptor
378 * @param ev Event loop
379 * @param mode @c ev_read or @c ev_write
380 * @param fd File descriptor
381 * @return 0 on success, non-0 on error
383 * It is harmless if @p fd is currently disabled, but it must not have been
386 int ev_fd_enable(ev_source *ev, ev_fdmode mode, int fd) {
388 D(("enabling mode %s fd %d", modenames[mode], fd));
389 FD_SET(fd, &ev->mode[mode].enabled);
393 /** @brief Temporarily disable a file descriptor
394 * @param ev Event loop
395 * @param mode @c ev_read or @c ev_write
396 * @param fd File descriptor
397 * @return 0 on success, non-0 on error
399 * Re-enable with ev_fd_enable(). It is harmless if @p fd is already disabled,
400 * but it must not have been cancelled.
402 int ev_fd_disable(ev_source *ev, ev_fdmode mode, int fd) {
403 D(("disabling mode %s fd %d", modenames[mode], fd));
404 FD_CLR(fd, &ev->mode[mode].enabled);
405 FD_CLR(fd, &ev->mode[mode].tripped);
406 /* Suppress any pending callbacks */
411 /** @brief Log a report of file descriptor state */
412 void ev_report(ev_source *ev) {
421 for(mode = 0; mode < ev_nmodes; ++mode) {
422 D(("mode %s maxfd %d", modenames[mode], ev->mode[mode].maxfd));
423 for(n = 0; n < ev->mode[mode].nfds; ++n) {
424 fd = ev->mode[mode].fds[n].fd;
425 D(("fd %s %d%s%s (%s)", modenames[mode], fd,
426 FD_ISSET(fd, &ev->mode[mode].enabled) ? " enabled" : "",
427 FD_ISSET(fd, &ev->mode[mode].tripped) ? " tripped" : "",
428 ev->mode[mode].fds[n].what));
431 for(fd = 0; fd <= ev->mode[mode].maxfd; ++fd) {
432 if(!FD_ISSET(fd, &ev->mode[mode].enabled))
434 for(n = 0; n < ev->mode[mode].nfds; ++n) {
435 if(ev->mode[mode].fds[n].fd == fd)
438 if(n < ev->mode[mode].nfds)
439 snprintf(b, sizeof b, "%d(%s)", fd, ev->mode[mode].fds[n].what);
441 snprintf(b, sizeof b, "%d", fd);
442 dynstr_append(d, ' ');
443 dynstr_append_string(d, b);
446 D(("%s enabled:%s", modenames[mode], d->vec));
450 /* timeouts *******************************************************************/
452 /** @brief Register a timeout
453 * @param ev Event source
454 * @param handlep Where to store timeout handle, or @c NULL
455 * @param when Earliest time to call @p callback, or @c NULL
456 * @param callback Function to call at or after @p when
457 * @param u Passed to @p callback
458 * @return 0 on success, non-0 on error
460 * If @p when is a null pointer then a time of 0 is assumed. The effect is to
461 * call the timeout handler from ev_run() next time around the event loop.
462 * This is used internally to schedule various operations if it is not
463 * convenient to call them from the current place in the call stack, or
464 * externally to ensure that other clients of the event loop get a look in when
465 * performing some lengthy operation.
467 int ev_timeout(ev_source *ev,
468 ev_timeout_handle *handlep,
469 const struct timeval *when,
470 ev_timeout_callback *callback,
474 D(("registering timeout at %ld.%ld callback %p %p",
475 when ? (long)when->tv_sec : 0, when ? (long)when->tv_usec : 0,
476 (void *)callback, u));
477 t = xmalloc(sizeof *t);
480 t->callback = callback;
483 timeout_heap_insert(ev->timeouts, t);
489 /** @brief Cancel a timeout
490 * @param ev Event loop
491 * @param handle Handle returned from ev_timeout(), or 0
492 * @return 0 on success, non-0 on error
494 * If @p handle is 0 then this is a no-op.
496 int ev_timeout_cancel(ev_source attribute((unused)) *ev,
497 ev_timeout_handle handle) {
498 struct timeout *t = handle;
505 /* signals ********************************************************************/
507 /** @brief Mapping of signals to pipe write ends
509 * The pipes are per-event loop, it's possible in theory for there to be
510 * multiple event loops (e.g. in different threads), although in fact DisOrder
513 static int sigfd[NSIG];
515 /** @brief The signal handler
516 * @param s Signal number
518 * Writes to @c sigfd[s].
520 static void sighandler(int s) {
521 unsigned char sc = s;
522 static const char errmsg[] = "error writing to signal pipe";
524 /* probably the reader has stopped listening for some reason */
525 if(write(sigfd[s], &sc, 1) < 0) {
526 write(2, errmsg, sizeof errmsg - 1);
531 /** @brief Read callback for signals */
532 static int signal_read(ev_source *ev,
533 int attribute((unused)) fd,
534 void attribute((unused)) *u) {
539 if((n = read(ev->sigpipe[0], &s, 1)) == 1)
540 if((ret = ev->signals[s].callback(ev, s, ev->signals[s].u)))
543 if(n < 0 && (errno != EINTR && errno != EAGAIN)) {
544 error(errno, "error reading from signal pipe %d", ev->sigpipe[0]);
550 /** @brief Close the signal pipe */
551 static void close_sigpipe(ev_source *ev) {
552 int save_errno = errno;
554 xclose(ev->sigpipe[0]);
555 xclose(ev->sigpipe[1]);
556 ev->sigpipe[0] = ev->sigpipe[1] = -1;
560 /** @brief Register a signal handler
561 * @param ev Event loop
562 * @param sig Signal to handle
563 * @param callback Called when signal is delivered
564 * @param u Passed to @p callback
565 * @return 0 on success, non-0 on error
567 * Note that @p callback is called from inside ev_run(), not from inside the
568 * signal handler, so the usual restrictions on signal handlers do not apply.
570 int ev_signal(ev_source *ev,
572 ev_signal_callback *callback,
577 D(("registering signal %d handler callback %p %p", sig, (void *)callback, u));
580 assert(sig <= UCHAR_MAX);
581 if(ev->sigpipe[0] == -1) {
582 D(("creating signal pipe"));
584 D(("signal pipe is %d, %d", ev->sigpipe[0], ev->sigpipe[1]));
585 for(n = 0; n < 2; ++n) {
586 nonblock(ev->sigpipe[n]);
587 cloexec(ev->sigpipe[n]);
589 if(ev_fd(ev, ev_read, ev->sigpipe[0], signal_read, 0, "sigpipe read")) {
594 sigaddset(&ev->sigmask, sig);
595 xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
596 sigfd[sig] = ev->sigpipe[1];
597 ev->signals[sig].callback = callback;
598 ev->signals[sig].u = u;
599 sa.sa_handler = sighandler;
600 sigfillset(&sa.sa_mask);
601 sa.sa_flags = SA_RESTART;
602 xsigaction(sig, &sa, &ev->signals[sig].oldsa);
607 /** @brief Cancel a signal handler
608 * @param ev Event loop
609 * @param sig Signal to cancel
610 * @return 0 on success, non-0 on error
612 int ev_signal_cancel(ev_source *ev,
616 xsigaction(sig, &ev->signals[sig].oldsa, 0);
617 ev->signals[sig].callback = 0;
619 sigdelset(&ev->sigmask, sig);
622 xsigprocmask(SIG_UNBLOCK, &ss, 0);
626 /** @brief Clean up signal handling
627 * @param ev Event loop
629 * This function can be called from inside a fork. It restores signal
630 * handlers, unblocks the signals, and closes the signal pipe for @p ev.
632 void ev_signal_atfork(ev_source *ev) {
635 if(ev->sigpipe[0] != -1) {
636 /* revert any handled signals to their original state */
637 for(sig = 1; sig < NSIG; ++sig) {
638 if(ev->signals[sig].callback != 0)
639 xsigaction(sig, &ev->signals[sig].oldsa, 0);
641 /* and then unblock them */
642 xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
643 /* don't want a copy of the signal pipe open inside the fork */
644 xclose(ev->sigpipe[0]);
645 xclose(ev->sigpipe[1]);
649 /* child processes ************************************************************/
651 /** @brief Called on SIGCHLD */
652 static int sigchld_callback(ev_source *ev,
653 int attribute((unused)) sig,
654 void attribute((unused)) *u) {
657 int status, n, ret, revisit;
661 for(n = 0; n < ev->nchildren; ++n) {
662 r = wait4(ev->children[n].pid,
664 ev->children[n].options | WNOHANG,
667 ev_child_callback *c = ev->children[n].callback;
668 void *cu = ev->children[n].u;
670 if(WIFEXITED(status) || WIFSIGNALED(status))
671 ev_child_cancel(ev, r);
673 if((ret = c(ev, r, status, &ru, cu)))
676 /* We should "never" get an ECHILD but it can in fact happen. For
677 * instance on Linux 2.4.31, and probably other versions, if someone
678 * straces a child process and then a different child process
679 * terminates, when we wait4() the traced process we will get ECHILD
680 * because it has been reparented to strace. Obviously this is a
681 * hopeless design flaw in the tracing infrastructure, but we don't
682 * want the disorder server to bomb out because of it. So we just log
683 * the problem and ignore it.
685 error(errno, "error calling wait4 for PID %lu (broken ptrace?)",
686 (unsigned long)ev->children[n].pid);
695 /** @brief Configure event loop for child process handling
696 * @return 0 on success, non-0 on error
698 * Currently at most one event loop can handle child processes and it must be
699 * distinguished from others by calling this function on it. This could be
700 * fixed but since no process ever makes use of more than one event loop there
703 int ev_child_setup(ev_source *ev) {
704 D(("installing SIGCHLD handler"));
705 return ev_signal(ev, SIGCHLD, sigchld_callback, 0);
708 /** @brief Wait for a child process to terminate
709 * @param ev Event loop
710 * @param pid Process ID of child
711 * @param options Options to pass to @c wait4()
712 * @param callback Called when child terminates (or possibly when it stops)
713 * @param u Passed to @p callback
714 * @return 0 on success, non-0 on error
716 * You must have called ev_child_setup() on @p ev once first.
718 int ev_child(ev_source *ev,
721 ev_child_callback *callback,
725 D(("registering child handling %ld options %d callback %p %p",
726 (long)pid, options, (void *)callback, u));
727 assert(ev->signals[SIGCHLD].callback == sigchld_callback);
728 if(ev->nchildren >= ev->nchildslots) {
729 ev->nchildslots = ev->nchildslots ? 2 * ev->nchildslots : 16;
730 ev->children = xrealloc(ev->children,
731 ev->nchildslots * sizeof (struct child));
734 ev->children[n].pid = pid;
735 ev->children[n].options = options;
736 ev->children[n].callback = callback;
737 ev->children[n].u = u;
741 /** @brief Stop waiting for a child process
742 * @param ev Event loop
743 * @param pid Child process ID
744 * @return 0 on success, non-0 on error
746 int ev_child_cancel(ev_source *ev,
750 for(n = 0; n < ev->nchildren && ev->children[n].pid != pid; ++n)
752 assert(n < ev->nchildren);
753 if(n != ev->nchildren - 1)
754 ev->children[n] = ev->children[ev->nchildren - 1];
759 /* socket listeners ***********************************************************/
761 /** @brief State for a socket listener */
762 struct listen_state {
763 ev_listen_callback *callback;
767 /** @brief Called when a listening socket is readable */
768 static int listen_callback(ev_source *ev, int fd, void *u) {
769 const struct listen_state *l = u;
772 struct sockaddr_in in;
773 #if HAVE_STRUCT_SOCKADDR_IN6
774 struct sockaddr_in6 in6;
776 struct sockaddr_un un;
782 D(("callback for listener fd %d", fd));
783 while((addrlen = sizeof addr),
784 (newfd = accept(fd, &addr.sa, &addrlen)) >= 0) {
785 if((ret = l->callback(ev, newfd, &addr.sa, addrlen, l->u)))
794 error(errno, "error calling accept");
799 /* XXX on some systems EPROTO should be fatal, but we don't know if
800 * we're running on one of them */
801 error(errno, "error calling accept");
805 fatal(errno, "error calling accept");
808 if(errno != EINTR && errno != EAGAIN)
809 error(errno, "error calling accept");
813 /** @brief Listen on a socket for inbound stream connections
814 * @param ev Event source
815 * @param fd File descriptor of socket
816 * @param callback Called when a new connection arrives
817 * @param u Passed to @p callback
818 * @param what Text description of socket
819 * @return 0 on success, non-0 on error
821 int ev_listen(ev_source *ev,
823 ev_listen_callback *callback,
826 struct listen_state *l = xmalloc(sizeof *l);
828 D(("registering listener fd %d callback %p %p", fd, (void *)callback, u));
829 l->callback = callback;
831 return ev_fd(ev, ev_read, fd, listen_callback, l, what);
834 /** @brief Stop listening on a socket
835 * @param ev Event loop
836 * @param fd File descriptor of socket
837 * @return 0 on success, non-0 on error
839 int ev_listen_cancel(ev_source *ev, int fd) {
840 D(("cancelling listener fd %d", fd));
841 return ev_fd_cancel(ev, ev_read, fd);
844 /* buffer *********************************************************************/
846 /** @brief Buffer structure */
848 char *base, *start, *end, *top;
851 /** @brief Make sure there are @p bytes available at @c b->end */
852 static void buffer_space(struct buffer *b, size_t bytes) {
853 D(("buffer_space %p %p %p %p want %lu",
854 (void *)b->base, (void *)b->start, (void *)b->end, (void *)b->top,
855 (unsigned long)bytes));
856 if(b->start == b->end)
857 b->start = b->end = b->base;
858 if((size_t)(b->top - b->end) < bytes) {
859 if((size_t)((b->top - b->end) + (b->start - b->base)) < bytes) {
860 size_t newspace = b->end - b->start + bytes, n;
863 for(n = 16; n < newspace; n *= 2)
865 newbase = xmalloc_noptr(n);
866 memcpy(newbase, b->start, b->end - b->start);
868 b->end = newbase + (b->end - b->start);
869 b->top = newbase + n;
870 b->start = newbase; /* must be last */
872 memmove(b->base, b->start, b->end - b->start);
873 b->end = b->base + (b->end - b->start);
877 D(("result %p %p %p %p",
878 (void *)b->base, (void *)b->start, (void *)b->end, (void *)b->top));
881 /* readers and writers *******************************************************/
883 /** @brief State structure for a buffered writer */
885 /** @brief Sink used for writing to the buffer */
888 /** @brief Output buffer */
891 /** @brief File descriptor to write to */
894 /** @brief Set if there'll be no more output */
897 /** @brief Error/termination callback */
898 ev_error_callback *callback;
900 /** @brief Passed to @p callback */
903 /** @brief Parent event source */
906 /** @brief Maximum amount of time between successful writes, 0 = don't care */
908 /** @brief Maximum amount of data to buffer, 0 = don't care */
910 /** @brief Error code to pass to @p callback (see writer_shutdown()) */
912 /** @brief Timeout handle for @p timebound (or 0) */
913 ev_timeout_handle timeout;
915 /** @brief Description of this writer */
918 /** @brief Tied reader or 0 */
921 /** @brief Set when abandoned */
925 /** @brief State structure for a buffered reader */
927 /** @brief Input buffer */
929 /** @brief File descriptor read from */
931 /** @brief Called when new data is available */
932 ev_reader_callback *callback;
933 /** @brief Called on error and shutdown */
934 ev_error_callback *error_callback;
935 /** @brief Passed to @p callback and @p error_callback */
937 /** @brief Parent event loop */
939 /** @brief Set when EOF is detected */
941 /** @brief Error code to pass to error callback */
943 /** @brief Tied writer or NULL */
947 /* buffered writer ************************************************************/
949 /** @brief Shut down the writer
951 * This is called to shut down a writer. The error callback is not called
952 * through any other path. Also we do not cancel @p fd from anywhere else,
953 * though we might disable it.
955 * It has the signature of a timeout callback so that it can be called from a
958 * Calls @p callback with @p w->error as the error code (which might be 0).
960 static int writer_shutdown(ev_source *ev,
961 const attribute((unused)) struct timeval *now,
966 return 0; /* already shut down */
967 D(("writer_shutdown fd=%d error=%d", w->fd, w->error));
968 ev_timeout_cancel(ev, w->timeout);
969 ev_fd_cancel(ev, ev_write, w->fd);
972 D(("found a tied reader"));
973 /* If there is a reader still around we just untie it */
974 w->reader->writer = 0;
975 shutdown(w->fd, SHUT_WR); /* there'll be no more writes */
977 D(("no tied reader"));
978 /* There's no reader so we are free to close the FD */
982 return w->callback(ev, w->error, w->u);
985 /** @brief Called when a writer's @p timebound expires */
986 static int writer_timebound_exceeded(ev_source *ev,
987 const struct timeval *now,
989 ev_writer *const w = u;
993 error(0, "abandoning writer '%s' because no writes within %ds",
994 w->what, w->timebound);
995 w->error = ETIMEDOUT;
997 return writer_shutdown(ev, now, u);
1000 /** @brief Set the time bound callback (if not set already) */
1001 static void writer_set_timebound(ev_writer *w) {
1002 if(w->timebound && !w->timeout) {
1003 struct timeval when;
1004 ev_source *const ev = w->ev;
1006 xgettimeofday(&when, 0);
1007 when.tv_sec += w->timebound;
1008 ev_timeout(ev, &w->timeout, &when, writer_timebound_exceeded, w);
1012 /** @brief Called when a writer's file descriptor is writable */
1013 static int writer_callback(ev_source *ev, int fd, void *u) {
1014 ev_writer *const w = u;
1017 n = write(fd, w->b.start, w->b.end - w->b.start);
1018 D(("callback for writer fd %d, %ld bytes, n=%d, errno=%d",
1019 fd, (long)(w->b.end - w->b.start), n, errno));
1021 /* Consume bytes from the buffer */
1023 /* Suppress any outstanding timeout */
1024 ev_timeout_cancel(ev, w->timeout);
1026 if(w->b.start == w->b.end) {
1027 /* The buffer is empty */
1029 /* We're done, we can shut down this writer */
1031 return writer_shutdown(ev, 0, w);
1033 /* There might be more to come but we don't need writer_callback() to
1034 * be called for the time being */
1035 ev_fd_disable(ev, ev_write, fd);
1037 /* The buffer isn't empty, set a timeout so we give up if we don't manage
1038 * to write some more within a reasonable time */
1039 writer_set_timebound(w);
1047 return writer_shutdown(ev, 0, w);
1053 /** @brief Write bytes to a writer's buffer
1055 * This is the sink write callback.
1057 * Calls ev_fd_enable() if necessary (i.e. if the buffer was empty but
1060 static int ev_writer_write(struct sink *sk, const void *s, int n) {
1061 ev_writer *w = (ev_writer *)sk;
1064 return 0; /* avoid silliness */
1066 error(0, "ev_writer_write on %s after shutdown", w->what);
1067 if(w->spacebound && w->b.end - w->b.start + n > w->spacebound) {
1068 /* The new buffer contents will exceed the space bound. We assume that the
1069 * remote client has gone away and TCP hasn't noticed yet, or that it's got
1070 * hopelessly stuck. */
1073 error(0, "abandoning writer '%s' because buffer has reached %td bytes",
1074 w->what, w->b.end - w->b.start);
1075 ev_fd_disable(w->ev, ev_write, w->fd);
1077 return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
1081 /* Make sure there is space */
1082 buffer_space(&w->b, n);
1083 /* If the buffer was formerly empty then we'll need to re-enable the FD */
1084 if(w->b.start == w->b.end)
1085 ev_fd_enable(w->ev, ev_write, w->fd);
1086 memcpy(w->b.end, s, n);
1088 /* Arrange a timeout if there wasn't one set already */
1089 writer_set_timebound(w);
1093 /** @brief Create a new buffered writer
1094 * @param ev Event loop
1095 * @param fd File descriptor to write to
1096 * @param callback Called if an error occurs and when finished
1097 * @param u Passed to @p callback
1098 * @param what Text description
1099 * @return New writer or @c NULL
1101 * Writers own their file descriptor and close it when they have finished with
1104 * If you pass the same fd to a reader and writer, you must tie them together
1107 ev_writer *ev_writer_new(ev_source *ev,
1109 ev_error_callback *callback,
1112 ev_writer *w = xmalloc(sizeof *w);
1114 D(("registering writer fd %d callback %p %p", fd, (void *)callback, u));
1115 w->s.write = ev_writer_write;
1117 w->callback = callback;
1120 w->timebound = 10 * 60;
1121 w->spacebound = 512 * 1024;
1123 if(ev_fd(ev, ev_write, fd, writer_callback, w, what))
1125 /* Buffer is initially empty so we don't want a callback */
1126 ev_fd_disable(ev, ev_write, fd);
1130 /** @brief Get/set the time bound
1132 * @param new_time_bound New bound or -1 for no change
1133 * @return Latest time bound
1135 * If @p new_time_bound is negative then the current time bound is returned.
1136 * Otherwise it is set and the new value returned.
1138 * The time bound is the number of seconds allowed between writes. If it takes
1139 * longer than this to flush a buffer then the peer will be assumed to be dead
1140 * and an error will be synthesized. 0 means "don't care". The default time
1141 * bound is 10 minutes.
1143 * Note that this value does not take into account kernel buffering and
1146 int ev_writer_time_bound(ev_writer *w,
1147 int new_time_bound) {
1148 if(new_time_bound >= 0)
1149 w->timebound = new_time_bound;
1150 return w->timebound;
1153 /** @brief Get/set the space bound
1155 * @param new_space_bound New bound or -1 for no change
1156 * @return Latest space bound
1158 * If @p new_space_bound is negative then the current space bound is returned.
1159 * Otherwise it is set and the new value returned.
1161 * The space bound is the number of bytes allowed in the buffer. If
1162 * the buffer exceeds this size an error will be synthesized. 0 means "don't
1163 * care". The default space bound is 512Kbyte.
1165 * Note that this value does not take into account kernel buffering.
1167 int ev_writer_space_bound(ev_writer *w,
1168 int new_space_bound) {
1169 if(new_space_bound >= 0)
1170 w->spacebound = new_space_bound;
1171 return w->spacebound;
1174 /** @brief Return the sink associated with a writer
1176 * @return Pointer to sink
1178 * Writing to the sink will arrange for those bytes to be written to the file
1179 * descriptor as and when it is writable.
1181 struct sink *ev_writer_sink(ev_writer *w) {
1183 fatal(0, "ev_write_sink called with null writer");
1187 /** @brief Close a writer
1188 * @param w Writer to close
1189 * @return 0 on success, non-0 on error
1191 * Close a writer. No more bytes should be written to its sink.
1193 * When the last byte has been written the callback will be called with an
1194 * error code of 0. It is guaranteed that this will NOT happen before
1195 * ev_writer_close() returns (although the file descriptor for the writer might
1196 * be cancelled by the time it returns).
1198 int ev_writer_close(ev_writer *w) {
1199 D(("close writer fd %d", w->fd));
1201 return 0; /* already closed */
1203 if(w->b.start == w->b.end) {
1204 /* We're already finished */
1205 w->error = 0; /* no error */
1206 return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
1211 /** @brief Attempt to flush a writer
1212 * @param w Writer to flush
1213 * @return 0 on success, non-0 on error
1215 * Does a speculative write of any buffered data. Does not block if it cannot
1218 int ev_writer_flush(ev_writer *w) {
1219 return writer_callback(w->ev, w->fd, w);
1222 /* buffered reader ************************************************************/
1224 /** @brief Shut down a reader
1226 * This is the only path through which we cancel and close the file descriptor.
1227 * As with the writer case it is given a timeout signature to allow it to be
1228 * deferred to the next iteration of the event loop.
1230 * We only call @p error_callback if @p error is nonzero (unlike the writer
1233 static int reader_shutdown(ev_source *ev,
1234 const attribute((unused)) struct timeval *now,
/* u is the generic callback argument; here it carries the reader */
1236 ev_reader *const r = u;
1239 return 0; /* already shut down */
1240 D(("reader_shutdown fd=%d", r->fd));
/* Cancel the read-mode registration for this fd with the event loop */
1241 ev_fd_cancel(ev, ev_read, r->fd);
1244 D(("found a tied writer"));
1245 /* If there is a writer still around we just untie it */
1246 r->writer->reader = 0;
/* Only the read half is shut down here; the fd itself is left alone for the
 * writer that was tied to it */
1247 shutdown(r->fd, SHUT_RD); /* there'll be no more reads */
1249 D(("no tied writer found"));
1250 /* There's no writer so we are free to close the FD */
/* Hand the stored error to the owner's error callback; whatever that callback
 * returns becomes this function's return value */
1255 return r->error_callback(ev, r->error, r->u);
1260 /** @brief Called when a reader's @p fd is readable */
1261 static int reader_callback(ev_source *ev, int fd, void *u) {
/* presumably guarantees at least one free byte after b.end before the read -
 * confirm against buffer_space() */
1265 buffer_space(&r->b, 1);
/* Read into the free region between b.end and b.top */
1266 n = read(fd, r->b.end, r->b.top - r->b.end);
1267 D(("read fd %d buffer %d returned %d errno %d",
1268 fd, (int)(r->b.top - r->b.end), n, errno));
/* Present everything between b.start and b.end to the user; the 0 is the eof
 * flag (compare the call further down, which passes 1) */
1271 return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 0, r->u);
1273 /* No more read callbacks needed */
1274 ev_fd_disable(r->ev, ev_read, r->fd);
/* Shutdown is queued as a timeout here rather than run inline, so the user
 * callback below is called first */
1275 ev_timeout(r->ev, 0, 0, reader_shutdown, r);
1276 /* Pass the remaining data and an eof indicator to the user */
1277 return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 1, r->u);
1284 /* Fatal error, kill the reader now */
/* Unlike the eof path, reader_shutdown is called directly (with a null
 * timestamp) instead of being deferred through ev_timeout() */
1286 return reader_shutdown(ev, 0, r);
1292 /** @brief Create a new buffered reader
1293 * @param ev Event loop
1294 * @param fd File descriptor to read from
1295 * @param callback Called when new data is available
1296 * @param error_callback Called if an error occurs
1297 * @param u Passed to callbacks
1298 * @param what Text description
1299 * @return New reader or @c NULL
1301 * Readers own their fd and close it when they are finished with it.
1303 * If you pass the same fd to a reader and writer, you must tie them together
1306 ev_reader *ev_reader_new(ev_source *ev,
1308 ev_reader_callback *callback,
1309 ev_error_callback *error_callback,
/* Allocation goes through xmalloc(); its result is used without a null check
 * here */
1312 ev_reader *r = xmalloc(sizeof *r);
1314 D(("registering reader fd %d callback %p %p %p",
1315 fd, (void *)callback, (void *)error_callback, u));
/* Remember the user's data and error callbacks for later dispatch */
1317 r->callback = callback;
1318 r->error_callback = error_callback;
/* Register reader_callback for read events on fd, with the new reader as the
 * callback's user data; a nonzero result from ev_fd() is treated as failure */
1321 if(ev_fd(ev, ev_read, fd, reader_callback, r, what))
1326 void ev_reader_buffer(ev_reader *r, size_t nbytes) {
/* Ask buffer_space() for nbytes minus what is already buffered
 * (b.end - b.start).
 * NOTE(review): nbytes is a size_t, so if more than nbytes is already buffered
 * the subtraction presumably wraps to a very large value rather than going
 * negative - confirm how buffer_space() treats that case. */
1327 buffer_space(&r->b, nbytes - (r->b.end - r->b.start));
1330 /** @brief Consume @p n bytes from the reader's buffer
1332 * @param n Number of bytes to consume
1334 * Tells the reader that the next @p n bytes have been dealt with and can now
1337 void ev_reader_consume(ev_reader *r, size_t n) {
1341 /** @brief Cancel a reader
1343 * @return 0 on success, non-0 on error
1345 * No further callbacks will be made, and the FD will be closed (in a later
1346 * iteration of the event loop).
1348 int ev_reader_cancel(ev_reader *r) {
1349 D(("cancel reader fd %d", r->fd));
1351 return 0; /* already thoroughly cancelled */
/* Read events are switched off immediately; the result of this call is not
 * checked */
1352 ev_fd_disable(r->ev, ev_read, r->fd);
/* The teardown itself is left to reader_shutdown, queued as a timeout; the
 * return value only reports whether that queuing succeeded */
1353 return ev_timeout(r->ev, 0, 0, reader_shutdown, r);
1356 /** @brief Temporarily disable a reader
1358 * @return 0 on success, non-0 on error
1360 * No further callbacks for this reader will be made. Re-enable with
1361 * ev_reader_enable().
1363 int ev_reader_disable(ev_reader *r) {
1364 D(("disable reader fd %d", r->fd));
/* Only the read-mode registration is disabled; the result of ev_fd_disable()
 * is handed straight back to the caller */
1365 return ev_fd_disable(r->ev, ev_read, r->fd);
1368 /** @brief Called from ev_run() for ev_reader_incomplete() */
/* NOTE(review): ev is tagged attribute((unused)) in the signature but is
 * passed to the user callback at the end of this function - confirm whether
 * the attribute is stale. */
1369 static int reader_continuation(ev_source attribute((unused)) *ev,
1370 const attribute((unused)) struct timeval *now,
1374 D(("reader continuation callback fd %d", r->fd));
1375 /* If not at EOF turn the FD back on */
1377 if(ev_fd_enable(r->ev, ev_read, r->fd))
1379 /* We're already in a timeout callback so there's no reason we can't call the
1380 * user callback directly (compare ev_reader_enable()). */
/* The user sees the bytes between b.start and b.end together with the
 * reader's current eof flag */
1381 return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
1384 /** @brief Arrange another callback
1386 * @return 0 on success, non-0 on error
1388 * Indicates that the reader can process more input but would like to yield to
1389 * other clients of the event loop. Input will be disabled but it will be
1390 * re-enabled on the next iteration of the event loop and the read callback
1391 * will be called again (even if no further bytes are available).
1393 int ev_reader_incomplete(ev_reader *r) {
/* Switch read events off first; failure to do so is reported as -1 */
1394 if(ev_fd_disable(r->ev, ev_read, r->fd)) return -1;
/* reader_continuation, queued as a timeout, is what turns the fd back on and
 * calls the user callback again */
1395 return ev_timeout(r->ev, 0, 0, reader_continuation, r);
/** @brief Timeout callback queued by ev_reader_enable() */
1398 static int reader_enabled(ev_source *ev,
1399 const attribute((unused)) struct timeval *now,
1403 D(("reader enabled callback fd %d", r->fd));
/* Present the bytes between b.start and b.end, plus the reader's current eof
 * flag, to the user callback and return its result */
1404 return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
1407 /** @brief Re-enable reading
1409 * @return 0 on success, non-0 on error
1411 * If there is unconsumed data then you get a callback next time round the
1412 * event loop even if nothing new has been read.
1414 * The idea is in your read callback you come across a line (or whatever) that
1415 * can't be processed immediately. So you set up processing and disable
1416 * reading with ev_reader_disable(). Later when you finish processing you
1417 * re-enable. You'll automatically get another callback directly from the
1418 * event loop (i.e. not from inside ev_reader_enable()) so you can handle the
1419 * next line (or whatever) if the whole thing has in fact already arrived.
1421 * The difference between this process and calling ev_reader_incomplete() is
1422 * ev_reader_incomplete() deals with the case where you can process now but
1423 * would rather yield to other clients of the event loop, while using
1424 * ev_reader_disable() and ev_reader_enable() deals with the case where you
1425 * cannot process input yet because some other process is actually not
1428 int ev_reader_enable(ev_reader *r) {
1429 D(("enable reader fd %d", r->fd));
1431 /* First if we're not at EOF then we re-enable reading */
1433 if(ev_fd_enable(r->ev, ev_read, r->fd))
1435 /* Arrange another callback next time round the event loop */
/* The user callback is not made from here: reader_enabled is queued as a
 * timeout and makes the call later */
1436 return ev_timeout(r->ev, 0, 0, reader_enabled, r);
1439 /** @brief Tie a reader and a writer together
1442 * @return 0 on success, non-0 on error
1444 * This function must be called if @p r and @p w share a file descriptor.
1446 int ev_tie(ev_reader *r, ev_writer *w) {
1447 assert(r->writer == 0);
1448 assert(w->reader == 0);