chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
	4	*
	5	* This program is free software: you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation, either version 3 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	17	*/
	18	/** @file lib/event.c
	19	* @brief DisOrder event loop
	20	*/
	21
	22	#include "common.h"
	23
	24	#include <unistd.h>
	25	#include <fcntl.h>
	26	#include <sys/time.h>
	27	#include <sys/types.h>
	28	#include <sys/resource.h>
	29	#include <sys/wait.h>
	30	#include <sys/stat.h>
	31	#include <unistd.h>
	32	#include <signal.h>
	33	#include <errno.h>
	34	#include <sys/socket.h>
	35	#include <netinet/in.h>
	36	#include <sys/un.h>
	37	#include "event.h"
	38	#include "mem.h"
	39	#include "log.h"
	40	#include "syscalls.h"
	41	#include "printf.h"
	42	#include "sink.h"
	43	#include "vector.h"
	44	#include "timeval.h"
	45	#include "heap.h"
	46
	47	/** @brief A timeout */
	48	struct timeout {
	49	struct timeout *next;
	50	struct timeval when;
	51	ev_timeout_callback *callback;
	52	void *u;
	53	int active;
	54	};
	55
	56	/** @brief Comparison function for timeouts */
	57	static int timeout_lt(const struct timeout *a,
	58	const struct timeout *b) {
	59	return tvlt(&a->when, &b->when);
	60	}
	61
	62	HEAP_TYPE(timeout_heap, struct timeout *, timeout_lt);
	63	HEAP_DEFINE(timeout_heap, struct timeout *, timeout_lt);
	64
	65	/** @brief A file descriptor in one mode */
	66	struct fd {
	67	int fd;
	68	ev_fd_callback *callback;
	69	void *u;
	70	const char *what;
	71	};
	72
	73	/** @brief All the file descriptors in a given mode */
	74	struct fdmode {
	75	/** @brief Mask of active file descriptors passed to @c select() */
	76	fd_set enabled;
	77
	78	/** @brief File descriptor mask returned from @c select() */
	79	fd_set tripped;
	80
	81	/** @brief Number of file descriptors in @p fds */
	82	int nfds;
	83
	84	/** @brief Number of slots in @p fds */
	85	int fdslots;
	86
	87	/** @brief Array of all active file descriptors */
	88	struct fd *fds;
	89
	90	/** @brief Highest-numbered file descriptor or 0 */
	91	int maxfd;
	92	};
	93
	94	/** @brief A signal handler */
	95	struct signal {
	96	struct sigaction oldsa;
	97	ev_signal_callback *callback;
	98	void *u;
	99	};
	100
	101	/** @brief A child process */
	102	struct child {
	103	pid_t pid;
	104	int options;
	105	ev_child_callback *callback;
	106	void *u;
	107	};
	108
	109	/** @brief An event loop */
	110	struct ev_source {
	111	/** @brief File descriptors, per mode */
	112	struct fdmode mode[ev_nmodes];
	113
	114	/** @brief Heap of timeouts */
	115	struct timeout_heap timeouts[1];
	116
	117	/** @brief Array of handled signals */
	118	struct signal signals[NSIG];
	119
	120	/** @brief Mask of handled signals */
	121	sigset_t sigmask;
	122
	123	/** @brief Escape early from handling of @c select() results
	124	*
	125	* This is set if any of the file descriptor arrays are invalidated, since
	126	* it's then not safe for processing of them to continue.
	127	*/
	128	int escape;
	129
	130	/** @brief Signal handling pipe
	131	*
	132	* The signal handle writes signal numbers down this pipe.
	133	*/
	134	int sigpipe[2];
	135
	136	/** @brief Number of child processes in @p children */
	137	int nchildren;
	138
	139	/** @brief Number of slots in @p children */
	140	int nchildslots;
	141
	142	/** @brief Array of child processes */
	143	struct child *children;
	144	};
	145
	146	/** @brief Names of file descriptor modes */
	147	static const char *modenames[] = { "read", "write", "except" };
	148
	149	/* utilities ******************************************************************/
	150
	151	/* creation *******************************************************************/
	152
	153	/** @brief Create a new event loop */
	154	ev_source *ev_new(void) {
	155	ev_source ev = xmalloc(sizeof ev);
	156	int n;
	157
	158	memset(ev, 0, sizeof *ev);
	159	for(n = 0; n < ev_nmodes; ++n)
	160	FD_ZERO(&ev->mode[n].enabled);
	161	ev->sigpipe[0] = ev->sigpipe[1] = -1;
	162	sigemptyset(&ev->sigmask);
	163	timeout_heap_init(ev->timeouts);
	164	return ev;
	165	}
	166
	167	/* event loop *****************************************************************/
	168
	169	/** @brief Run the event loop
	170	* @return -1 on error, non-0 if any callback returned non-0
	171	*/
	172	int ev_run(ev_source *ev) {
	173	for(;;) {
	174	struct timeval now;
	175	struct timeval delta;
	176	int n, mode;
	177	int ret;
	178	int maxfd;
	179	struct timeout timeouts, t, **tt;
	180	struct stat sb;
	181
	182	xgettimeofday(&now, 0);
	183	/* Handle timeouts. We don't want to handle any timeouts that are added
	184	* while we're handling them (otherwise we'd have to break out of infinite
	185	* loops, preferrably without starving better-behaved subsystems). Hence
	186	* the slightly complicated two-phase approach here. */
	187	/* First we read those timeouts that have triggered out of the heap. We
	188	* keep them in the same order they came out of the heap in. */
	189	tt = &timeouts;
	190	while(timeout_heap_count(ev->timeouts)
	191	&& tvle(&timeout_heap_first(ev->timeouts)->when, &now)) {
	192	/* This timeout has reached its trigger time; provided it has not been
	193	* cancelled we add it to the timeouts list. */
	194	t = timeout_heap_remove(ev->timeouts);
	195	if(t->active) {
	196	*tt = t;
	197	tt = &t->next;
	198	}
	199	}
	200	*tt = 0;
	201	/* Now we can run the callbacks for those timeouts. They might add further
	202	* timeouts that are already in the past but they won't trigger until the
	203	* next time round the event loop. */
	204	for(t = timeouts; t; t = t->next) {
	205	D(("calling timeout for %ld.%ld callback %p %p",
	206	(long)t->when.tv_sec, (long)t->when.tv_usec,
	207	(void *)t->callback, t->u));
	208	ret = t->callback(ev, &now, t->u);
	209	if(ret)
	210	return ret;
	211	}
	212	maxfd = 0;
	213	for(mode = 0; mode < ev_nmodes; ++mode) {
	214	ev->mode[mode].tripped = ev->mode[mode].enabled;
	215	if(ev->mode[mode].maxfd > maxfd)
	216	maxfd = ev->mode[mode].maxfd;
	217	}
	218	xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
	219	do {
	220	if(timeout_heap_count(ev->timeouts)) {
	221	t = timeout_heap_first(ev->timeouts);
	222	xgettimeofday(&now, 0);
	223	delta.tv_sec = t->when.tv_sec - now.tv_sec;
	224	delta.tv_usec = t->when.tv_usec - now.tv_usec;
	225	if(delta.tv_usec < 0) {
	226	delta.tv_usec += 1000000;
	227	--delta.tv_sec;
	228	}
	229	if(delta.tv_sec < 0)
	230	delta.tv_sec = delta.tv_usec = 0;
	231	n = select(maxfd + 1,
	232	&ev->mode[ev_read].tripped,
	233	&ev->mode[ev_write].tripped,
	234	&ev->mode[ev_except].tripped,
	235	&delta);
	236	} else {
	237	n = select(maxfd + 1,
	238	&ev->mode[ev_read].tripped,
	239	&ev->mode[ev_write].tripped,
	240	&ev->mode[ev_except].tripped,
	241	0);
	242	}
	243	} while(n < 0 && errno == EINTR);
	244	xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
	245	if(n < 0) {
	246	disorder_error(errno, "error calling select");
	247	if(errno == EBADF) {
	248	/* If there's a bad FD in the mix then check them all and log what we
	249	* find, to ease debugging */
	250	for(mode = 0; mode < ev_nmodes; ++mode) {
	251	for(n = 0; n < ev->mode[mode].nfds; ++n) {
	252	const int fd = ev->mode[mode].fds[n].fd;
	253
	254	if(FD_ISSET(fd, &ev->mode[mode].enabled)
	255	&& fstat(fd, &sb) < 0)
	256	disorder_error(errno, "mode %s fstat %d (%s)",
	257	modenames[mode], fd, ev->mode[mode].fds[n].what);
	258	}
	259	for(n = 0; n <= maxfd; ++n)
	260	if(FD_ISSET(n, &ev->mode[mode].enabled)
	261	&& fstat(n, &sb) < 0)
	262	disorder_error(errno, "mode %s fstat %d", modenames[mode], n);
	263	}
	264	}
	265	return -1;
	266	}
	267	if(n > 0) {
	268	/* if anything deranges the meaning of an fd, or re-orders the
	269	* fds[] tables, we'd better give up; such operations will
	270	* therefore set @escape@. */
	271	ev->escape = 0;
	272	for(mode = 0; mode < ev_nmodes && !ev->escape; ++mode)
	273	for(n = 0; n < ev->mode[mode].nfds && !ev->escape; ++n) {
	274	int fd = ev->mode[mode].fds[n].fd;
	275	if(FD_ISSET(fd, &ev->mode[mode].tripped)) {
	276	D(("calling %s fd %d callback %p %p", modenames[mode], fd,
	277	(void *)ev->mode[mode].fds[n].callback,
	278	ev->mode[mode].fds[n].u));
	279	ret = ev->mode[mode].fds[n].callback(ev, fd,
	280	ev->mode[mode].fds[n].u);
	281	if(ret)
	282	return ret;
	283	}
	284	}
	285	}
	286	/* we'll pick up timeouts back round the loop */
	287	}
	288	}
	289
	290	/* file descriptors ***********************************************************/
	291
	292	/** @brief Register a file descriptor
	293	* @param ev Event loop
	294	* @param mode @c ev_read or @c ev_write
	295	* @param fd File descriptor
	296	* @param callback Called when @p is readable/writable
	297	* @param u Passed to @p callback
	298	* @param what Text description
	299	* @return 0 on success, non-0 on error
	300	*
	301	* Sets @ref ev_source::escape, so no further processing of file descriptors
	302	* will occur this time round the event loop.
	303	*/
	304	int ev_fd(ev_source *ev,
	305	ev_fdmode mode,
	306	int fd,
	307	ev_fd_callback *callback,
	308	void *u,
	309	const char *what) {
	310	int n;
	311
	312	D(("registering %s fd %d callback %p %p", modenames[mode], fd,
	313	(void *)callback, u));
	314	if(fd >= FD_SETSIZE)
	315	return -1;
	316	assert(mode < ev_nmodes);
	317	if(ev->mode[mode].nfds >= ev->mode[mode].fdslots) {
	318	ev->mode[mode].fdslots = (ev->mode[mode].fdslots
	319	? 2 * ev->mode[mode].fdslots : 16);
	320	D(("expanding %s fd table to %d entries", modenames[mode],
	321	ev->mode[mode].fdslots));
	322	ev->mode[mode].fds = xrealloc(ev->mode[mode].fds,
	323	ev->mode[mode].fdslots * sizeof (struct fd));
	324	}
	325	n = ev->mode[mode].nfds++;
	326	FD_SET(fd, &ev->mode[mode].enabled);
	327	ev->mode[mode].fds[n].fd = fd;
	328	ev->mode[mode].fds[n].callback = callback;
	329	ev->mode[mode].fds[n].u = u;
	330	ev->mode[mode].fds[n].what = what;
	331	if(fd > ev->mode[mode].maxfd)
	332	ev->mode[mode].maxfd = fd;
	333	ev->escape = 1;
	334	return 0;
	335	}
	336
	337	/** @brief Cancel a file descriptor
	338	* @param ev Event loop
	339	* @param mode @c ev_read or @c ev_write
	340	* @param fd File descriptor
	341	* @return 0 on success, non-0 on error
	342	*
	343	* Sets @ref ev_source::escape, so no further processing of file descriptors
	344	* will occur this time round the event loop.
	345	*/
	346	int ev_fd_cancel(ev_source *ev, ev_fdmode mode, int fd) {
	347	int n;
	348	int maxfd;
	349
	350	D(("cancelling mode %s fd %d", modenames[mode], fd));
	351	/* find the right struct fd */
	352	for(n = 0; n < ev->mode[mode].nfds && fd != ev->mode[mode].fds[n].fd; ++n)
	353	;
	354	assert(n < ev->mode[mode].nfds);
	355	/* swap in the last fd and reduce the count */
	356	if(n != ev->mode[mode].nfds - 1)
	357	ev->mode[mode].fds[n] = ev->mode[mode].fds[ev->mode[mode].nfds - 1];
	358	--ev->mode[mode].nfds;
	359	/* if that was the biggest fd, find the new biggest one */
	360	if(fd == ev->mode[mode].maxfd) {
	361	maxfd = 0;
	362	for(n = 0; n < ev->mode[mode].nfds; ++n)
	363	if(ev->mode[mode].fds[n].fd > maxfd)
	364	maxfd = ev->mode[mode].fds[n].fd;
	365	ev->mode[mode].maxfd = maxfd;
	366	}
	367	/* don't tell select about this fd any more */
	368	FD_CLR(fd, &ev->mode[mode].enabled);
	369	ev->escape = 1;
	370	return 0;
	371	}
	372
	373	/** @brief Re-enable a file descriptor
	374	* @param ev Event loop
	375	* @param mode @c ev_read or @c ev_write
	376	* @param fd File descriptor
	377	* @return 0 on success, non-0 on error
	378	*
	379	* It is harmless if @p fd is currently disabled, but it must not have been
	380	* cancelled.
	381	*/
	382	int ev_fd_enable(ev_source *ev, ev_fdmode mode, int fd) {
	383	assert(fd >= 0);
	384	D(("enabling mode %s fd %d", modenames[mode], fd));
	385	FD_SET(fd, &ev->mode[mode].enabled);
	386	return 0;
	387	}
	388
	389	/** @brief Temporarily disable a file descriptor
	390	* @param ev Event loop
	391	* @param mode @c ev_read or @c ev_write
	392	* @param fd File descriptor
	393	* @return 0 on success, non-0 on error
	394	*
	395	* Re-enable with ev_fd_enable(). It is harmless if @p fd is already disabled,
	396	* but it must not have been cancelled.
	397	*/
	398	int ev_fd_disable(ev_source *ev, ev_fdmode mode, int fd) {
	399	D(("disabling mode %s fd %d", modenames[mode], fd));
	400	FD_CLR(fd, &ev->mode[mode].enabled);
	401	FD_CLR(fd, &ev->mode[mode].tripped);
	402	/* Suppress any pending callbacks */
	403	ev->escape = 1;
	404	return 0;
	405	}
	406
	407	/** @brief Log a report of file descriptor state */
	408	void ev_report(ev_source *ev) {
	409	int n, fd;
	410	ev_fdmode mode;
	411	struct dynstr d[1];
	412	char b[4096];
	413
	414	if(!debugging)
	415	return;
	416	dynstr_init(d);
	417	for(mode = 0; mode < ev_nmodes; ++mode) {
	418	D(("mode %s maxfd %d", modenames[mode], ev->mode[mode].maxfd));
	419	for(n = 0; n < ev->mode[mode].nfds; ++n) {
	420	fd = ev->mode[mode].fds[n].fd;
	421	D(("fd %s %d%s%s (%s)", modenames[mode], fd,
	422	FD_ISSET(fd, &ev->mode[mode].enabled) ? " enabled" : "",
	423	FD_ISSET(fd, &ev->mode[mode].tripped) ? " tripped" : "",
	424	ev->mode[mode].fds[n].what));
	425	}
	426	d->nvec = 0;
	427	for(fd = 0; fd <= ev->mode[mode].maxfd; ++fd) {
	428	if(!FD_ISSET(fd, &ev->mode[mode].enabled))
	429	continue;
	430	for(n = 0; n < ev->mode[mode].nfds; ++n) {
	431	if(ev->mode[mode].fds[n].fd == fd)
	432	break;
	433	}
	434	if(n < ev->mode[mode].nfds)
	435	snprintf(b, sizeof b, "%d(%s)", fd, ev->mode[mode].fds[n].what);
	436	else
	437	snprintf(b, sizeof b, "%d", fd);
	438	dynstr_append(d, ' ');
	439	dynstr_append_string(d, b);
	440	}
	441	dynstr_terminate(d);
	442	D(("%s enabled:%s", modenames[mode], d->vec));
	443	}
	444	}
	445
	446	/* timeouts *******************************************************************/
	447
	448	/** @brief Register a timeout
	449	* @param ev Event source
	450	* @param handlep Where to store timeout handle, or @c NULL
	451	* @param when Earliest time to call @p callback, or @c NULL
	452	* @param callback Function to call at or after @p when
	453	* @param u Passed to @p callback
	454	* @return 0 on success, non-0 on error
	455	*
	456	* If @p when is a null pointer then a time of 0 is assumed. The effect is to
	457	* call the timeout handler from ev_run() next time around the event loop.
	458	* This is used internally to schedule various operations if it is not
	459	* convenient to call them from the current place in the call stack, or
	460	* externally to ensure that other clients of the event loop get a look in when
	461	* performing some lengthy operation.
	462	*/
	463	int ev_timeout(ev_source *ev,
	464	ev_timeout_handle *handlep,
	465	const struct timeval *when,
	466	ev_timeout_callback *callback,
	467	void *u) {
	468	struct timeout *t;
	469
	470	D(("registering timeout at %ld.%ld callback %p %p",
	471	when ? (long)when->tv_sec : 0, when ? (long)when->tv_usec : 0,
	472	(void *)callback, u));
	473	t = xmalloc(sizeof *t);
	474	if(when)
	475	t->when = *when;
	476	t->callback = callback;
	477	t->u = u;
	478	t->active = 1;
	479	timeout_heap_insert(ev->timeouts, t);
	480	if(handlep)
	481	*handlep = t;
	482	return 0;
	483	}
	484
	485	/** @brief Cancel a timeout
	486	* @param ev Event loop
	487	* @param handle Handle returned from ev_timeout(), or 0
	488	* @return 0 on success, non-0 on error
	489	*
	490	* If @p handle is 0 then this is a no-op.
	491	*/
	492	int ev_timeout_cancel(ev_source attribute((unused)) *ev,
	493	ev_timeout_handle handle) {
	494	struct timeout *t = handle;
	495
	496	if(t)
	497	t->active = 0;
	498	return 0;
	499	}
	500
	501	/* signals ********************************************************************/
	502
	503	/** @brief Mapping of signals to pipe write ends
	504	*
	505	* The pipes are per-event loop, it's possible in theory for there to be
	506	* multiple event loops (e.g. in different threads), although in fact DisOrder
	507	* does not do this.
	508	*/
	509	static int sigfd[NSIG];
	510
	511	/** @brief The signal handler
	512	* @param s Signal number
	513	*
	514	* Writes to @c sigfd[s].
	515	*/
	516	static void sighandler(int s) {
	517	unsigned char sc = s;
	518	static const char errmsg[] = "error writing to signal pipe";
	519
	520	/* probably the reader has stopped listening for some reason */
	521	if(write(sigfd[s], &sc, 1) < 0) {
	522	/* do the best we can as we're about to abort; shut _up_, gcc */
	523	int _ignore = write(2, errmsg, sizeof errmsg - 1);
	524	(void)_ignore;
	525	abort();
	526	}
	527	}
	528
	529	/** @brief Read callback for signals */
	530	static int signal_read(ev_source *ev,
	531	int attribute((unused)) fd,
	532	void attribute((unused)) *u) {
	533	unsigned char s;
	534	int n;
	535	int ret;
	536
	537	if((n = read(ev->sigpipe[0], &s, 1)) == 1)
	538	if((ret = ev->signals[s].callback(ev, s, ev->signals[s].u)))
	539	return ret;
	540	assert(n != 0);
	541	if(n < 0 && (errno != EINTR && errno != EAGAIN)) {
	542	disorder_error(errno, "error reading from signal pipe %d", ev->sigpipe[0]);
	543	return -1;
	544	}
	545	return 0;
	546	}
	547
	548	/** @brief Close the signal pipe */
	549	static void close_sigpipe(ev_source *ev) {
	550	int save_errno = errno;
	551
	552	xclose(ev->sigpipe[0]);
	553	xclose(ev->sigpipe[1]);
	554	ev->sigpipe[0] = ev->sigpipe[1] = -1;
	555	errno = save_errno;
	556	}
	557
	558	/** @brief Register a signal handler
	559	* @param ev Event loop
	560	* @param sig Signal to handle
	561	* @param callback Called when signal is delivered
	562	* @param u Passed to @p callback
	563	* @return 0 on success, non-0 on error
	564	*
	565	* Note that @p callback is called from inside ev_run(), not from inside the
	566	* signal handler, so the usual restrictions on signal handlers do not apply.
	567	*/
	568	int ev_signal(ev_source *ev,
	569	int sig,
	570	ev_signal_callback *callback,
	571	void *u) {
	572	int n;
	573	struct sigaction sa;
	574
	575	D(("registering signal %d handler callback %p %p", sig, (void *)callback, u));
	576	assert(sig > 0);
	577	assert(sig < NSIG);
	578	assert(sig <= UCHAR_MAX);
	579	if(ev->sigpipe[0] == -1) {
	580	D(("creating signal pipe"));
	581	xpipe(ev->sigpipe);
	582	D(("signal pipe is %d, %d", ev->sigpipe[0], ev->sigpipe[1]));
	583	for(n = 0; n < 2; ++n) {
	584	nonblock(ev->sigpipe[n]);
	585	cloexec(ev->sigpipe[n]);
	586	}
	587	if(ev_fd(ev, ev_read, ev->sigpipe[0], signal_read, 0, "sigpipe read")) {
	588	close_sigpipe(ev);
	589	return -1;
	590	}
	591	}
	592	sigaddset(&ev->sigmask, sig);
	593	xsigprocmask(SIG_BLOCK, &ev->sigmask, 0);
	594	sigfd[sig] = ev->sigpipe[1];
	595	ev->signals[sig].callback = callback;
	596	ev->signals[sig].u = u;
	597	sa.sa_handler = sighandler;
	598	sigfillset(&sa.sa_mask);
	599	sa.sa_flags = SA_RESTART;
	600	xsigaction(sig, &sa, &ev->signals[sig].oldsa);
	601	ev->escape = 1;
	602	return 0;
	603	}
	604
	605	/** @brief Cancel a signal handler
	606	* @param ev Event loop
	607	* @param sig Signal to cancel
	608	* @return 0 on success, non-0 on error
	609	*/
	610	int ev_signal_cancel(ev_source *ev,
	611	int sig) {
	612	sigset_t ss;
	613
	614	xsigaction(sig, &ev->signals[sig].oldsa, 0);
	615	ev->signals[sig].callback = 0;
	616	ev->escape = 1;
	617	sigdelset(&ev->sigmask, sig);
	618	sigemptyset(&ss);
	619	sigaddset(&ss, sig);
	620	xsigprocmask(SIG_UNBLOCK, &ss, 0);
	621	return 0;
	622	}
	623
	624	/** @brief Clean up signal handling
	625	* @param ev Event loop
	626	*
	627	* This function can be called from inside a fork. It restores signal
	628	* handlers, unblocks the signals, and closes the signal pipe for @p ev.
	629	*/
	630	void ev_signal_atfork(ev_source *ev) {
	631	int sig;
	632
	633	if(ev->sigpipe[0] != -1) {
	634	/* revert any handled signals to their original state */
	635	for(sig = 1; sig < NSIG; ++sig) {
	636	if(ev->signals[sig].callback != 0)
	637	xsigaction(sig, &ev->signals[sig].oldsa, 0);
	638	}
	639	/* and then unblock them */
	640	xsigprocmask(SIG_UNBLOCK, &ev->sigmask, 0);
	641	/* don't want a copy of the signal pipe open inside the fork */
	642	xclose(ev->sigpipe[0]);
	643	xclose(ev->sigpipe[1]);
	644	}
	645	}
	646
	647	/* child processes ************************************************************/
	648
	649	/** @brief Called on SIGCHLD */
	650	static int sigchld_callback(ev_source *ev,
	651	int attribute((unused)) sig,
	652	void attribute((unused)) *u) {
	653	struct rusage ru;
	654	pid_t r;
	655	int status, n, ret, revisit;
	656
	657	do {
	658	revisit = 0;
	659	for(n = 0; n < ev->nchildren; ++n) {
	660	r = wait4(ev->children[n].pid,
	661	&status,
	662	ev->children[n].options \| WNOHANG,
	663	&ru);
	664	if(r > 0) {
	665	ev_child_callback *c = ev->children[n].callback;
	666	void *cu = ev->children[n].u;
	667
	668	if(WIFEXITED(status) \|\| WIFSIGNALED(status))
	669	ev_child_cancel(ev, r);
	670	revisit = 1;
	671	if((ret = c(ev, r, status, &ru, cu)))
	672	return ret;
	673	} else if(r < 0) {
	674	/* We should "never" get an ECHILD but it can in fact happen. For
	675	* instance on Linux 2.4.31, and probably other versions, if someone
	676	* straces a child process and then a different child process
	677	* terminates, when we wait4() the trace process we will get ECHILD
	678	* because it has been reparented to strace. Obviously this is a
	679	* hopeless design flaw in the tracing infrastructure, but we don't
	680	* want the disorder server to bomb out because of it. So we just log
	681	* the problem and ignore it.
	682	*/
	683	disorder_error(errno, "error calling wait4 for PID %lu (broken ptrace?)",
	684	(unsigned long)ev->children[n].pid);
	685	if(errno != ECHILD)
	686	return -1;
	687	}
	688	}
	689	} while(revisit);
	690	return 0;
	691	}
	692
	693	/** @brief Configure event loop for child process handling
	694	* @return 0 on success, non-0 on error
	695	*
	696	* Currently at most one event loop can handle child processes and it must be
	697	* distinguished from others by calling this function on it. This could be
	698	* fixed but since no process ever makes use of more than one event loop there
	699	* is no need.
	700	*/
	701	int ev_child_setup(ev_source *ev) {
	702	D(("installing SIGCHLD handler"));
	703	return ev_signal(ev, SIGCHLD, sigchld_callback, 0);
	704	}
	705
	706	/** @brief Wait for a child process to terminate
	707	* @param ev Event loop
	708	* @param pid Process ID of child
	709	* @param options Options to pass to @c wait4()
	710	* @param callback Called when child terminates (or possibly when it stops)
	711	* @param u Passed to @p callback
	712	* @return 0 on success, non-0 on error
	713	*
	714	* You must have called ev_child_setup() on @p ev once first.
	715	*/
	716	int ev_child(ev_source *ev,
	717	pid_t pid,
	718	int options,
	719	ev_child_callback *callback,
	720	void *u) {
	721	int n;
	722
	723	D(("registering child handling %ld options %d callback %p %p",
	724	(long)pid, options, (void *)callback, u));
	725	assert(ev->signals[SIGCHLD].callback == sigchld_callback);
	726	if(ev->nchildren >= ev->nchildslots) {
	727	ev->nchildslots = ev->nchildslots ? 2 * ev->nchildslots : 16;
	728	ev->children = xrealloc(ev->children,
	729	ev->nchildslots * sizeof (struct child));
	730	}
	731	n = ev->nchildren++;
	732	ev->children[n].pid = pid;
	733	ev->children[n].options = options;
	734	ev->children[n].callback = callback;
	735	ev->children[n].u = u;
	736	return 0;
	737	}
	738
	739	/** @brief Stop waiting for a child process
	740	* @param ev Event loop
	741	* @param pid Child process ID
	742	* @return 0 on success, non-0 on error
	743	*/
	744	int ev_child_cancel(ev_source *ev,
	745	pid_t pid) {
	746	int n;
	747
	748	for(n = 0; n < ev->nchildren && ev->children[n].pid != pid; ++n)
	749	;
	750	assert(n < ev->nchildren);
	751	if(n != ev->nchildren - 1)
	752	ev->children[n] = ev->children[ev->nchildren - 1];
	753	--ev->nchildren;
	754	return 0;
	755	}
	756
	757	/** @brief Terminate and wait for all child processes
	758	* @param ev Event loop
	759	*
	760	* Does not call the completion callbacks. Only used during teardown.
	761	*/
	762	void ev_child_killall(ev_source *ev) {
	763	int n, rc, w;
	764
	765	for(n = 0; n < ev->nchildren; ++n) {
	766	if(kill(ev->children[n].pid, SIGTERM) < 0) {
	767	disorder_error(errno, "sending SIGTERM to pid %lu",
	768	(unsigned long)ev->children[n].pid);
	769	ev->children[n].pid = -1;
	770	}
	771	}
	772	for(n = 0; n < ev->nchildren; ++n) {
	773	if(ev->children[n].pid == -1)
	774	continue;
	775	do {
	776	rc = waitpid(ev->children[n].pid, &w, 0);
	777	} while(rc < 0 && errno == EINTR);
	778	if(rc < 0) {
	779	disorder_error(errno, "waiting for pid %lu",
	780	(unsigned long)ev->children[n].pid);
	781	continue;
	782	}
	783	}
	784	ev->nchildren = 0;
	785	}
	786
	787	/* socket listeners ***********************************************************/
	788
	789	/** @brief State for a socket listener */
	790	struct listen_state {
	791	ev_listen_callback *callback;
	792	void *u;
	793	};
	794
	795	/** @brief Called when a listenign socket is readable */
	796	static int listen_callback(ev_source ev, int fd, void u) {
	797	const struct listen_state *l = u;
	798	int newfd;
	799	union {
	800	struct sockaddr_in in;
	801	#if HAVE_STRUCT_SOCKADDR_IN6
	802	struct sockaddr_in6 in6;
	803	#endif
	804	struct sockaddr_un un;
	805	struct sockaddr sa;
	806	} addr;
	807	socklen_t addrlen;
	808	int ret;
	809
	810	D(("callback for listener fd %d", fd));
	811	while((addrlen = sizeof addr),
	812	(newfd = accept(fd, &addr.sa, &addrlen)) >= 0) {
	813	if((ret = l->callback(ev, newfd, &addr.sa, addrlen, l->u)))
	814	return ret;
	815	}
	816	switch(errno) {
	817	case EINTR:
	818	case EAGAIN:
	819	break;
	820	#ifdef ECONNABORTED
	821	case ECONNABORTED:
	822	disorder_error(errno, "error calling accept");
	823	break;
	824	#endif
	825	#ifdef EPROTO
	826	case EPROTO:
	827	/* XXX on some systems EPROTO should be fatal, but we don't know if
	828	* we're running on one of them */
	829	disorder_error(errno, "error calling accept");
	830	break;
	831	#endif
	832	default:
	833	disorder_fatal(errno, "error calling accept");
	834	break;
	835	}
	836	if(errno != EINTR && errno != EAGAIN)
	837	disorder_error(errno, "error calling accept");
	838	return 0;
	839	}
	840
	841	/** @brief Listen on a socket for inbound stream connections
	842	* @param ev Event source
	843	* @param fd File descriptor of socket
	844	* @param callback Called when a new connection arrives
	845	* @param u Passed to @p callback
	846	* @param what Text description of socket
	847	* @return 0 on success, non-0 on error
	848	*/
	849	int ev_listen(ev_source *ev,
	850	int fd,
	851	ev_listen_callback *callback,
	852	void *u,
	853	const char *what) {
	854	struct listen_state l = xmalloc(sizeof l);
	855
	856	D(("registering listener fd %d callback %p %p", fd, (void *)callback, u));
	857	l->callback = callback;
	858	l->u = u;
	859	return ev_fd(ev, ev_read, fd, listen_callback, l, what);
	860	}
	861
	862	/** @brief Stop listening on a socket
	863	* @param ev Event loop
	864	* @param fd File descriptor of socket
	865	* @return 0 on success, non-0 on error
	866	*/
	867	int ev_listen_cancel(ev_source *ev, int fd) {
	868	D(("cancelling listener fd %d", fd));
	869	return ev_fd_cancel(ev, ev_read, fd);
	870	}
	871
	872	/* buffer *********************************************************************/
	873
	874	/** @brief Buffer structure */
	875	struct buffer {
	876	char base, start, end, top;
	877	};
	878
	879	/* @brief Make sure there is @p bytes available at @c b->end */
	880	static void buffer_space(struct buffer *b, size_t bytes) {
	881	D(("buffer_space %p %p %p %p want %lu",
	882	(void )b->base, (void )b->start, (void )b->end, (void )b->top,
	883	(unsigned long)bytes));
	884	if(b->start == b->end)
	885	b->start = b->end = b->base;
	886	if((size_t)(b->top - b->end) < bytes) {
	887	if((size_t)((b->top - b->end) + (b->start - b->base)) < bytes) {
	888	size_t newspace = b->end - b->start + bytes, n;
	889	char *newbase;
	890
	891	for(n = 16; n < newspace; n *= 2)
	892	;
	893	newbase = xmalloc_noptr(n);
	894	memcpy(newbase, b->start, b->end - b->start);
	895	b->base = newbase;
	896	b->end = newbase + (b->end - b->start);
	897	b->top = newbase + n;
	898	b->start = newbase; /* must be last */
	899	} else {
	900	memmove(b->base, b->start, b->end - b->start);
	901	b->end = b->base + (b->end - b->start);
	902	b->start = b->base;
	903	}
	904	}
	905	D(("result %p %p %p %p",
	906	(void )b->base, (void )b->start, (void )b->end, (void )b->top));
	907	}
	908
	909	/* readers and writers *******************************************************/
	910
	911	/** @brief State structure for a buffered writer */
	912	struct ev_writer {
	913	/** @brief Sink used for writing to the buffer */
	914	struct sink s;
	915
	916	/** @brief Output buffer */
	917	struct buffer b;
	918
	919	/** @brief File descriptor to write to */
	920	int fd;
	921
	922	/** @brief Set if there'll be no more output */
	923	int eof;
	924
	925	/** @brief Error/termination callback */
	926	ev_error_callback *callback;
	927
	928	/** @brief Passed to @p callback */
	929	void *u;
	930
	931	/** @brief Parent event source */
	932	ev_source *ev;
	933
	934	/** @brief Maximum amount of time between succesful writes, 0 = don't care */
	935	int timebound;
	936	/** @brief Maximum amount of data to buffer, 0 = don't care */
	937	int spacebound;
	938	/** @brief Error code to pass to @p callback (see writer_shutdown()) */
	939	int error;
	940	/** @brief Timeout handle for @p timebound (or 0) */
	941	ev_timeout_handle timeout;
	942
	943	/** @brief Description of this writer */
	944	const char *what;
	945
	946	/** @brief Tied reader or 0 */
	947	ev_reader *reader;
	948
	949	/** @brief Set when abandoned */
	950	int abandoned;
	951	};
	952
	953	/** @brief State structure for a buffered reader */
	954	struct ev_reader {
	955	/** @brief Input buffer */
	956	struct buffer b;
	957	/** @brief File descriptor read from */
	958	int fd;
	959	/** @brief Called when new data is available */
	960	ev_reader_callback *callback;
	961	/** @brief Called on error and shutdown */
	962	ev_error_callback *error_callback;
	963	/** @brief Passed to @p callback and @p error_callback */
	964	void *u;
	965	/** @brief Parent event loop */
	966	ev_source *ev;
	967	/** @brief Set when EOF is detected */
	968	int eof;
	969	/** @brief Error code to pass to error callback */
	970	int error;
	971	/** @brief Tied writer or NULL */
	972	ev_writer *writer;
	973	};
	974
	975	/* buffered writer ************************************************************/
	976
	977	/** @brief Shut down the writer
	978	*
	979	* This is called to shut down a writer. The error callback is not called
	980	* through any other path. Also we do not cancel @p fd from anywhere else,
	981	* though we might disable it.
	982	*
	983	* It has the signature of a timeout callback so that it can be called from a
	984	* time=0 timeout.
	985	*
	986	* Calls @p callback with @p w->syntherr as the error code (which might be 0).
	987	*/
	988	static int writer_shutdown(ev_source *ev,
	989	const attribute((unused)) struct timeval *now,
	990	void *u) {
	991	ev_writer *w = u;
	992
	993	if(w->fd == -1)
	994	return 0; /* already shut down */
	995	D(("writer_shutdown fd=%d error=%d", w->fd, w->error));
	996	ev_timeout_cancel(ev, w->timeout);
	997	ev_fd_cancel(ev, ev_write, w->fd);
	998	w->timeout = 0;
	999	if(w->reader) {
	1000	D(("found a tied reader"));
	1001	/* If there is a reader still around we just untie it */
	1002	w->reader->writer = 0;
	1003	shutdown(w->fd, SHUT_WR); /* there'll be no more writes */
	1004	} else {
	1005	D(("no tied reader"));
	1006	/* There's no reader so we are free to close the FD */
	1007	xclose(w->fd);
	1008	}
	1009	w->fd = -1;
	1010	return w->callback(ev, w->error, w->u);
	1011	}
	1012
	1013	/** @brief Called when a writer's @p timebound expires */
	1014	static int writer_timebound_exceeded(ev_source *ev,
	1015	const struct timeval *now,
	1016	void *u) {
	1017	ev_writer *const w = u;
	1018
	1019	if(!w->abandoned) {
	1020	w->abandoned = 1;
	1021	disorder_error(0, "abandoning writer '%s' because no writes within %ds",
	1022	w->what, w->timebound);
	1023	w->error = ETIMEDOUT;
	1024	}
	1025	return writer_shutdown(ev, now, u);
	1026	}
	1027
	1028	/** @brief Set the time bound callback (if not set already) */
	1029	static void writer_set_timebound(ev_writer *w) {
	1030	if(w->timebound && !w->timeout) {
	1031	struct timeval when;
	1032	ev_source *const ev = w->ev;
	1033
	1034	xgettimeofday(&when, 0);
	1035	when.tv_sec += w->timebound;
	1036	ev_timeout(ev, &w->timeout, &when, writer_timebound_exceeded, w);
	1037	}
	1038	}
	1039
	1040	/** @brief Called when a writer's file descriptor is writable */
	1041	static int writer_callback(ev_source ev, int fd, void u) {
	1042	ev_writer *const w = u;
	1043	int n;
	1044
	1045	n = write(fd, w->b.start, w->b.end - w->b.start);
	1046	D(("callback for writer fd %d, %ld bytes, n=%d, errno=%d",
	1047	fd, (long)(w->b.end - w->b.start), n, errno));
	1048	if(n >= 0) {
	1049	/* Consume bytes from the buffer */
	1050	w->b.start += n;
	1051	/* Suppress any outstanding timeout */
	1052	ev_timeout_cancel(ev, w->timeout);
	1053	w->timeout = 0;
	1054	if(w->b.start == w->b.end) {
	1055	/* The buffer is empty */
	1056	if(w->eof) {
	1057	/* We're done, we can shut down this writer */
	1058	w->error = 0;
	1059	return writer_shutdown(ev, 0, w);
	1060	} else
	1061	/* There might be more to come but we don't need writer_callback() to
	1062	* be called for the time being */
	1063	ev_fd_disable(ev, ev_write, fd);
	1064	} else
	1065	/* The buffer isn't empty, set a timeout so we give up if we don't manage
	1066	* to write some more within a reasonable time */
	1067	writer_set_timebound(w);
	1068	} else {
	1069	switch(errno) {
	1070	case EINTR:
	1071	case EAGAIN:
	1072	break;
	1073	default:
	1074	w->error = errno;
	1075	return writer_shutdown(ev, 0, w);
	1076	}
	1077	}
	1078	return 0;
	1079	}
	1080
	1081	/** @brief Write bytes to a writer's buffer
	1082	*
	1083	* This is the sink write callback.
	1084	*
	1085	* Calls ev_fd_enable() if necessary (i.e. if the buffer was empty but
	1086	* now is not).
	1087	*/
	1088	static int ev_writer_write(struct sink sk, const void s, int n) {
	1089	ev_writer w = (ev_writer )sk;
	1090
	1091	if(!n)
	1092	return 0; /* avoid silliness */
	1093	if(w->fd == -1)
	1094	disorder_error(0, "ev_writer_write on %s after shutdown", w->what);
	1095	if(w->spacebound && w->b.end - w->b.start + n > w->spacebound) {
	1096	/* The new buffer contents will exceed the space bound. We assume that the
	1097	* remote client has gone away and TCP hasn't noticed yet, or that it's got
	1098	* hopelessly stuck. */
	1099	if(!w->abandoned) {
	1100	w->abandoned = 1;
	1101	disorder_error(0, "abandoning writer '%s' because buffer has reached %td bytes",
	1102	w->what, w->b.end - w->b.start);
	1103	ev_fd_disable(w->ev, ev_write, w->fd);
	1104	w->error = EPIPE;
	1105	return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
	1106	} else
	1107	return 0;
	1108	}
	1109	/* Make sure there is space */
	1110	buffer_space(&w->b, n);
	1111	/* If the buffer was formerly empty then we'll need to re-enable the FD */
	1112	if(w->b.start == w->b.end)
	1113	ev_fd_enable(w->ev, ev_write, w->fd);
	1114	memcpy(w->b.end, s, n);
	1115	w->b.end += n;
	1116	/* Arrange a timeout if there wasn't one set already */
	1117	writer_set_timebound(w);
	1118	return 0;
	1119	}
	1120
	1121	/** @brief Create a new buffered writer
	1122	* @param ev Event loop
	1123	* @param fd File descriptor to write to
	1124	* @param callback Called if an error occurs and when finished
	1125	* @param u Passed to @p callback
	1126	* @param what Text description
	1127	* @return New writer or @c NULL
	1128	*
	1129	* Writers own their file descriptor and close it when they have finished with
	1130	* it.
	1131	*
	1132	* If you pass the same fd to a reader and writer, you must tie them together
	1133	* with ev_tie().
	1134	*/
	1135	ev_writer ev_writer_new(ev_source ev,
	1136	int fd,
	1137	ev_error_callback *callback,
	1138	void *u,
	1139	const char *what) {
	1140	ev_writer w = xmalloc(sizeof w);
	1141
	1142	D(("registering writer fd %d callback %p %p", fd, (void *)callback, u));
	1143	w->s.write = ev_writer_write;
	1144	w->fd = fd;
	1145	w->callback = callback;
	1146	w->u = u;
	1147	w->ev = ev;
	1148	w->timebound = 10 * 60;
	1149	w->spacebound = 512 * 1024;
	1150	w->what = what;
	1151	if(ev_fd(ev, ev_write, fd, writer_callback, w, what))
	1152	return 0;
	1153	/* Buffer is initially empty so we don't want a callback */
	1154	ev_fd_disable(ev, ev_write, fd);
	1155	return w;
	1156	}
	1157
	1158	/** @brief Get/set the time bound
	1159	* @param w Writer
	1160	* @param new_time_bound New bound or -1 for no change
	1161	* @return Latest time bound
	1162	*
	1163	* If @p new_time_bound is negative then the current time bound is returned.
	1164	* Otherwise it is set and the new value returned.
	1165	*
	1166	* The time bound is the number of seconds allowed between writes. If it takes
	1167	* longer than this to flush a buffer then the peer will be assumed to be dead
	1168	* and an error will be synthesized. 0 means "don't care". The default time
	1169	* bound is 10 minutes.
	1170	*
	1171	* Note that this value does not take into account kernel buffering and
	1172	* timeouts.
	1173	*/
	1174	int ev_writer_time_bound(ev_writer *w,
	1175	int new_time_bound) {
	1176	if(new_time_bound >= 0)
	1177	w->timebound = new_time_bound;
	1178	return w->timebound;
	1179	}
	1180
	1181	/** @brief Get/set the space bound
	1182	* @param w Writer
	1183	* @param new_space_bound New bound or -1 for no change
	1184	* @return Latest space bound
	1185	*
	1186	* If @p new_space_bound is negative then the current space bound is returned.
	1187	* Otherwise it is set and the new value returned.
	1188	*
	1189	* The space bound is the number of bytes allowed between in the buffer. If
	1190	* the buffer exceeds this size an error will be synthesized. 0 means "don't
	1191	* care". The default space bound is 512Kbyte.
	1192	*
	1193	* Note that this value does not take into account kernel buffering.
	1194	*/
	1195	int ev_writer_space_bound(ev_writer *w,
	1196	int new_space_bound) {
	1197	if(new_space_bound >= 0)
	1198	w->spacebound = new_space_bound;
	1199	return w->spacebound;
	1200	}
	1201
	1202	/** @brief Return the sink associated with a writer
	1203	* @param w Writer
	1204	* @return Pointer to sink
	1205	*
	1206	* Writing to the sink will arrange for those bytes to be written to the file
	1207	* descriptor as and when it is writable.
	1208	*/
	1209	struct sink ev_writer_sink(ev_writer w) {
	1210	if(!w)
	1211	disorder_fatal(0, "ev_write_sink called with null writer");
	1212	return &w->s;
	1213	}
	1214
	1215	/** @brief Close a writer
	1216	* @param w Writer to close
	1217	* @return 0 on success, non-0 on error
	1218	*
	1219	* Close a writer. No more bytes should be written to its sink.
	1220	*
	1221	* When the last byte has been written the callback will be called with an
	1222	* error code of 0. It is guaranteed that this will NOT happen before
	1223	* ev_writer_close() returns (although the file descriptor for the writer might
	1224	* be cancelled by the time it returns).
	1225	*/
	1226	int ev_writer_close(ev_writer *w) {
	1227	D(("close writer fd %d", w->fd));
	1228	if(w->eof)
	1229	return 0; /* already closed */
	1230	w->eof = 1;
	1231	if(w->b.start == w->b.end) {
	1232	/* We're already finished */
	1233	w->error = 0; /* no error */
	1234	return ev_timeout(w->ev, 0, 0, writer_shutdown, w);
	1235	}
	1236	return 0;
	1237	}
	1238
	1239	/** @brief Attempt to flush a writer
	1240	* @param w Writer to flush
	1241	* @return 0 on success, non-0 on error
	1242	*
	1243	* Does a speculative write of any buffered data. Does not block if it cannot
	1244	* be written.
	1245	*/
	1246	int ev_writer_flush(ev_writer *w) {
	1247	return writer_callback(w->ev, w->fd, w);
	1248	}
	1249
	1250	/* buffered reader ************************************************************/
	1251
	1252	/** @brief Shut down a reader
	1253	*
	1254	* This is the only path through which we cancel and close the file descriptor.
	1255	* As with the writer case it is given timeout signature to allow it be
	1256	* deferred to the next iteration of the event loop.
	1257	*
	1258	* We only call @p error_callback if @p error is nonzero (unlike the writer
	1259	* case).
	1260	*/
	1261	static int reader_shutdown(ev_source *ev,
	1262	const attribute((unused)) struct timeval *now,
	1263	void *u) {
	1264	ev_reader *const r = u;
	1265
	1266	if(r->fd == -1)
	1267	return 0; /* already shut down */
	1268	D(("reader_shutdown fd=%d", r->fd));
	1269	ev_fd_cancel(ev, ev_read, r->fd);
	1270	r->eof = 1;
	1271	if(r->writer) {
	1272	D(("found a tied writer"));
	1273	/* If there is a writer still around we just untie it */
	1274	r->writer->reader = 0;
	1275	shutdown(r->fd, SHUT_RD); /* there'll be no more reads */
	1276	} else {
	1277	D(("no tied writer found"));
	1278	/* There's no writer so we are free to close the FD */
	1279	xclose(r->fd);
	1280	}
	1281	r->fd = -1;
	1282	if(r->error)
	1283	return r->error_callback(ev, r->error, r->u);
	1284	else
	1285	return 0;
	1286	}
	1287
	1288	/** @brief Called when a reader's @p fd is readable */
	1289	static int reader_callback(ev_source ev, int fd, void u) {
	1290	ev_reader *r = u;
	1291	int n;
	1292
	1293	buffer_space(&r->b, 1);
	1294	n = read(fd, r->b.end, r->b.top - r->b.end);
	1295	D(("read fd %d buffer %d returned %d errno %d",
	1296	fd, (int)(r->b.top - r->b.end), n, errno));
	1297	if(n > 0) {
	1298	r->b.end += n;
	1299	return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 0, r->u);
	1300	} else if(n == 0) {
	1301	/* No more read callbacks needed */
	1302	ev_fd_disable(r->ev, ev_read, r->fd);
	1303	ev_timeout(r->ev, 0, 0, reader_shutdown, r);
	1304	/* Pass the remaining data and an eof indicator to the user */
	1305	return r->callback(ev, r, r->b.start, r->b.end - r->b.start, 1, r->u);
	1306	} else {
	1307	switch(errno) {
	1308	case EINTR:
	1309	case EAGAIN:
	1310	break;
	1311	default:
	1312	/* Fatal error, kill the reader now */
	1313	r->error = errno;
	1314	return reader_shutdown(ev, 0, r);
	1315	}
	1316	}
	1317	return 0;
	1318	}
	1319
	1320	/** @brief Create a new buffered reader
	1321	* @param ev Event loop
	1322	* @param fd File descriptor to read from
	1323	* @param callback Called when new data is available
	1324	* @param error_callback Called if an error occurs
	1325	* @param u Passed to callbacks
	1326	* @param what Text description
	1327	* @return New reader or @c NULL
	1328	*
	1329	* Readers own their fd and close it when they are finished with it.
	1330	*
	1331	* If you pass the same fd to a reader and writer, you must tie them together
	1332	* with ev_tie().
	1333	*/
	1334	ev_reader ev_reader_new(ev_source ev,
	1335	int fd,
	1336	ev_reader_callback *callback,
	1337	ev_error_callback *error_callback,
	1338	void *u,
	1339	const char *what) {
	1340	ev_reader r = xmalloc(sizeof r);
	1341
	1342	D(("registering reader fd %d callback %p %p %p",
	1343	fd, (void )callback, (void )error_callback, u));
	1344	r->fd = fd;
	1345	r->callback = callback;
	1346	r->error_callback = error_callback;
	1347	r->u = u;
	1348	r->ev = ev;
	1349	if(ev_fd(ev, ev_read, fd, reader_callback, r, what))
	1350	return 0;
	1351	return r;
	1352	}
	1353
	1354	void ev_reader_buffer(ev_reader *r, size_t nbytes) {
	1355	buffer_space(&r->b, nbytes - (r->b.end - r->b.start));
	1356	}
	1357
	1358	/** @brief Consume @p n bytes from the reader's buffer
	1359	* @param r Reader
	1360	* @param n Number of bytes to consume
	1361	*
	1362	* Tells the reader than the next @p n bytes have been dealt with and can now
	1363	* be discarded.
	1364	*/
	1365	void ev_reader_consume(ev_reader *r, size_t n) {
	1366	r->b.start += n;
	1367	}
	1368
	1369	/** @brief Cancel a reader
	1370	* @param r Reader
	1371	* @return 0 on success, non-0 on error
	1372	*
	1373	* No further callbacks will be made, and the FD will be closed (in a later
	1374	* iteration of the event loop).
	1375	*/
	1376	int ev_reader_cancel(ev_reader *r) {
	1377	D(("cancel reader fd %d", r->fd));
	1378	if(r->fd == -1)
	1379	return 0; /* already thoroughly cancelled */
	1380	ev_fd_disable(r->ev, ev_read, r->fd);
	1381	return ev_timeout(r->ev, 0, 0, reader_shutdown, r);
	1382	}
	1383
	1384	/** @brief Temporarily disable a reader
	1385	* @param r Reader
	1386	* @return 0 on success, non-0 on error
	1387	*
	1388	* No further callbacks for this reader will be made. Re-enable with
	1389	* ev_reader_enable().
	1390	*/
	1391	int ev_reader_disable(ev_reader *r) {
	1392	D(("disable reader fd %d", r->fd));
	1393	return ev_fd_disable(r->ev, ev_read, r->fd);
	1394	}
	1395
	1396	/** @brief Called from ev_run() for ev_reader_incomplete() */
	1397	static int reader_continuation(ev_source attribute((unused)) *ev,
	1398	const attribute((unused)) struct timeval *now,
	1399	void *u) {
	1400	ev_reader *r = u;
	1401
	1402	D(("reader continuation callback fd %d", r->fd));
	1403	/* If not at EOF turn the FD back on */
	1404	if(!r->eof)
	1405	if(ev_fd_enable(r->ev, ev_read, r->fd))
	1406	return -1;
	1407	/* We're already in a timeout callback so there's no reason we can't call the
	1408	* user callback directly (compare ev_reader_enable()). */
	1409	return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
	1410	}
	1411
	1412	/** @brief Arrange another callback
	1413	* @param r reader
	1414	* @return 0 on success, non-0 on error
	1415	*
	1416	* Indicates that the reader can process more input but would like to yield to
	1417	* other clients of the event loop. Input will be disabled but it will be
	1418	* re-enabled on the next iteration of the event loop and the read callback
	1419	* will be called again (even if no further bytes are available).
	1420	*/
	1421	int ev_reader_incomplete(ev_reader *r) {
	1422	if(ev_fd_disable(r->ev, ev_read, r->fd)) return -1;
	1423	return ev_timeout(r->ev, 0, 0, reader_continuation, r);
	1424	}
	1425
	1426	static int reader_enabled(ev_source *ev,
	1427	const attribute((unused)) struct timeval *now,
	1428	void *u) {
	1429	ev_reader *r = u;
	1430
	1431	D(("reader enabled callback fd %d", r->fd));
	1432	return r->callback(ev, r, r->b.start, r->b.end - r->b.start, r->eof, r->u);
	1433	}
	1434
	1435	/** @brief Re-enable reading
	1436	* @param r reader
	1437	* @return 0 on success, non-0 on error
	1438	*
	1439	* If there is unconsumed data then you get a callback next time round the
	1440	* event loop even if nothing new has been read.
	1441	*
	1442	* The idea is in your read callback you come across a line (or whatever) that
	1443	* can't be processed immediately. So you set up processing and disable
	1444	* reading with ev_reader_disable(). Later when you finish processing you
	1445	* re-enable. You'll automatically get another callback directly from the
	1446	* event loop (i.e. not from inside ev_reader_enable()) so you can handle the
	1447	* next line (or whatever) if the whole thing has in fact already arrived.
	1448	*
	1449	* The difference between this process and calling ev_reader_incomplete() is
	1450	* ev_reader_incomplete() deals with the case where you can process now but
	1451	* would rather yield to other clients of the event loop, while using
	1452	* ev_reader_disable() and ev_reader_enable() deals with the case where you
	1453	* cannot process input yet because some other process is actually not
	1454	* complete.
	1455	*/
	1456	int ev_reader_enable(ev_reader *r) {
	1457	D(("enable reader fd %d", r->fd));
	1458
	1459	/* First if we're not at EOF then we re-enable reading */
	1460	if(!r->eof)
	1461	if(ev_fd_enable(r->ev, ev_read, r->fd))
	1462	return -1;
	1463	/* Arrange another callback next time round the event loop */
	1464	return ev_timeout(r->ev, 0, 0, reader_enabled, r);
	1465	}
	1466
	1467	/** @brief Tie a reader and a writer together
	1468	* @param r Reader
	1469	* @param w Writer
	1470	* @return 0 on success, non-0 on error
	1471	*
	1472	* This function must be called if @p r and @p w share a file descritptor.
	1473	*/
	1474	int ev_tie(ev_reader r, ev_writer w) {
	1475	assert(r->writer == 0);
	1476	assert(w->reader == 0);
	1477	r->writer = w;
	1478	w->reader = r;
	1479	return 0;
	1480	}
	1481
	1482	/*
	1483	Local Variables:
	1484	c-basic-offset:2
	1485	comment-column:40
	1486	fill-column:79
	1487	End:
	1488	*/