event: don't disarm invalid timerfd
[elogind.git] / src / libsystemd-bus / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "macro.h"
28 #include "prioq.h"
29 #include "hashmap.h"
30 #include "util.h"
31 #include "time-util.h"
32 #include "missing.h"
33
34 #include "sd-event.h"
35
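/* EPOLL_QUEUE_MAX caps how many events a single epoll_wait() call may return
 * per loop iteration; DEFAULT_ACCURACY_USEC is the timer accuracy used
 * whenever a caller passes 0 as the accuracy parameter. */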
36 #define EPOLL_QUEUE_MAX 64
37 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
38
39 typedef enum EventSourceType {
40         SOURCE_IO,
41         SOURCE_MONOTONIC,
42         SOURCE_REALTIME,
43         SOURCE_SIGNAL,
44         SOURCE_CHILD,
45         SOURCE_DEFER,
46         SOURCE_QUIT
47 } EventSourceType;
48
49 struct sd_event_source {
50         unsigned n_ref;
51
52         sd_event *event;
53         void *userdata;
54         sd_event_handler_t prepare;
55
56         EventSourceType type:4;
57         int enabled:3;
58         bool pending:1;
59
60         int priority;
61         unsigned pending_index;
62         unsigned prepare_index;
63         unsigned pending_iteration;
64         unsigned prepare_iteration;
65
66         union {
67                 struct {
68                         sd_event_io_handler_t callback;
69                         int fd;
70                         uint32_t events;
71                         uint32_t revents;
72                         bool registered:1;
73                 } io;
74                 struct {
75                         sd_event_time_handler_t callback;
76                         usec_t next, accuracy;
77                         unsigned earliest_index;
78                         unsigned latest_index;
79                 } time;
80                 struct {
81                         sd_event_signal_handler_t callback;
82                         struct signalfd_siginfo siginfo;
83                         int sig;
84                 } signal;
85                 struct {
86                         sd_event_child_handler_t callback;
87                         siginfo_t siginfo;
88                         pid_t pid;
89                         int options;
90                 } child;
91                 struct {
92                         sd_event_handler_t callback;
93                 } defer;
94                 struct {
95                         sd_event_handler_t callback;
96                         unsigned prioq_index;
97                 } quit;
98         };
99 };
100
101 struct sd_event {
102         unsigned n_ref;
103
104         int epoll_fd;
105         int signal_fd;
106         int realtime_fd;
107         int monotonic_fd;
108
109         Prioq *pending;
110         Prioq *prepare;
111
112         /* For both clocks we maintain two priority queues each, one
113          * ordered by the earliest times the events may be
114          * dispatched, and one ordered by the latest times they must
115          * have been dispatched. The range between the top entries in
116          * the two prioqs is the time window we can freely schedule
117          * wakeups in */
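        /* Example: a monotonic source with next=T and accuracy=A may be
         * dispatched anywhere in [T, T+A]; monotonic_earliest is keyed on T
         * and monotonic_latest on T+A (cf. earliest_time_prioq_compare() and
         * latest_time_prioq_compare() below). */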
118         Prioq *monotonic_earliest;
119         Prioq *monotonic_latest;
120         Prioq *realtime_earliest;
121         Prioq *realtime_latest;
122
123         usec_t realtime_next, monotonic_next;
124         usec_t perturb;
125
126         sigset_t sigset;
127         sd_event_source **signal_sources;
128
129         Hashmap *child_sources;
130         unsigned n_enabled_child_sources;
131
132         Prioq *quit;
133
134         pid_t original_pid;
135
136         unsigned iteration;
137         dual_timestamp timestamp;
138         int state;
139
140         bool quit_requested:1;
141         bool need_process_child:1;
142
143         pid_t tid;
144         sd_event **default_event_ptr;
145 };
146
147 static int pending_prioq_compare(const void *a, const void *b) {
148         const sd_event_source *x = a, *y = b;
149
150         assert(x->pending);
151         assert(y->pending);
152
153         /* Enabled ones first */
154         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
155                 return -1;
156         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
157                 return 1;
158
159         /* Lower priority values first */
160         if (x->priority < y->priority)
161                 return -1;
162         if (x->priority > y->priority)
163                 return 1;
164
165         /* Older entries first */
166         if (x->pending_iteration < y->pending_iteration)
167                 return -1;
168         if (x->pending_iteration > y->pending_iteration)
169                 return 1;
170
171         /* Stability for the rest */
172         if (x < y)
173                 return -1;
174         if (x > y)
175                 return 1;
176
177         return 0;
178 }
179
180 static int prepare_prioq_compare(const void *a, const void *b) {
181         const sd_event_source *x = a, *y = b;
182
183         assert(x->prepare);
184         assert(y->prepare);
185
186         /* Move most recently prepared ones last, so that we can stop
187          * preparing as soon as we hit one that has already been
188          * prepared in the current iteration */
189         if (x->prepare_iteration < y->prepare_iteration)
190                 return -1;
191         if (x->prepare_iteration > y->prepare_iteration)
192                 return 1;
193
194         /* Enabled ones first */
195         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
196                 return -1;
197         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
198                 return 1;
199
200         /* Lower priority values first */
201         if (x->priority < y->priority)
202                 return -1;
203         if (x->priority > y->priority)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int earliest_time_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
219         assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
220
221         /* Enabled ones first */
222         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
223                 return -1;
224         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
225                 return 1;
226
227         /* Move the pending ones to the end */
228         if (!x->pending && y->pending)
229                 return -1;
230         if (x->pending && !y->pending)
231                 return 1;
232
233         /* Order by time */
234         if (x->time.next < y->time.next)
235                 return -1;
236         if (x->time.next > y->time.next)
237                 return 1;
238
239         /* Stability for the rest */
240         if (x < y)
241                 return -1;
242         if (x > y)
243                 return 1;
244
245         return 0;
246 }
247
248 static int latest_time_prioq_compare(const void *a, const void *b) {
249         const sd_event_source *x = a, *y = b;
250
251         assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
252                (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
253
254         /* Enabled ones first */
255         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
256                 return -1;
257         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
258                 return 1;
259
260         /* Move the pending ones to the end */
261         if (!x->pending && y->pending)
262                 return -1;
263         if (x->pending && !y->pending)
264                 return 1;
265
266         /* Order by time */
267         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
268                 return -1;
269         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
270                 return 1;
271
272         /* Stability for the rest */
273         if (x < y)
274                 return -1;
275         if (x > y)
276                 return 1;
277
278         return 0;
279 }
280
281 static int quit_prioq_compare(const void *a, const void *b) {
282         const sd_event_source *x = a, *y = b;
283
284         assert(x->type == SOURCE_QUIT);
285         assert(y->type == SOURCE_QUIT);
286
287         /* Enabled ones first */
288         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
289                 return -1;
290         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
291                 return 1;
292
293         /* Lower priority values first */
294         if (x->priority < y->priority)
295                 return -1;
296         if (x->priority > y->priority)
297                 return 1;
298
299         /* Stability for the rest */
300         if (x < y)
301                 return -1;
302         if (x > y)
303                 return 1;
304
305         return 0;
306 }
307
308 static void event_free(sd_event *e) {
309         assert(e);
310
311         if (e->default_event_ptr)
312                 *(e->default_event_ptr) = NULL;
313
314         if (e->epoll_fd >= 0)
315                 close_nointr_nofail(e->epoll_fd);
316
317         if (e->signal_fd >= 0)
318                 close_nointr_nofail(e->signal_fd);
319
320         if (e->realtime_fd >= 0)
321                 close_nointr_nofail(e->realtime_fd);
322
323         if (e->monotonic_fd >= 0)
324                 close_nointr_nofail(e->monotonic_fd);
325
326         prioq_free(e->pending);
327         prioq_free(e->prepare);
328         prioq_free(e->monotonic_earliest);
329         prioq_free(e->monotonic_latest);
330         prioq_free(e->realtime_earliest);
331         prioq_free(e->realtime_latest);
332         prioq_free(e->quit);
333
334         free(e->signal_sources);
335
336         hashmap_free(e->child_sources);
337         free(e);
338 }
339
340 _public_ int sd_event_new(sd_event** ret) {
341         sd_event *e;
342         int r;
343
344         assert_return(ret, -EINVAL);
345
346         e = new0(sd_event, 1);
347         if (!e)
348                 return -ENOMEM;
349
350         e->n_ref = 1;
351         e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
352         e->realtime_next = e->monotonic_next = (usec_t) -1;
353         e->original_pid = getpid();
354
355         assert_se(sigemptyset(&e->sigset) == 0);
356
357         e->pending = prioq_new(pending_prioq_compare);
358         if (!e->pending) {
359                 r = -ENOMEM;
360                 goto fail;
361         }
362
363         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
364         if (e->epoll_fd < 0) {
365                 r = -errno;
366                 goto fail;
367         }
368
369         *ret = e;
370         return 0;
371
372 fail:
373         event_free(e);
374         return r;
375 }
376
377 _public_ sd_event* sd_event_ref(sd_event *e) {
378         assert_return(e, NULL);
379
380         assert(e->n_ref >= 1);
381         e->n_ref++;
382
383         return e;
384 }
385
386 _public_ sd_event* sd_event_unref(sd_event *e) {
387         assert_return(e, NULL);
388
389         assert(e->n_ref >= 1);
390         e->n_ref--;
391
392         if (e->n_ref <= 0)
393                 event_free(e);
394
395         return NULL;
396 }
397
398 static bool event_pid_changed(sd_event *e) {
399         assert(e);
400
401         /* We don't support people creating an event loop and keeping
402          * it around over a fork(). Let's complain. */
403
404         return e->original_pid != getpid();
405 }
406
407 static int source_io_unregister(sd_event_source *s) {
408         int r;
409
410         assert(s);
411         assert(s->type == SOURCE_IO);
412
413         if (!s->io.registered)
414                 return 0;
415
416         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
417         if (r < 0)
418                 return -errno;
419
420         s->io.registered = false;
421         return 0;
422 }
423
424 static int source_io_register(
425                 sd_event_source *s,
426                 int enabled,
427                 uint32_t events) {
428
429         struct epoll_event ev = {};
430         int r;
431
432         assert(s);
433         assert(s->type == SOURCE_IO);
434         assert(enabled != SD_EVENT_OFF);
435
436         ev.events = events;
437         ev.data.ptr = s;
438
439         if (enabled == SD_EVENT_ONESHOT)
440                 ev.events |= EPOLLONESHOT;
441
442         if (s->io.registered)
443                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
444         else
445                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
446
447         if (r < 0)
448                 return -errno;
449
450         s->io.registered = true;
451
452         return 0;
453 }
454
455 static void source_free(sd_event_source *s) {
456         assert(s);
457
458         if (s->event) {
459                 switch (s->type) {
460
461                 case SOURCE_IO:
462                         if (s->io.fd >= 0)
463                                 source_io_unregister(s);
464
465                         break;
466
467                 case SOURCE_MONOTONIC:
468                         prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
469                         prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
470                         break;
471
472                 case SOURCE_REALTIME:
473                         prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
474                         prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
475                         break;
476
477                 case SOURCE_SIGNAL:
478                         if (s->signal.sig > 0) {
479                                 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
480                                         assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
481
482                                 if (s->event->signal_sources)
483                                         s->event->signal_sources[s->signal.sig] = NULL;
484                         }
485
486                         break;
487
488                 case SOURCE_CHILD:
489                         if (s->child.pid > 0) {
490                                 if (s->enabled != SD_EVENT_OFF) {
491                                         assert(s->event->n_enabled_child_sources > 0);
492                                         s->event->n_enabled_child_sources--;
493                                 }
494
495                                 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
496                                         assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
497
498                                 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
499                         }
500
501                         break;
502
503                 case SOURCE_DEFER:
504                         /* nothing */
505                         break;
506
507                 case SOURCE_QUIT:
508                         prioq_remove(s->event->quit, s, &s->quit.prioq_index);
509                         break;
510                 }
511
512                 if (s->pending)
513                         prioq_remove(s->event->pending, s, &s->pending_index);
514
515                 if (s->prepare)
516                         prioq_remove(s->event->prepare, s, &s->prepare_index);
517
518                 sd_event_unref(s->event);
519         }
520
521         free(s);
522 }
523
524 static int source_set_pending(sd_event_source *s, bool b) {
525         int r;
526
527         assert(s);
528         assert(s->type != SOURCE_QUIT);
529
530         if (s->pending == b)
531                 return 0;
532
533         s->pending = b;
534
535         if (b) {
536                 s->pending_iteration = s->event->iteration;
537
538                 r = prioq_put(s->event->pending, s, &s->pending_index);
539                 if (r < 0) {
540                         s->pending = false;
541                         return r;
542                 }
543         } else
544                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
545
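        /* The time prioqs sort pending sources last (see the earliest/latest
         * compare functions), so a change in pending state requires a
         * reshuffle of both queues. */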
546         if (s->type == SOURCE_REALTIME) {
547                 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
548                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
549         } else if (s->type == SOURCE_MONOTONIC) {
550                 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
551                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
552         }
553
554         return 0;
555 }
556
557 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
558         sd_event_source *s;
559
560         assert(e);
561
562         s = new0(sd_event_source, 1);
563         if (!s)
564                 return NULL;
565
566         s->n_ref = 1;
567         s->event = sd_event_ref(e);
568         s->type = type;
569         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
570
571         return s;
572 }
573
574 _public_ int sd_event_add_io(
575                 sd_event *e,
576                 int fd,
577                 uint32_t events,
578                 sd_event_io_handler_t callback,
579                 void *userdata,
580                 sd_event_source **ret) {
581
582         sd_event_source *s;
583         int r;
584
585         assert_return(e, -EINVAL);
586         assert_return(fd >= 0, -EINVAL);
587         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
588         assert_return(callback, -EINVAL);
589         assert_return(ret, -EINVAL);
590         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
591         assert_return(!event_pid_changed(e), -ECHILD);
592
593         s = source_new(e, SOURCE_IO);
594         if (!s)
595                 return -ENOMEM;
596
597         s->io.fd = fd;
598         s->io.events = events;
599         s->io.callback = callback;
600         s->userdata = userdata;
601         s->enabled = SD_EVENT_ON;
602
603         r = source_io_register(s, s->enabled, events);
604         if (r < 0) {
605                 source_free(s);
606                 return r;
607         }
608
609         *ret = s;
610         return 0;
611 }
612
613 static int event_setup_timer_fd(
614                 sd_event *e,
615                 EventSourceType type,
616                 int *timer_fd,
617                 clockid_t id) {
618
619         struct epoll_event ev = {};
620         int r, fd;
621         sd_id128_t bootid;
622
623         assert(e);
624         assert(timer_fd);
625
626         if (_likely_(*timer_fd >= 0))
627                 return 0;
628
629         fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
630         if (fd < 0)
631                 return -errno;
632
633         ev.events = EPOLLIN;
634         ev.data.ptr = INT_TO_PTR(type);
635
636         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
637         if (r < 0) {
638                 close_nointr_nofail(fd);
639                 return -errno;
640         }
641
642         /* When we sleep for longer, we try to realign the wakeup to
643            the same time within each second, so that events all across
644            the system can be coalesced into a single CPU
645            wakeup. However, let's take some system-specific randomness
646            for this value, so that in a network of systems with synced
647            clocks timer events are distributed a bit. Here, we
648            calculate a perturbation usec offset from the boot ID. */
649
650         if (sd_id128_get_boot(&bootid) >= 0)
651                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;
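        /* If the boot ID cannot be read, e->perturb simply stays 0 (the event
         * loop is allocated with new0()), i.e. wakeups align to whole seconds. */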
652
653         *timer_fd = fd;
654         return 0;
655 }
656
657 static int event_add_time_internal(
658                 sd_event *e,
659                 EventSourceType type,
660                 int *timer_fd,
661                 clockid_t id,
662                 Prioq **earliest,
663                 Prioq **latest,
664                 uint64_t usec,
665                 uint64_t accuracy,
666                 sd_event_time_handler_t callback,
667                 void *userdata,
668                 sd_event_source **ret) {
669
670         sd_event_source *s;
671         int r;
672
673         assert_return(e, -EINVAL);
674         assert_return(callback, -EINVAL);
675         assert_return(ret, -EINVAL);
676         assert_return(usec != (uint64_t) -1, -EINVAL);
677         assert_return(accuracy != (uint64_t) -1, -EINVAL);
678         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
679         assert_return(!event_pid_changed(e), -ECHILD);
680
681         assert(timer_fd);
682         assert(earliest);
683         assert(latest);
684
685         if (!*earliest) {
686                 *earliest = prioq_new(earliest_time_prioq_compare);
687                 if (!*earliest)
688                         return -ENOMEM;
689         }
690
691         if (!*latest) {
692                 *latest = prioq_new(latest_time_prioq_compare);
693                 if (!*latest)
694                         return -ENOMEM;
695         }
696
697         if (*timer_fd < 0) {
698                 r = event_setup_timer_fd(e, type, timer_fd, id);
699                 if (r < 0)
700                         return r;
701         }
702
703         s = source_new(e, type);
704         if (!s)
705                 return -ENOMEM;
706
707         s->time.next = usec;
708         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
709         s->time.callback = callback;
710         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
711         s->userdata = userdata;
712         s->enabled = SD_EVENT_ONESHOT;
713
714         r = prioq_put(*earliest, s, &s->time.earliest_index);
715         if (r < 0)
716                 goto fail;
717
718         r = prioq_put(*latest, s, &s->time.latest_index);
719         if (r < 0)
720                 goto fail;
721
722         *ret = s;
723         return 0;
724
725 fail:
726         source_free(s);
727         return r;
728 }
729
730 _public_ int sd_event_add_monotonic(sd_event *e,
731                                     uint64_t usec,
732                                     uint64_t accuracy,
733                                     sd_event_time_handler_t callback,
734                                     void *userdata,
735                                     sd_event_source **ret) {
736
737         return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
738 }
739
740 _public_ int sd_event_add_realtime(sd_event *e,
741                                    uint64_t usec,
742                                    uint64_t accuracy,
743                                    sd_event_time_handler_t callback,
744                                    void *userdata,
745                                    sd_event_source **ret) {
746
747         return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->realtime_latest, usec, accuracy, callback, userdata, ret);
748 }
749
750 static int event_update_signal_fd(sd_event *e) {
751         struct epoll_event ev = {};
752         bool add_to_epoll;
753         int r;
754
755         assert(e);
756
757         add_to_epoll = e->signal_fd < 0;
758
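        /* signalfd() updates the signal mask in place when given an existing
         * fd; only a freshly created fd (i.e. e->signal_fd was -1) needs to be
         * added to the epoll instance below. */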
759         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
760         if (r < 0)
761                 return -errno;
762
763         e->signal_fd = r;
764
765         if (!add_to_epoll)
766                 return 0;
767
768         ev.events = EPOLLIN;
769         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
770
771         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
772         if (r < 0) {
773                 close_nointr_nofail(e->signal_fd);
774                 e->signal_fd = -1;
775
776                 return -errno;
777         }
778
779         return 0;
780 }
781
782 _public_ int sd_event_add_signal(
783                 sd_event *e,
784                 int sig,
785                 sd_event_signal_handler_t callback,
786                 void *userdata,
787                 sd_event_source **ret) {
788
789         sd_event_source *s;
790         int r;
791
792         assert_return(e, -EINVAL);
793         assert_return(sig > 0, -EINVAL);
794         assert_return(sig < _NSIG, -EINVAL);
795         assert_return(callback, -EINVAL);
796         assert_return(ret, -EINVAL);
797         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
798         assert_return(!event_pid_changed(e), -ECHILD);
799
800         if (!e->signal_sources) {
801                 e->signal_sources = new0(sd_event_source*, _NSIG);
802                 if (!e->signal_sources)
803                         return -ENOMEM;
804         } else if (e->signal_sources[sig])
805                 return -EBUSY;
806
807         s = source_new(e, SOURCE_SIGNAL);
808         if (!s)
809                 return -ENOMEM;
810
811         s->signal.sig = sig;
812         s->signal.callback = callback;
813         s->userdata = userdata;
814         s->enabled = SD_EVENT_ON;
815
816         e->signal_sources[sig] = s;
817         assert_se(sigaddset(&e->sigset, sig) == 0);
818
819         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
820                 r = event_update_signal_fd(e);
821                 if (r < 0) {
822                         source_free(s);
823                         return r;
824                 }
825         }
826
827         *ret = s;
828         return 0;
829 }
830
831 _public_ int sd_event_add_child(
832                 sd_event *e,
833                 pid_t pid,
834                 int options,
835                 sd_event_child_handler_t callback,
836                 void *userdata,
837                 sd_event_source **ret) {
838
839         sd_event_source *s;
840         int r;
841
842         assert_return(e, -EINVAL);
843         assert_return(pid > 1, -EINVAL);
844         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
845         assert_return(options != 0, -EINVAL);
846         assert_return(callback, -EINVAL);
847         assert_return(ret, -EINVAL);
848         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
849         assert_return(!event_pid_changed(e), -ECHILD);
850
851         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
852         if (r < 0)
853                 return r;
854
855         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
856                 return -EBUSY;
857
858         s = source_new(e, SOURCE_CHILD);
859         if (!s)
860                 return -ENOMEM;
861
862         s->child.pid = pid;
863         s->child.options = options;
864         s->child.callback = callback;
865         s->userdata = userdata;
866         s->enabled = SD_EVENT_ONESHOT;
867
868         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
869         if (r < 0) {
870                 source_free(s);
871                 return r;
872         }
873
874         e->n_enabled_child_sources ++;
875
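        /* Child sources are driven by SIGCHLD: always add it to our signal
         * mask, but only update the signalfd if no explicit SIGCHLD signal
         * source already keeps it registered. */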
876         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
877
878         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
879                 r = event_update_signal_fd(e);
880                 if (r < 0) {
881                         source_free(s);
882                         return r;
883                 }
884         }
885
886         e->need_process_child = true;
887
888         *ret = s;
889         return 0;
890 }
891
892 _public_ int sd_event_add_defer(
893                 sd_event *e,
894                 sd_event_handler_t callback,
895                 void *userdata,
896                 sd_event_source **ret) {
897
898         sd_event_source *s;
899         int r;
900
901         assert_return(e, -EINVAL);
902         assert_return(callback, -EINVAL);
903         assert_return(ret, -EINVAL);
904         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
905         assert_return(!event_pid_changed(e), -ECHILD);
906
907         s = source_new(e, SOURCE_DEFER);
908         if (!s)
909                 return -ENOMEM;
910
911         s->defer.callback = callback;
912         s->userdata = userdata;
913         s->enabled = SD_EVENT_ONESHOT;
914
915         r = source_set_pending(s, true);
916         if (r < 0) {
917                 source_free(s);
918                 return r;
919         }
920
921         *ret = s;
922         return 0;
923 }
924
925 _public_ int sd_event_add_quit(
926                 sd_event *e,
927                 sd_event_handler_t callback,
928                 void *userdata,
929                 sd_event_source **ret) {
930
931         sd_event_source *s;
932         int r;
933
934         assert_return(e, -EINVAL);
935         assert_return(callback, -EINVAL);
936         assert_return(ret, -EINVAL);
937         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
938         assert_return(!event_pid_changed(e), -ECHILD);
939
940         if (!e->quit) {
941                 e->quit = prioq_new(quit_prioq_compare);
942                 if (!e->quit)
943                         return -ENOMEM;
944         }
945
946         s = source_new(e, SOURCE_QUIT);
947         if (!s)
948                 return -ENOMEM;
949
950         s->quit.callback = callback;
951         s->userdata = userdata;
952         s->quit.prioq_index = PRIOQ_IDX_NULL;
953         s->enabled = SD_EVENT_ONESHOT;
954
955         r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
956         if (r < 0) {
957                 source_free(s);
958                 return r;
959         }
960
961         *ret = s;
962         return 0;
963 }
964
965 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
966         assert_return(s, NULL);
967
968         assert(s->n_ref >= 1);
969         s->n_ref++;
970
971         return s;
972 }
973
974 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
975         assert_return(s, NULL);
976
977         assert(s->n_ref >= 1);
978         s->n_ref--;
979
980         if (s->n_ref <= 0)
981                 source_free(s);
982
983         return NULL;
984 }
985
986 _public_ sd_event *sd_event_get(sd_event_source *s) {
987         assert_return(s, NULL);
988
989         return s->event;
990 }
991
992 _public_ int sd_event_source_get_pending(sd_event_source *s) {
993         assert_return(s, -EINVAL);
994         assert_return(s->type != SOURCE_QUIT, -EDOM);
995         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
996         assert_return(!event_pid_changed(s->event), -ECHILD);
997
998         return s->pending;
999 }
1000
1001 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1002         assert_return(s, -EINVAL);
1003         assert_return(s->type == SOURCE_IO, -EDOM);
1004         assert_return(!event_pid_changed(s->event), -ECHILD);
1005
1006         return s->io.fd;
1007 }
1008
1009 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1010         assert_return(s, -EINVAL);
1011         assert_return(events, -EINVAL);
1012         assert_return(s->type == SOURCE_IO, -EDOM);
1013         assert_return(!event_pid_changed(s->event), -ECHILD);
1014
1015         *events = s->io.events;
1016         return 0;
1017 }
1018
1019 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1020         int r;
1021
1022         assert_return(s, -EINVAL);
1023         assert_return(s->type == SOURCE_IO, -EDOM);
1024         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
1025         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1026         assert_return(!event_pid_changed(s->event), -ECHILD);
1027
1028         if (s->io.events == events)
1029                 return 0;
1030
1031         if (s->enabled != SD_EVENT_OFF) {
1032                 r = source_io_register(s, s->enabled, events);
1033                 if (r < 0)
1034                         return r;
1035         }
1036
1037         s->io.events = events;
1038         source_set_pending(s, false);
1039
1040         return 0;
1041 }
1042
1043 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1044         assert_return(s, -EINVAL);
1045         assert_return(revents, -EINVAL);
1046         assert_return(s->type == SOURCE_IO, -EDOM);
1047         assert_return(s->pending, -ENODATA);
1048         assert_return(!event_pid_changed(s->event), -ECHILD);
1049
1050         *revents = s->io.revents;
1051         return 0;
1052 }
1053
1054 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1055         assert_return(s, -EINVAL);
1056         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1057         assert_return(!event_pid_changed(s->event), -ECHILD);
1058
1059         return s->signal.sig;
1060 }
1061
1062 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1063         assert_return(s, -EINVAL);
1064         assert_return(!event_pid_changed(s->event), -ECHILD);
1065         *priority = s->priority;
1066         return 0;
1067 }
1068
1069 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1070         assert_return(s, -EINVAL);
1071         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1072         assert_return(!event_pid_changed(s->event), -ECHILD);
1073
1074         if (s->priority == priority)
1075                 return 0;
1076
1077         s->priority = priority;
1078
1079         if (s->pending)
1080                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1081
1082         if (s->prepare)
1083                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1084
1085         if (s->type == SOURCE_QUIT)
1086                 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1087
1088         return 0;
1089 }
1090
1091 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1092         assert_return(s, -EINVAL);
1093         assert_return(m, -EINVAL);
1094         assert_return(!event_pid_changed(s->event), -ECHILD);
1095
1096         *m = s->enabled;
1097         return 0;
1098 }
1099
1100 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1101         int r;
1102
1103         assert_return(s, -EINVAL);
1104         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1105         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1106         assert_return(!event_pid_changed(s->event), -ECHILD);
1107
1108         if (s->enabled == m)
1109                 return 0;
1110
1111         if (m == SD_EVENT_OFF) {
1112
1113                 switch (s->type) {
1114
1115                 case SOURCE_IO:
1116                         r = source_io_unregister(s);
1117                         if (r < 0)
1118                                 return r;
1119
1120                         s->enabled = m;
1121                         break;
1122
1123                 case SOURCE_MONOTONIC:
1124                         s->enabled = m;
1125                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1126                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1127                         break;
1128
1129                 case SOURCE_REALTIME:
1130                         s->enabled = m;
1131                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1132                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1133                         break;
1134
1135                 case SOURCE_SIGNAL:
1136                         s->enabled = m;
1137                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1138                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1139                                 event_update_signal_fd(s->event);
1140                         }
1141
1142                         break;
1143
1144                 case SOURCE_CHILD:
1145                         s->enabled = m;
1146
1147                         assert(s->event->n_enabled_child_sources > 0);
1148                         s->event->n_enabled_child_sources--;
1149
1150                         if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1151                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1152                                 event_update_signal_fd(s->event);
1153                         }
1154
1155                         break;
1156
1157                 case SOURCE_QUIT:
1158                         s->enabled = m;
1159                         prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1160                         break;
1161
1162                 case SOURCE_DEFER:
1163                         s->enabled = m;
1164                         break;
1165                 }
1166
1167         } else {
1168                 switch (s->type) {
1169
1170                 case SOURCE_IO:
1171                         r = source_io_register(s, m, s->io.events);
1172                         if (r < 0)
1173                                 return r;
1174
1175                         s->enabled = m;
1176                         break;
1177
1178                 case SOURCE_MONOTONIC:
1179                         s->enabled = m;
1180                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1181                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1182                         break;
1183
1184                 case SOURCE_REALTIME:
1185                         s->enabled = m;
1186                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1187                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1188                         break;
1189
1190                 case SOURCE_SIGNAL:
1191                         s->enabled = m;
1192
1193                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
1194                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1195                                 event_update_signal_fd(s->event);
1196                         }
1197                         break;
1198
1199                 case SOURCE_CHILD:
1200                         if (s->enabled == SD_EVENT_OFF) {
1201                                 s->event->n_enabled_child_sources++;
1202
1203                                 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1204                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1205                                         event_update_signal_fd(s->event);
1206                                 }
1207                         }
1208
1209                         s->enabled = m;
1210                         break;
1211
1212                 case SOURCE_QUIT:
1213                         s->enabled = m;
1214                         prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1215                         break;
1216
1217                 case SOURCE_DEFER:
1218                         s->enabled = m;
1219                         break;
1220                 }
1221         }
1222
1223         if (s->pending)
1224                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1225
1226         if (s->prepare)
1227                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1228
1229         return 0;
1230 }
1231
1232 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1233         assert_return(s, -EINVAL);
1234         assert_return(usec, -EINVAL);
1235         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1236         assert_return(!event_pid_changed(s->event), -ECHILD);
1237
1238         *usec = s->time.next;
1239         return 0;
1240 }
1241
1242 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1243         assert_return(s, -EINVAL);
1244         assert_return(usec != (uint64_t) -1, -EINVAL);
1245         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1246         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1247         assert_return(!event_pid_changed(s->event), -ECHILD);
1248
1249         s->time.next = usec;
1250
1251         source_set_pending(s, false);
1252
1253         if (s->type == SOURCE_REALTIME) {
1254                 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1255                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1256         } else {
1257                 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1258                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1259         }
1260
1261         return 0;
1262 }
1263
1264 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1265         assert_return(s, -EINVAL);
1266         assert_return(usec, -EINVAL);
1267         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1268         assert_return(!event_pid_changed(s->event), -ECHILD);
1269
1270         *usec = s->time.accuracy;
1271         return 0;
1272 }
1273
1274 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1275         assert_return(s, -EINVAL);
1276         assert_return(usec != (uint64_t) -1, -EINVAL);
1277         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1278         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1279         assert_return(!event_pid_changed(s->event), -ECHILD);
1280
1281         if (usec == 0)
1282                 usec = DEFAULT_ACCURACY_USEC;
1283
1284         s->time.accuracy = usec;
1285
1286         source_set_pending(s, false);
1287
1288         if (s->type == SOURCE_REALTIME)
1289                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1290         else
1291                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1292
1293         return 0;
1294 }
1295
1296 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1297         assert_return(s, -EINVAL);
1298         assert_return(pid, -EINVAL);
1299         assert_return(s->type == SOURCE_CHILD, -EDOM);
1300         assert_return(!event_pid_changed(s->event), -ECHILD);
1301
1302         *pid = s->child.pid;
1303         return 0;
1304 }
1305
1306 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1307         int r;
1308
1309         assert_return(s, -EINVAL);
1310         assert_return(s->type != SOURCE_QUIT, -EDOM);
1311         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1312         assert_return(!event_pid_changed(s->event), -ECHILD);
1313
1314         if (s->prepare == callback)
1315                 return 0;
1316
1317         if (callback && s->prepare) {
1318                 s->prepare = callback;
1319                 return 0;
1320         }
1321
1322         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1323         if (r < 0)
1324                 return r;
1325
1326         s->prepare = callback;
1327
1328         if (callback) {
1329                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1330                 if (r < 0)
1331                         return r;
1332         } else
1333                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1334
1335         return 0;
1336 }
1337
1338 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1339         assert_return(s, NULL);
1340
1341         return s->userdata;
1342 }
1343
1344 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1345         usec_t c;
1346         assert(e);
1347         assert(a <= b);
1348
1349         if (a <= 0)
1350                 return 0;
1351
1352         if (b <= a + 1)
1353                 return a;
1354
1355         /*
1356           Find a good time to wake up again between times a and b. We
1357           have two goals here:
1358
1359           a) We want to wake up as seldom as possible, hence prefer
1360              later times over earlier times.
1361
1362           b) But if we have to wake up, then let's make sure to
1363              dispatch as much as possible on the entire system.
1364
1365           We implement this by waking up everywhere at the same time
1366           within any given second if we can, synchronised via the
1367           perturbation value determined from the boot ID. If we can't,
1368           then we try to find the same spot within every 250ms
1369           step. Otherwise, we pick the last possible time to wake up.
1370         */
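        /* Example: with e->perturb == 300ms, a == 5.1s and b == 5.9s, the
         * per-second candidate below is c == 5.3s, which lies within [a, b]
         * and is returned. */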
1371
1372         c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;
1373         if (c >= b) {
1374                 if (_unlikely_(c < USEC_PER_SEC))
1375                         return b;
1376
1377                 c -= USEC_PER_SEC;
1378         }
1379
1380         if (c >= a)
1381                 return c;
1382
1383         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1384         if (c >= b) {
1385                 if (_unlikely_(c < USEC_PER_MSEC*250))
1386                         return b;
1387
1388                 c -= USEC_PER_MSEC*250;
1389         }
1390
1391         if (c >= a)
1392                 return c;
1393
1394         return b;
1395 }
1396
1397 static int event_arm_timer(
1398                 sd_event *e,
1399                 int timer_fd,
1400                 Prioq *earliest,
1401                 Prioq *latest,
1402                 usec_t *next) {
1403
1404         struct itimerspec its = {};
1405         sd_event_source *a, *b;
1406         usec_t t;
1407         int r;
1408
1409         assert_se(e);
1410         assert_se(next);
1411
1412         a = prioq_peek(earliest);
1413         if (!a || a->enabled == SD_EVENT_OFF) {
1414
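                /* The timer fd was never created, so nothing is armed and
                 * there is nothing to disarm. */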
1415                 if (timer_fd < 0)
1416                         return 0;
1417
1418                 if (*next == (usec_t) -1)
1419                         return 0;
1420
1421                 /* disarm */
1422                 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1423                 if (r < 0)
1424                         return r;
1425
1426                 *next = (usec_t) -1;
1427
1428                 return 0;
1429         }
1430
1431         b = prioq_peek(latest);
1432         assert_se(b && b->enabled != SD_EVENT_OFF);
1433
1434         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1435         if (*next == t)
1436                 return 0;
1437
1438         assert_se(timer_fd >= 0);
1439
1440         if (t == 0) {
1441                 /* We don't want to disarm here, just set the timer to some time long ago. */
1442                 its.it_value.tv_sec = 0;
1443                 its.it_value.tv_nsec = 1;
1444         } else
1445                 timespec_store(&its.it_value, t);
1446
1447         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1448         if (r < 0)
1449                 return r;
1450
1451         *next = t;
1452         return 0;
1453 }
1454
1455 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1456         assert(e);
1457         assert(s);
1458         assert(s->type == SOURCE_IO);
1459
1460         s->io.revents = events;
1461
1462         return source_set_pending(s, true);
1463 }
1464
1465 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1466         uint64_t x;
1467         ssize_t ss;
1468
1469         assert(e);
1470         assert(fd >= 0);
1471         assert(next);
1472
1473         assert_return(events == EPOLLIN, -EIO);
1474
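        /* Drain the timerfd: a successful read() yields the 8-byte expiration
         * counter, which we only need to discard here. */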
1475         ss = read(fd, &x, sizeof(x));
1476         if (ss < 0) {
1477                 if (errno == EAGAIN || errno == EINTR)
1478                         return 0;
1479
1480                 return -errno;
1481         }
1482
1483         if (ss != sizeof(x))
1484                 return -EIO;
1485
1486         *next = (usec_t) -1;
1487
1488         return 0;
1489 }
1490
1491 static int process_timer(
1492                 sd_event *e,
1493                 usec_t n,
1494                 Prioq *earliest,
1495                 Prioq *latest) {
1496
1497         sd_event_source *s;
1498         int r;
1499
1500         assert(e);
1501
1502         for (;;) {
1503                 s = prioq_peek(earliest);
1504                 if (!s ||
1505                     s->time.next > n ||
1506                     s->enabled == SD_EVENT_OFF ||
1507                     s->pending)
1508                         break;
1509
1510                 r = source_set_pending(s, true);
1511                 if (r < 0)
1512                         return r;
1513
1514                 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1515                 prioq_reshuffle(latest, s, &s->time.latest_index);
1516         }
1517
1518         return 0;
1519 }
1520
1521 static int process_child(sd_event *e) {
1522         sd_event_source *s;
1523         Iterator i;
1524         int r;
1525
1526         assert(e);
1527
1528         e->need_process_child = false;
1529
1530         /*
1531            So, this is ugly. We iteratively invoke waitid() with P_PID
1532            + WNOHANG for each PID we wait for, instead of using
1533            P_ALL. This is because we only want to get child
1534            information of very specific child processes, and not all
1535            of them. We might not have processed the SIGCHLD event of a
1536            previous invocation and we don't want to maintain an
1537            unbounded *per-child* event queue, hence we really don't
1538            want anything flushed out of the kernel's queue that we
1539            don't care about. Since this is O(n) this means that if you
1540            have a lot of processes you probably want to handle SIGCHLD
1541            yourself.
1542         */
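        /* Note that WNOHANG is always passed below, so waitid() never blocks;
         * if a child has not changed state yet, si_pid stays 0 and the source
         * is simply left non-pending. */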
1543
1544         HASHMAP_FOREACH(s, e->child_sources, i) {
1545                 assert(s->type == SOURCE_CHILD);
1546
1547                 if (s->pending)
1548                         continue;
1549
1550                 if (s->enabled == SD_EVENT_OFF)
1551                         continue;
1552
1553                 zero(s->child.siginfo);
1554                 r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
1555                 if (r < 0)
1556                         return -errno;
1557
1558                 if (s->child.siginfo.si_pid != 0) {
1559                         r = source_set_pending(s, true);
1560                         if (r < 0)
1561                                 return r;
1562                 }
1563         }
1564
1565         return 0;
1566 }
1567
1568 static int process_signal(sd_event *e, uint32_t events) {
1569         bool read_one = false;
1570         int r;
1571
1572         assert(e);
1573         assert(e->signal_sources);
1574
1575         assert_return(events == EPOLLIN, -EIO);
1576
1577         for (;;) {
1578                 struct signalfd_siginfo si;
1579                 ssize_t ss;
1580                 sd_event_source *s;
1581
1582                 ss = read(e->signal_fd, &si, sizeof(si));
1583                 if (ss < 0) {
1584                         if (errno == EAGAIN || errno == EINTR)
1585                                 return read_one;
1586
1587                         return -errno;
1588                 }
1589
1590                 if (ss != sizeof(si))
1591                         return -EIO;
1592
1593                 read_one = true;
1594
1595                 s = e->signal_sources[si.ssi_signo];
1596                 if (si.ssi_signo == SIGCHLD) {
1597                         r = process_child(e);
1598                         if (r < 0)
1599                                 return r;
1600                         if (r > 0 || !s)
1601                                 continue;
1602                 } else if (!s)
1603                         return -EIO;
1604
1605
1606                 s->signal.siginfo = si;
1607                 r = source_set_pending(s, true);
1608                 if (r < 0)
1609                         return r;
1610         }
1611
1612
1613         return 0;
1614 }
1615
1616 static int source_dispatch(sd_event_source *s) {
1617         int r = 0;
1618
1619         assert(s);
1620         assert(s->pending || s->type == SOURCE_QUIT);
1621
1622         if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1623                 r = source_set_pending(s, false);
1624                 if (r < 0)
1625                         return r;
1626         }
1627
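        /* A ONESHOT source is disabled before its handler runs, so the handler
         * may re-enable it if it wants to be dispatched again. */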
1628         if (s->enabled == SD_EVENT_ONESHOT) {
1629                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1630                 if (r < 0)
1631                         return r;
1632         }
1633
1634         sd_event_source_ref(s);
1635
1636         switch (s->type) {
1637
1638         case SOURCE_IO:
1639                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1640                 break;
1641
1642         case SOURCE_MONOTONIC:
1643                 r = s->time.callback(s, s->time.next, s->userdata);
1644                 break;
1645
1646         case SOURCE_REALTIME:
1647                 r = s->time.callback(s, s->time.next, s->userdata);
1648                 break;
1649
1650         case SOURCE_SIGNAL:
1651                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1652                 break;
1653
1654         case SOURCE_CHILD:
1655                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1656                 break;
1657
1658         case SOURCE_DEFER:
1659                 r = s->defer.callback(s, s->userdata);
1660                 break;
1661
1662         case SOURCE_QUIT:
1663                 r = s->quit.callback(s, s->userdata);
1664                 break;
1665         }
1666
1667         sd_event_source_unref(s);
1668
1669         return r;
1670 }
1671
1672 static int event_prepare(sd_event *e) {
1673         int r;
1674
1675         assert(e);
1676
1677         for (;;) {
1678                 sd_event_source *s;
1679
1680                 s = prioq_peek(e->prepare);
1681                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1682                         break;
1683
1684                 s->prepare_iteration = e->iteration;
1685                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1686                 if (r < 0)
1687                         return r;
1688
1689                 assert(s->prepare);
1690                 r = s->prepare(s, s->userdata);
1691                 if (r < 0)
1692                         return r;
1693
1694         }
1695
1696         return 0;
1697 }
1698
1699 static int dispatch_quit(sd_event *e) {
1700         sd_event_source *p;
1701         int r;
1702
1703         assert(e);
1704
1705         p = prioq_peek(e->quit);
1706         if (!p || p->enabled == SD_EVENT_OFF) {
1707                 e->state = SD_EVENT_FINISHED;
1708                 return 0;
1709         }
1710
1711         sd_event_ref(e);
1712         e->iteration++;
1713         e->state = SD_EVENT_QUITTING;
1714
1715         r = source_dispatch(p);
1716
1717         e->state = SD_EVENT_PASSIVE;
1718         sd_event_unref(e);
1719
1720         return r;
1721 }
1722
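/* Return the highest-priority pending event source, or NULL if nothing is
 * pending or the head of the pending queue is disabled. */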
1723 static sd_event_source* event_next_pending(sd_event *e) {
1724         sd_event_source *p;
1725
1726         assert(e);
1727
1728         p = prioq_peek(e->pending);
1729         if (!p)
1730                 return NULL;
1731
1732         if (p->enabled == SD_EVENT_OFF)
1733                 return NULL;
1734
1735         return p;
1736 }
1737
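/* Run one iteration of the event loop: if a quit was requested, dispatch
 * the quit sources instead. Otherwise run the prepare callbacks, arm the
 * timerfds, wait for events with epoll_wait() (timeout is in microseconds,
 * (uint64_t) -1 means "wait forever"), process timer, signal, child and
 * I/O events, and finally dispatch at most one pending event source.
 * Returns a negative errno-style error code on failure. */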
1738 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1739         struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1740         sd_event_source *p;
1741         int r, i, m;
1742
1743         assert_return(e, -EINVAL);
1744         assert_return(!event_pid_changed(e), -ECHILD);
1745         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1746         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1747
1748         if (e->quit_requested)
1749                 return dispatch_quit(e);
1750
1751         sd_event_ref(e);
1752         e->iteration++;
1753         e->state = SD_EVENT_RUNNING;
1754
1755         r = event_prepare(e);
1756         if (r < 0)
1757                 goto finish;
1758
1759         if (event_next_pending(e) || e->need_process_child)
1760                 timeout = 0;
1761
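        /* Arming the timerfds is only necessary if we might actually
         * block in epoll_wait() below. */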
1762         if (timeout > 0) {
1763                 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1764                 if (r < 0)
1765                         goto finish;
1766
1767                 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
1768                 if (r < 0)
1769                         goto finish;
1770         }
1771
1772         m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1773                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
1774         if (m < 0) {
1775                 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
1776                 goto finish;
1777         }
1778
1779         dual_timestamp_get(&e->timestamp);
1780
1781         for (i = 0; i < m; i++) {
1782
1783                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1784                         r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1785                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1786                         r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1787                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1788                         r = process_signal(e, ev_queue[i].events);
1789                 else
1790                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1791
1792                 if (r < 0)
1793                         goto finish;
1794         }
1795
1796         r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1797         if (r < 0)
1798                 goto finish;
1799
1800         r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1801         if (r < 0)
1802                 goto finish;
1803
1804         if (e->need_process_child) {
1805                 r = process_child(e);
1806                 if (r < 0)
1807                         goto finish;
1808         }
1809
1810         p = event_next_pending(e);
1811         if (!p) {
1812                 r = 0;
1813                 goto finish;
1814         }
1815
1816         r = source_dispatch(p);
1817
1818 finish:
1819         e->state = SD_EVENT_PASSIVE;
1820         sd_event_unref(e);
1821
1822         return r;
1823 }
1824
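/* Call sd_event_run() with an infinite timeout until the loop reaches the
 * SD_EVENT_FINISHED state, i.e. until a quit was requested and no enabled
 * quit sources remain to be dispatched. */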
1825 _public_ int sd_event_loop(sd_event *e) {
1826         int r;
1827
1828         assert_return(e, -EINVAL);
1829         assert_return(!event_pid_changed(e), -ECHILD);
1830         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1831
1832         sd_event_ref(e);
1833
1834         while (e->state != SD_EVENT_FINISHED) {
1835                 r = sd_event_run(e, (uint64_t) -1);
1836                 if (r < 0)
1837                         goto finish;
1838         }
1839
1840         r = 0;
1841
1842 finish:
1843         sd_event_unref(e);
1844         return r;
1845 }
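/* A minimal, illustrative usage sketch (not taken from this file): obtain
 * the per-thread default loop, attach sources elsewhere, then run it until
 * something calls sd_event_request_quit(). EXIT_SUCCESS/EXIT_FAILURE come
 * from <stdlib.h>; everything else is declared in sd-event.h.
 *
 *     #include <stdlib.h>
 *     #include "sd-event.h"
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return EXIT_FAILURE;
 *
 *             ... attach event sources to e here ...
 *
 *             r = sd_event_loop(e);
 *             sd_event_unref(e);
 *             return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 *     }
 */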
1846
1847 _public_ int sd_event_get_state(sd_event *e) {
1848         assert_return(e, -EINVAL);
1849         assert_return(!event_pid_changed(e), -ECHILD);
1850
1851         return e->state;
1852 }
1853
1854 _public_ int sd_event_get_quit(sd_event *e) {
1855         assert_return(e, -EINVAL);
1856         assert_return(!event_pid_changed(e), -ECHILD);
1857
1858         return e->quit_requested;
1859 }
1860
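/* Ask the loop to quit: the next call to sd_event_run() will invoke
 * dispatch_quit() instead of running a normal iteration. */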
1861 _public_ int sd_event_request_quit(sd_event *e) {
1862         assert_return(e, -EINVAL);
1863         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1864         assert_return(!event_pid_changed(e), -ECHILD);
1865
1866         e->quit_requested = true;
1867         return 0;
1868 }
1869
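/* The two "now" accessors below return the timestamps taken right after
 * the most recent epoll_wait() wake-up in sd_event_run(), not the current
 * time. */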
1870 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
1871         assert_return(e, -EINVAL);
1872         assert_return(usec, -EINVAL);
1873         assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1874         assert_return(!event_pid_changed(e), -ECHILD);
1875
1876         *usec = e->timestamp.realtime;
1877         return 0;
1878 }
1879
1880 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1881         assert_return(e, -EINVAL);
1882         assert_return(usec, -EINVAL);
1883         assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1884         assert_return(!event_pid_changed(e), -ECHILD);
1885
1886         *usec = e->timestamp.monotonic;
1887         return 0;
1888 }
1889
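/* Return the per-thread default event loop, allocating it on first use.
 * If ret is NULL, only report whether a default loop already exists for
 * the calling thread. */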
1890 _public_ int sd_event_default(sd_event **ret) {
1891
1892         static __thread sd_event *default_event = NULL;
1893         sd_event *e;
1894         int r;
1895
1896         if (!ret)
1897                 return !!default_event;
1898
1899         if (default_event) {
1900                 *ret = sd_event_ref(default_event);
1901                 return 0;
1902         }
1903
1904         r = sd_event_new(&e);
1905         if (r < 0)
1906                 return r;
1907
1908         e->default_event_ptr = &default_event;
1909         e->tid = gettid();
1910         default_event = e;
1911
1912         *ret = e;
1913         return 1;
1914 }
1915
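/* Return the TID of the thread this event loop is attached to (see
 * sd_event_default() above), or -ENXIO if none has been recorded. */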
1916 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
1917         assert_return(e, -EINVAL);
1918         assert_return(tid, -EINVAL);
1919         assert_return(!event_pid_changed(e), -ECHILD);
1920
1921         if (e->tid != 0) {
1922                 *tid = e->tid;
1923                 return 0;
1924         }
1925
1926         return -ENXIO;
1927 }