chiark / gitweb /
sd-event: split run into prepare/wait/dispatch
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
/* Maximum number of events fetched from the kernel per epoll_wait() call */
#define EPOLL_QUEUE_MAX 512U
/* Accuracy applied to time event sources when the caller passes 0 to sd_event_add_time() */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* All kinds of event sources the loop supports. The five time types map 1:1
 * to kernel clocks, see event_source_type_to_clock(). */
typedef enum EventSourceType {
        SOURCE_IO,                  /* a file descriptor, watched via epoll */
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,              /* a UNIX signal, received via signalfd */
        SOURCE_CHILD,               /* a child process, tracked via SIGCHLD */
        SOURCE_DEFER,               /* fires on the next iteration */
        SOURCE_POST,                /* fires after other sources were dispatched */
        SOURCE_EXIT,                /* fires during loop shutdown */
        SOURCE_WATCHDOG,            /* internal sd_notify() watchdog pings */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the five source types that are driven by a kernel clock */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* loop we are attached to; NULL after source_disconnect() */
        void *userdata;
        sd_event_handler_t prepare; /* optional per-source callback run before polling */

        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;             /* queued in event->pending, awaiting dispatch */
        bool dispatching:1;         /* callback currently running — presumably guards re-entry; confirm in dispatch code */
        bool floating:1;            /* owned by the loop itself (source refs the loop otherwise) */

        int64_t priority;           /* lower values dispatch first */
        unsigned pending_index;     /* position in event->pending, or PRIOQ_IDX_NULL */
        unsigned prepare_index;     /* position in event->prepare, or PRIOQ_IDX_NULL */
        unsigned pending_iteration; /* loop iteration in which we became pending (for fair ordering) */
        unsigned prepare_iteration; /* loop iteration in which prepare() last ran */

        LIST_FIELDS(sd_event_source, sources); /* membership in event->sources */

        /* Per-type state; which member is valid is selected by 'type' */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask we registered for */
                        uint32_t revents;   /* events reported back (filled in outside this chunk) */
                        bool registered:1;  /* currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* earliest dispatch time, and allowed slack past it */
                        unsigned earliest_index; /* position in clock_data->earliest */
                        unsigned latest_index;   /* position in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* details of the received signal */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo; /* child state, presumably from waitid() — set outside this chunk */
                        pid_t pid;
                        int options;       /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* position in event->exit */
                } exit;
        };
};
120
struct clock_data {
        int fd; /* timerfd for this clock, or -1 if not allocated yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* time the timerfd is currently armed for; USEC_INFINITY when unarmed */

        bool needs_rearm:1; /* prioqs changed since the timerfd was last programmed */
};
137
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int signal_fd;   /* signalfd covering 'sigset', or -1 */
        int watchdog_fd; /* timerfd driving sd_notify() watchdog pings, or -1 */

        Prioq *pending;  /* sources awaiting dispatch, see pending_prioq_compare() */
        Prioq *prepare;  /* sources with a prepare() callback, see prepare_prioq_compare() */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb; /* per-boot wakeup offset, see initialize_perturb(); USEC_INFINITY until computed */

        sigset_t sigset;                  /* signals currently covered by signal_fd */
        sd_event_source **signal_sources; /* array of _NSIG entries, indexed by signal number */

        Hashmap *child_sources; /* pid (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit; /* SOURCE_EXIT sources, see exit_prioq_compare() */

        pid_t original_pid; /* PID at creation time, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;   /* realtime/monotonic of the current wakeup */
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1; /* SIGCHLD seen, child sources must be re-checked */
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* points at the per-thread default-event cache, cleared in event_free() */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources); /* all sources attached to this loop */
};
191
192 static void source_disconnect(sd_event_source *s);
193
194 static int pending_prioq_compare(const void *a, const void *b) {
195         const sd_event_source *x = a, *y = b;
196
197         assert(x->pending);
198         assert(y->pending);
199
200         /* Enabled ones first */
201         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
202                 return -1;
203         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
204                 return 1;
205
206         /* Lower priority values first */
207         if (x->priority < y->priority)
208                 return -1;
209         if (x->priority > y->priority)
210                 return 1;
211
212         /* Older entries first */
213         if (x->pending_iteration < y->pending_iteration)
214                 return -1;
215         if (x->pending_iteration > y->pending_iteration)
216                 return 1;
217
218         /* Stability for the rest */
219         if (x < y)
220                 return -1;
221         if (x > y)
222                 return 1;
223
224         return 0;
225 }
226
227 static int prepare_prioq_compare(const void *a, const void *b) {
228         const sd_event_source *x = a, *y = b;
229
230         assert(x->prepare);
231         assert(y->prepare);
232
233         /* Move most recently prepared ones last, so that we can stop
234          * preparing as soon as we hit one that has already been
235          * prepared in the current iteration */
236         if (x->prepare_iteration < y->prepare_iteration)
237                 return -1;
238         if (x->prepare_iteration > y->prepare_iteration)
239                 return 1;
240
241         /* Enabled ones first */
242         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
243                 return -1;
244         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
245                 return 1;
246
247         /* Lower priority values first */
248         if (x->priority < y->priority)
249                 return -1;
250         if (x->priority > y->priority)
251                 return 1;
252
253         /* Stability for the rest */
254         if (x < y)
255                 return -1;
256         if (x > y)
257                 return 1;
258
259         return 0;
260 }
261
262 static int earliest_time_prioq_compare(const void *a, const void *b) {
263         const sd_event_source *x = a, *y = b;
264
265         assert(EVENT_SOURCE_IS_TIME(x->type));
266         assert(x->type == y->type);
267
268         /* Enabled ones first */
269         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
270                 return -1;
271         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
272                 return 1;
273
274         /* Move the pending ones to the end */
275         if (!x->pending && y->pending)
276                 return -1;
277         if (x->pending && !y->pending)
278                 return 1;
279
280         /* Order by time */
281         if (x->time.next < y->time.next)
282                 return -1;
283         if (x->time.next > y->time.next)
284                 return 1;
285
286         /* Stability for the rest */
287         if (x < y)
288                 return -1;
289         if (x > y)
290                 return 1;
291
292         return 0;
293 }
294
295 static int latest_time_prioq_compare(const void *a, const void *b) {
296         const sd_event_source *x = a, *y = b;
297
298         assert(EVENT_SOURCE_IS_TIME(x->type));
299         assert(x->type == y->type);
300
301         /* Enabled ones first */
302         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
303                 return -1;
304         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
305                 return 1;
306
307         /* Move the pending ones to the end */
308         if (!x->pending && y->pending)
309                 return -1;
310         if (x->pending && !y->pending)
311                 return 1;
312
313         /* Order by time */
314         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
315                 return -1;
316         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
317                 return 1;
318
319         /* Stability for the rest */
320         if (x < y)
321                 return -1;
322         if (x > y)
323                 return 1;
324
325         return 0;
326 }
327
328 static int exit_prioq_compare(const void *a, const void *b) {
329         const sd_event_source *x = a, *y = b;
330
331         assert(x->type == SOURCE_EXIT);
332         assert(y->type == SOURCE_EXIT);
333
334         /* Enabled ones first */
335         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
336                 return -1;
337         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338                 return 1;
339
340         /* Lower priority values first */
341         if (x->priority < y->priority)
342                 return -1;
343         if (x->priority > y->priority)
344                 return 1;
345
346         /* Stability for the rest */
347         if (x < y)
348                 return -1;
349         if (x > y)
350                 return 1;
351
352         return 0;
353 }
354
355 static void free_clock_data(struct clock_data *d) {
356         assert(d);
357
358         safe_close(d->fd);
359         prioq_free(d->earliest);
360         prioq_free(d->latest);
361 }
362
/* Destroys the event loop object and everything it owns. Called from
 * sd_event_unref() once the last reference is gone. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be attached here: any
         * non-floating source holds a reference on the loop, so the
         * refcount could not have dropped to zero. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Invalidate the default-event cache if we are the cached loop */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
399
/* Allocates a new event loop object with a fresh epoll instance.
 * Returns 0 and stores the loop in *ret on success, or a negative
 * errno-style error (-ENOMEM, or epoll_create1() failures). */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark all fds unallocated up front, so that event_free() is safe
         * to run on a partially initialized object in the error path */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY; /* i.e. not computed yet, see initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
437
/* Takes an additional reference on the event loop and returns it, for
 * call chaining. Returns NULL if e is NULL. */
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}
446
447 _public_ sd_event* sd_event_unref(sd_event *e) {
448
449         if (!e)
450                 return NULL;
451
452         assert(e->n_ref >= 1);
453         e->n_ref--;
454
455         if (e->n_ref <= 0)
456                 event_free(e);
457
458         return NULL;
459 }
460
461 static bool event_pid_changed(sd_event *e) {
462         assert(e);
463
464         /* We don't support people creating am event loop and keeping
465          * it around over a fork(). Let's complain. */
466
467         return e->original_pid != getpid();
468 }
469
470 static int source_io_unregister(sd_event_source *s) {
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475
476         if (!s->io.registered)
477                 return 0;
478
479         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
480         if (r < 0)
481                 return -errno;
482
483         s->io.registered = false;
484         return 0;
485 }
486
487 static int source_io_register(
488                 sd_event_source *s,
489                 int enabled,
490                 uint32_t events) {
491
492         struct epoll_event ev = {};
493         int r;
494
495         assert(s);
496         assert(s->type == SOURCE_IO);
497         assert(enabled != SD_EVENT_OFF);
498
499         ev.events = events;
500         ev.data.ptr = s;
501
502         if (enabled == SD_EVENT_ONESHOT)
503                 ev.events |= EPOLLONESHOT;
504
505         if (s->io.registered)
506                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
507         else
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
509
510         if (r < 0)
511                 return -errno;
512
513         s->io.registered = true;
514
515         return 0;
516 }
517
518 static clockid_t event_source_type_to_clock(EventSourceType t) {
519
520         switch (t) {
521
522         case SOURCE_TIME_REALTIME:
523                 return CLOCK_REALTIME;
524
525         case SOURCE_TIME_BOOTTIME:
526                 return CLOCK_BOOTTIME;
527
528         case SOURCE_TIME_MONOTONIC:
529                 return CLOCK_MONOTONIC;
530
531         case SOURCE_TIME_REALTIME_ALARM:
532                 return CLOCK_REALTIME_ALARM;
533
534         case SOURCE_TIME_BOOTTIME_ALARM:
535                 return CLOCK_BOOTTIME_ALARM;
536
537         default:
538                 return (clockid_t) -1;
539         }
540 }
541
542 static EventSourceType clock_to_event_source_type(clockid_t clock) {
543
544         switch (clock) {
545
546         case CLOCK_REALTIME:
547                 return SOURCE_TIME_REALTIME;
548
549         case CLOCK_BOOTTIME:
550                 return SOURCE_TIME_BOOTTIME;
551
552         case CLOCK_MONOTONIC:
553                 return SOURCE_TIME_MONOTONIC;
554
555         case CLOCK_REALTIME_ALARM:
556                 return SOURCE_TIME_REALTIME_ALARM;
557
558         case CLOCK_BOOTTIME_ALARM:
559                 return SOURCE_TIME_BOOTTIME_ALARM;
560
561         default:
562                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
563         }
564 }
565
566 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
567         assert(e);
568
569         switch (t) {
570
571         case SOURCE_TIME_REALTIME:
572                 return &e->realtime;
573
574         case SOURCE_TIME_BOOTTIME:
575                 return &e->boottime;
576
577         case SOURCE_TIME_MONOTONIC:
578                 return &e->monotonic;
579
580         case SOURCE_TIME_REALTIME_ALARM:
581                 return &e->realtime_alarm;
582
583         case SOURCE_TIME_BOOTTIME_ALARM:
584                 return &e->boottime_alarm;
585
586         default:
587                 return NULL;
588         }
589 }
590
/* Detaches a source from its event loop: deregisters it from the
 * kernel and from all loop-internal data structures, but does not
 * free the source object itself. Safe to call on an already
 * disconnected source. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                /* The scheduling window changed, the timerfd must be reprogrammed */
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        /* Keep SIGCHLD in the mask if enabled child sources still rely on it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        /* Drop SIGCHLD from the mask unless an explicit signal source still wants it */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pinned the loop; drop that reference now */
        if (!s->floating)
                sd_event_unref(event);
}
683
684 static void source_free(sd_event_source *s) {
685         assert(s);
686
687         source_disconnect(s);
688         free(s);
689 }
690
/* Marks or unmarks a source as pending, keeping the loop's pending
 * prioq and — for time sources — the per-clock prioqs in sync.
 * Returns 0 on success, a negative errno (-ENOMEM) on failure. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* Exit sources are dispatched from their own prioq and never go pending */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when the source became pending, so that older
                 * entries of equal priority are dispatched first */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                /* Pending-ness changes a time source's position in the
                 * per-clock prioqs (pending ones sort last), so reshuffle
                 * and force the timerfd to be rearmed */
                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
726
727 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
728         sd_event_source *s;
729
730         assert(e);
731
732         s = new0(sd_event_source, 1);
733         if (!s)
734                 return NULL;
735
736         s->n_ref = 1;
737         s->event = e;
738         s->floating = floating;
739         s->type = type;
740         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
741
742         if (!floating)
743                 sd_event_ref(e);
744
745         LIST_PREPEND(sources, e->sources, s);
746         e->n_sources ++;
747
748         return s;
749 }
750
751 _public_ int sd_event_add_io(
752                 sd_event *e,
753                 sd_event_source **ret,
754                 int fd,
755                 uint32_t events,
756                 sd_event_io_handler_t callback,
757                 void *userdata) {
758
759         sd_event_source *s;
760         int r;
761
762         assert_return(e, -EINVAL);
763         assert_return(fd >= 0, -EINVAL);
764         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
765         assert_return(callback, -EINVAL);
766         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
767         assert_return(!event_pid_changed(e), -ECHILD);
768
769         s = source_new(e, !ret, SOURCE_IO);
770         if (!s)
771                 return -ENOMEM;
772
773         s->io.fd = fd;
774         s->io.events = events;
775         s->io.callback = callback;
776         s->userdata = userdata;
777         s->enabled = SD_EVENT_ON;
778
779         r = source_io_register(s, s->enabled, events);
780         if (r < 0) {
781                 source_free(s);
782                 return r;
783         }
784
785         if (ret)
786                 *ret = s;
787
788         return 0;
789 }
790
791 static void initialize_perturb(sd_event *e) {
792         sd_id128_t bootid = {};
793
794         /* When we sleep for longer, we try to realign the wakeup to
795            the same time wihtin each minute/second/250ms, so that
796            events all across the system can be coalesced into a single
797            CPU wakeup. However, let's take some system-specific
798            randomness for this value, so that in a network of systems
799            with synced clocks timer events are distributed a
800            bit. Here, we calculate a perturbation usec offset from the
801            boot ID. */
802
803         if (_likely_(e->perturb != USEC_INFINITY))
804                 return;
805
806         if (sd_id128_get_boot(&bootid) >= 0)
807                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
808 }
809
810 static int event_setup_timer_fd(
811                 sd_event *e,
812                 struct clock_data *d,
813                 clockid_t clock) {
814
815         struct epoll_event ev = {};
816         int r, fd;
817
818         assert(e);
819         assert(d);
820
821         if (_likely_(d->fd >= 0))
822                 return 0;
823
824         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
825         if (fd < 0)
826                 return -errno;
827
828         ev.events = EPOLLIN;
829         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
830
831         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
832         if (r < 0) {
833                 safe_close(fd);
834                 return -errno;
835         }
836
837         d->fd = fd;
838         return 0;
839 }
840
/* Creates a new time event source on the given clock, to fire at
 * 'usec' (in that clock's time base) with the given accuracy window
 * (0 selects DEFAULT_ACCURACY_USEC). The source starts enabled in
 * ONESHOT mode. Returns 0 on success; -ENOTSUP for unsupported
 * clocks, -ENOMEM, or timerfd/epoll errors. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the per-clock infrastructure: both scheduling
         * prioqs and the timerfd itself */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* A new entry changes the scheduling window, the timerfd must be rearmed */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        /* source_free() → source_disconnect() removes whatever prioq
         * entries were already added above */
        source_free(s);
        return r;
}
916
917 static int event_update_signal_fd(sd_event *e) {
918         struct epoll_event ev = {};
919         bool add_to_epoll;
920         int r;
921
922         assert(e);
923
924         add_to_epoll = e->signal_fd < 0;
925
926         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
927         if (r < 0)
928                 return -errno;
929
930         e->signal_fd = r;
931
932         if (!add_to_epoll)
933                 return 0;
934
935         ev.events = EPOLLIN;
936         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
937
938         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
939         if (r < 0) {
940                 e->signal_fd = safe_close(e->signal_fd);
941                 return -errno;
942         }
943
944         return 0;
945 }
946
/* Default handler installed by sd_event_add_signal() when the caller
 * passes no callback: terminates the loop, using the signal number
 * (smuggled through userdata) as exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
952
953 _public_ int sd_event_add_signal(
954                 sd_event *e,
955                 sd_event_source **ret,
956                 int sig,
957                 sd_event_signal_handler_t callback,
958                 void *userdata) {
959
960         sd_event_source *s;
961         sigset_t ss;
962         int r;
963
964         assert_return(e, -EINVAL);
965         assert_return(sig > 0, -EINVAL);
966         assert_return(sig < _NSIG, -EINVAL);
967         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
968         assert_return(!event_pid_changed(e), -ECHILD);
969
970         if (!callback)
971                 callback = signal_exit_callback;
972
973         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
974         if (r < 0)
975                 return -errno;
976
977         if (!sigismember(&ss, sig))
978                 return -EBUSY;
979
980         if (!e->signal_sources) {
981                 e->signal_sources = new0(sd_event_source*, _NSIG);
982                 if (!e->signal_sources)
983                         return -ENOMEM;
984         } else if (e->signal_sources[sig])
985                 return -EBUSY;
986
987         s = source_new(e, !ret, SOURCE_SIGNAL);
988         if (!s)
989                 return -ENOMEM;
990
991         s->signal.sig = sig;
992         s->signal.callback = callback;
993         s->userdata = userdata;
994         s->enabled = SD_EVENT_ON;
995
996         e->signal_sources[sig] = s;
997         assert_se(sigaddset(&e->sigset, sig) == 0);
998
999         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
1000                 r = event_update_signal_fd(e);
1001                 if (r < 0) {
1002                         source_free(s);
1003                         return r;
1004                 }
1005         }
1006
1007         if (ret)
1008                 *ret = s;
1009
1010         return 0;
1011 }
1012
/* Creates a new child-process event source watching 'pid' for the
 * state changes selected by 'options' (WEXITED|WSTOPPED|WCONTINUED).
 * Only one source per PID is allowed (-EBUSY otherwise). The source
 * starts enabled in ONESHOT mode. Child tracking is driven by SIGCHLD
 * via the loop's signalfd. Returns 0 on success. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
        if (r < 0)
                return r;

        /* At most one source per child process */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* source_disconnect() undoes this counter on the failure path below */
        e->n_enabled_child_sources ++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        /* If no explicit SIGCHLD signal source exists, the signalfd does
         * not cover SIGCHLD yet — rebuild it */
        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* The child might have exited already; make the loop check on the next run */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1074
1075 _public_ int sd_event_add_defer(
1076                 sd_event *e,
1077                 sd_event_source **ret,
1078                 sd_event_handler_t callback,
1079                 void *userdata) {
1080
1081         sd_event_source *s;
1082         int r;
1083
1084         assert_return(e, -EINVAL);
1085         assert_return(callback, -EINVAL);
1086         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1087         assert_return(!event_pid_changed(e), -ECHILD);
1088
1089         s = source_new(e, !ret, SOURCE_DEFER);
1090         if (!s)
1091                 return -ENOMEM;
1092
1093         s->defer.callback = callback;
1094         s->userdata = userdata;
1095         s->enabled = SD_EVENT_ONESHOT;
1096
1097         r = source_set_pending(s, true);
1098         if (r < 0) {
1099                 source_free(s);
1100                 return r;
1101         }
1102
1103         if (ret)
1104                 *ret = s;
1105
1106         return 0;
1107 }
1108
1109 _public_ int sd_event_add_post(
1110                 sd_event *e,
1111                 sd_event_source **ret,
1112                 sd_event_handler_t callback,
1113                 void *userdata) {
1114
1115         sd_event_source *s;
1116         int r;
1117
1118         assert_return(e, -EINVAL);
1119         assert_return(callback, -EINVAL);
1120         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1121         assert_return(!event_pid_changed(e), -ECHILD);
1122
1123         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1124         if (r < 0)
1125                 return r;
1126
1127         s = source_new(e, !ret, SOURCE_POST);
1128         if (!s)
1129                 return -ENOMEM;
1130
1131         s->post.callback = callback;
1132         s->userdata = userdata;
1133         s->enabled = SD_EVENT_ON;
1134
1135         r = set_put(e->post_sources, s);
1136         if (r < 0) {
1137                 source_free(s);
1138                 return r;
1139         }
1140
1141         if (ret)
1142                 *ret = s;
1143
1144         return 0;
1145 }
1146
1147 _public_ int sd_event_add_exit(
1148                 sd_event *e,
1149                 sd_event_source **ret,
1150                 sd_event_handler_t callback,
1151                 void *userdata) {
1152
1153         sd_event_source *s;
1154         int r;
1155
1156         assert_return(e, -EINVAL);
1157         assert_return(callback, -EINVAL);
1158         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1159         assert_return(!event_pid_changed(e), -ECHILD);
1160
1161         if (!e->exit) {
1162                 e->exit = prioq_new(exit_prioq_compare);
1163                 if (!e->exit)
1164                         return -ENOMEM;
1165         }
1166
1167         s = source_new(e, !ret, SOURCE_EXIT);
1168         if (!s)
1169                 return -ENOMEM;
1170
1171         s->exit.callback = callback;
1172         s->userdata = userdata;
1173         s->exit.prioq_index = PRIOQ_IDX_NULL;
1174         s->enabled = SD_EVENT_ONESHOT;
1175
1176         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1177         if (r < 0) {
1178                 source_free(s);
1179                 return r;
1180         }
1181
1182         if (ret)
1183                 *ret = s;
1184
1185         return 0;
1186 }
1187
/* Increases the reference count of the event source by one and returns
 * it. Returns NULL only if s is NULL. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1196
/* Drops one reference from the event source. When the count reaches
 * zero the source is freed -- unless this happens from within the
 * source's own dispatch callback; see the comment below. Always returns
 * NULL, so callers can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1225
/* Returns the event loop object this source is attached to, without
 * taking a reference; NULL if s is NULL. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1231
/* Returns whether the source is currently queued for dispatching
 * (1/0), or a negative error. Exit sources have no pending state. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1240
/* Returns the file descriptor an IO event source watches, or a
 * negative error if the source is not an IO source. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1248
/* Swaps the file descriptor an IO event source watches. If the source
 * is enabled, the new fd is registered with epoll before the old one is
 * removed, and the change is rolled back if registration fails. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled source: just remember the fd, register lazily
                 * when the source is enabled again */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first so we can roll back on error */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best-effort removal of the old fd from the epoll set */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1284
/* Stores the epoll event mask the IO source is configured for in
 * *events. Returns 0 on success, negative error otherwise. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1294
/* Changes the epoll event mask of an IO source. Only the listed EPOLL*
 * flags are accepted. If the source is enabled, the kernel-side epoll
 * registration is updated immediately; the pending flag is cleared so
 * stale revents are not dispatched with the old mask. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
1319
/* Stores the events that actually triggered on a pending IO source in
 * *revents. Only valid while the source is pending (-ENODATA
 * otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1330
/* Returns the signal number a signal event source watches, or a
 * negative error if the source is not a signal source. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1338
1339 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1340         assert_return(s, -EINVAL);
1341         assert_return(!event_pid_changed(s->event), -ECHILD);
1342
1343         return s->priority;
1344 }
1345
/* Changes the dispatch priority of an event source (lower values are
 * dispatched first). The pending, prepare and exit priority queues are
 * reshuffled so the new ordering takes effect immediately. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1367
/* Stores the enablement state (SD_EVENT_OFF/ON/ONESHOT) of the source
 * in *m. Returns 0 on success, negative error otherwise. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1376
/* Changes the enablement state of an event source to SD_EVENT_OFF,
 * SD_EVENT_ON or SD_EVENT_ONESHOT. Each source type requires its own
 * bookkeeping when toggled: epoll registration for IO sources, prioq
 * reshuffles for timer/exit sources, and signalfd/sigset maintenance
 * for signal and child sources. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Disabled timers sort to the end of the prioqs;
                         * the timerfd must be re-armed accordingly */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the signalfd set if child sources
                         * still rely on it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Drop SIGCHLD from the signalfd set unless an
                         * explicit SIGCHLD signal source still needs it */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* If child sources are active SIGCHLD is already in
                         * the signalfd set; no update needed then */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only count the transition from OFF; ON <-> ONESHOT
                         * changes must not bump the counter */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* Enablement participates in pending/prepare ordering */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1530
/* Stores the absolute expiry time of a timer source (in usec, on the
 * source's clock) in *usec. Returns 0 on success. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1540
/* Changes the absolute expiry time of a timer source. Clears the
 * pending flag (a queued expiry for the old deadline is stale),
 * reshuffles both timer prioqs and schedules a timerfd re-arm. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1563
/* Stores the accuracy window (in usec) of a timer source in *usec.
 * Returns 0 on success. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1573
/* Changes the accuracy window of a timer source; 0 selects the default
 * (DEFAULT_ACCURACY_USEC). Only the "latest" prioq is affected -- the
 * earliest deadline is unchanged -- and the timerfd is marked for
 * re-arming. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1598
/* Stores the clockid_t a timer source runs on (derived from its source
 * type) in *clock. Returns 0 on success. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1608
/* Stores the PID a child event source watches in *pid. Returns 0 on
 * success, negative error if the source is not a child source. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1618
/* Installs (or removes, if callback is NULL) a prepare callback that is
 * invoked right before the loop goes to sleep. Sources with a prepare
 * callback are tracked in the event's prepare prioq; replacing an
 * existing callback needs no queue manipulation. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one callback for another: prioq membership unchanged */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1650
/* Returns the userdata pointer associated with the event source, or
 * NULL if s is NULL. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1656
1657 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1658         void *ret;
1659
1660         assert_return(s, NULL);
1661
1662         ret = s->userdata;
1663         s->userdata = userdata;
1664
1665         return ret;
1666 }
1667
/* Picks the actual wake-up time within the window [a, b], where 'a' is
 * the earliest deadline and 'b' the latest acceptable time. Tries to
 * align wake-ups system-wide on successively finer grids (1min, 10s,
 * 1s, 250ms) offset by the per-boot perturbation value.
 * NOTE(review): the first stanza uses e->perturb unreduced -- assumes
 * initialize_perturb() keeps it below USEC_PER_MINUTE; confirm there. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, hence this triggers only for a == 0 */
        if (a <= 0)
                return 0;

        /* Window of one usec or less: no room to optimize */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No aligned slot fits the window: wake at the latest time */
        return b;
}
1745
1746 static int event_arm_timer(
1747                 sd_event *e,
1748                 struct clock_data *d) {
1749
1750         struct itimerspec its = {};
1751         sd_event_source *a, *b;
1752         usec_t t;
1753         int r;
1754
1755         assert(e);
1756         assert(d);
1757
1758         if (!d->needs_rearm)
1759                 return 0;
1760         else
1761                 d->needs_rearm = false;
1762
1763         a = prioq_peek(d->earliest);
1764         if (!a || a->enabled == SD_EVENT_OFF) {
1765
1766                 if (d->fd < 0)
1767                         return 0;
1768
1769                 if (d->next == USEC_INFINITY)
1770                         return 0;
1771
1772                 /* disarm */
1773                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1774                 if (r < 0)
1775                         return r;
1776
1777                 d->next = USEC_INFINITY;
1778                 return 0;
1779         }
1780
1781         b = prioq_peek(d->latest);
1782         assert_se(b && b->enabled != SD_EVENT_OFF);
1783
1784         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1785         if (d->next == t)
1786                 return 0;
1787
1788         assert_se(d->fd >= 0);
1789
1790         if (t == 0) {
1791                 /* We don' want to disarm here, just mean some time looooong ago. */
1792                 its.it_value.tv_sec = 0;
1793                 its.it_value.tv_nsec = 1;
1794         } else
1795                 timespec_store(&its.it_value, t);
1796
1797         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1798         if (r < 0)
1799                 return -errno;
1800
1801         d->next = t;
1802         return 0;
1803 }
1804
1805 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1806         assert(e);
1807         assert(s);
1808         assert(s->type == SOURCE_IO);
1809
1810         /* If the event source was already pending, we just OR in the
1811          * new revents, otherwise we reset the value. The ORing is
1812          * necessary to handle EPOLLONESHOT events properly where
1813          * readability might happen independently of writability, and
1814          * we need to keep track of both */
1815
1816         if (s->pending)
1817                 s->io.revents |= revents;
1818         else
1819                 s->io.revents = revents;
1820
1821         return source_set_pending(s, true);
1822 }
1823
/* Drains the expiration counter of a timerfd after epoll reported it
 * readable, and resets *next (the cached arm time) to USEC_INFINITY so
 * the timer is re-armed on the next loop iteration. Returns 0 on
 * success (also when the read would block), negative error otherwise. */
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        /* A timerfd only ever signals readability */
        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                /* Spurious wake-up or interrupted read: nothing flushed */
                if (errno == EAGAIN || errno == EINTR)
                        return 0;

                return -errno;
        }

        /* timerfd reads always deliver exactly one uint64_t */
        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
1849
/* Marks all timer sources of clock data 'd' whose deadline is at or
 * before 'n' (current time on that clock) as pending, and requests a
 * timerfd re-arm. Stops at the first source that is not yet due,
 * disabled, or already pending. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                /* d->earliest is ordered by deadline, so the peek is the
                 * next candidate; a non-due peek ends the sweep */
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                /* Pending sources sort differently; reshuffle so the next
                 * peek sees the following candidate */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
1880
/* Polls (via waitid() with WNOHANG) every registered child source for a
 * state change and marks matching sources pending. Children are not
 * reaped here (WNOWAIT) so the dispatch callback still sees the zombie.
 * Returns 0 on success, a negative errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued: don't overwrite the stored siginfo */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* si_pid stays 0 if no state change is available, hence
                 * clear the struct before the call */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1947
/* Drains the event loop's signalfd after epoll reported it readable,
 * routing SIGCHLD to process_child() and marking matching signal
 * sources pending. Returns > 0 if at least one signal was read, 0 if
 * nothing was pending, or a negative errno-style error. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        /* A signalfd only ever signals readability */
        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s = NULL;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* Queue drained (or interrupted): report whether we
                         * read anything at all */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads deliver whole siginfo structures */
                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                /* Dispatch to the explicit per-signal source, if any */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1994
/* Runs the user callback of one event source. Returns 1 on success,
 * or a negative errno-style error if the internal pending/enabled
 * bookkeeping fails. Errors returned by the user callback itself are
 * not propagated: they are logged and merely disable the source. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending while enabled; all
         * other types are unqueued before their callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Disable one-shot sources before the callback runs, so the
         * callback may re-enable them if it wants to. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, dropping the last reference defers
         * freeing the source (see the n_ref check below). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine before the callback whether the child is
                 * dead, since the callback may overwrite siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* The callback may have dropped the last reference; the free
         * was deferred while s->dispatching was set, do it now. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2096
2097 static int event_prepare(sd_event *e) {
2098         int r;
2099
2100         assert(e);
2101
2102         for (;;) {
2103                 sd_event_source *s;
2104
2105                 s = prioq_peek(e->prepare);
2106                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2107                         break;
2108
2109                 s->prepare_iteration = e->iteration;
2110                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2111                 if (r < 0)
2112                         return r;
2113
2114                 assert(s->prepare);
2115
2116                 s->dispatching = true;
2117                 r = s->prepare(s, s->userdata);
2118                 s->dispatching = false;
2119
2120                 if (r < 0)
2121                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2122
2123                 if (s->n_ref == 0)
2124                         source_free(s);
2125                 else if (r < 0)
2126                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2127         }
2128
2129         return 0;
2130 }
2131
2132 static int dispatch_exit(sd_event *e) {
2133         sd_event_source *p;
2134         int r;
2135
2136         assert(e);
2137
2138         p = prioq_peek(e->exit);
2139         if (!p || p->enabled == SD_EVENT_OFF) {
2140                 e->state = SD_EVENT_FINISHED;
2141                 return 0;
2142         }
2143
2144         sd_event_ref(e);
2145         e->iteration++;
2146         e->state = SD_EVENT_EXITING;
2147
2148         r = source_dispatch(p);
2149
2150         e->state = SD_EVENT_PASSIVE;
2151         sd_event_unref(e);
2152
2153         return r;
2154 }
2155
2156 static sd_event_source* event_next_pending(sd_event *e) {
2157         sd_event_source *p;
2158
2159         assert(e);
2160
2161         p = prioq_peek(e->pending);
2162         if (!p)
2163                 return NULL;
2164
2165         if (p->enabled == SD_EVENT_OFF)
2166                 return NULL;
2167
2168         return p;
2169 }
2170
/* (Re)arms the watchdog keep-alive timerfd to fire between 1/2 and
 * 3/4 of the watchdog period after the last ping; sleep_between()
 * picks a point in that window (presumably to coalesce with other
 * wakeups). Returns 0 on success, negative errno on failure. */
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        /* Absolute CLOCK_MONOTONIC deadline, matching timerfd_create()
         * in sd_event_set_watchdog() */
        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
2196
2197 static int process_watchdog(sd_event *e) {
2198         assert(e);
2199
2200         if (!e->watchdog)
2201                 return 0;
2202
2203         /* Don't notify watchdog too often */
2204         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2205                 return 0;
2206
2207         sd_notify(false, "WATCHDOG=1");
2208         e->watchdog_last = e->timestamp.monotonic;
2209
2210         return arm_watchdog(e);
2211 }
2212
/* First step of a loop iteration: run all prepare callbacks and
 * (re)arm the per-clock timerfds for the earliest deadlines.
 * Returns > 0 (loop state SD_EVENT_PENDING) if sources are already
 * pending and may be dispatched right away, 0 (state
 * SD_EVENT_PREPARED) if sd_event_wait() should poll first, negative
 * errno on failure. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Exit requested: skip normal preparation so the exit
         * sources get dispatched as quickly as possible. */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Program each clock's timerfd with its earliest deadline */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is pending already: do a zero-timeout wait so fd
         * events are still collected before we report > 0. */
        e->state = SD_EVENT_PREPARED;
        return sd_event_wait(e, 0);
}
2261
/* Second step of a loop iteration: poll the epoll fd (timeout in
 * usec, (uint64_t) -1 for infinity), flush the timerfds and the
 * signalfd, and mark the affected event sources pending. Returns
 * > 0 (state SD_EVENT_PENDING) if something is dispatchable, 0
 * (back to SD_EVENT_PASSIVE) otherwise, negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        /* Exit requested: don't poll, dispatch the exit sources
         * immediately. */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Size the event buffer by the source count, but bounded;
         * events that don't fit are returned by a later epoll_wait().
         * newa() is stack allocation. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* epoll takes msec; round the usec timeout up so we don't wake early */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Interrupted: report "pending" so the caller
                         * cycles through dispatch and comes back. */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wakeup time: timer processing and sd_event_now()
         * work off these timestamps. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds (timerfds, signalfd, watchdog) carry an
                 * enum tag in data.ptr; anything else is a pointer to
                 * an IO event source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark all timer sources whose deadline has elapsed as pending */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* A SIGCHLD was seen (possibly coalesced): poll all watched children */
        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2362
/* Third step of a loop iteration: dispatch the single
 * highest-priority pending event source (or an exit source if an
 * exit was requested). Returns a positive value on success, 0 only
 * when dispatch_exit() found no exit source left and moved the loop
 * to SD_EVENT_FINISHED, negative errno on failure. */
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                /* Pin the loop while the callback runs, in case the
                 * callback drops the caller's reference. */
                sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_PASSIVE;

                sd_event_unref(e);

                return r;
        }

        /* Nothing pending after all (e.g. the source got disabled
         * in the meantime) */
        e->state = SD_EVENT_PASSIVE;

        return 1;
}
2392
2393 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2394         int r;
2395
2396         assert_return(e, -EINVAL);
2397         assert_return(!event_pid_changed(e), -ECHILD);
2398         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2399         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2400
2401         r = sd_event_prepare(e);
2402         if (r > 0)
2403                 return sd_event_dispatch(e);
2404         else if (r < 0)
2405                 return r;
2406
2407         r = sd_event_wait(e, timeout);
2408         if (r > 0)
2409                 return sd_event_dispatch(e);
2410         else
2411                 return r;
2412 }
2413
2414 _public_ int sd_event_loop(sd_event *e) {
2415         int r;
2416
2417         assert_return(e, -EINVAL);
2418         assert_return(!event_pid_changed(e), -ECHILD);
2419         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2420
2421         sd_event_ref(e);
2422
2423         while (e->state != SD_EVENT_FINISHED) {
2424                 r = sd_event_run(e, (uint64_t) -1);
2425                 if (r < 0)
2426                         goto finish;
2427         }
2428
2429         r = e->exit_code;
2430
2431 finish:
2432         sd_event_unref(e);
2433         return r;
2434 }
2435
/* Returns the epoll fd backing this event loop, e.g. for embedding
 * it into an outer event loop. The fd stays owned by the event loop
 * object; callers must not close it. */
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
2443
/* Returns the loop's current state (one of the SD_EVENT_* state
 * constants, e.g. SD_EVENT_PASSIVE, SD_EVENT_PENDING,
 * SD_EVENT_FINISHED). */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2450
2451 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2452         assert_return(e, -EINVAL);
2453         assert_return(code, -EINVAL);
2454         assert_return(!event_pid_changed(e), -ECHILD);
2455
2456         if (!e->exit_requested)
2457                 return -ENODATA;
2458
2459         *code = e->exit_code;
2460         return 0;
2461 }
2462
/* Requests that the event loop exits with the given code: subsequent
 * iterations dispatch the registered exit sources and then move the
 * loop to SD_EVENT_FINISHED. Only sets flags, so it is safe to call
 * from within any handler. */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
2473
2474 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2475         assert_return(e, -EINVAL);
2476         assert_return(usec, -EINVAL);
2477         assert_return(!event_pid_changed(e), -ECHILD);
2478
2479         /* If we haven't run yet, just get the actual time */
2480         if (!dual_timestamp_is_set(&e->timestamp))
2481                 return -ENODATA;
2482
2483         switch (clock) {
2484
2485         case CLOCK_REALTIME:
2486         case CLOCK_REALTIME_ALARM:
2487                 *usec = e->timestamp.realtime;
2488                 break;
2489
2490         case CLOCK_MONOTONIC:
2491                 *usec = e->timestamp.monotonic;
2492                 break;
2493
2494         case CLOCK_BOOTTIME:
2495         case CLOCK_BOOTTIME_ALARM:
2496                 *usec = e->timestamp_boottime;
2497                 break;
2498         }
2499
2500         return 0;
2501 }
2502
/* Returns the calling thread's default event loop, creating it on
 * first use. With a NULL argument it only reports (1/0) whether a
 * default loop already exists for this thread. Otherwise returns 1
 * if a new loop was created, 0 if an existing one was re-referenced,
 * or a negative errno from sd_event_new(). */
_public_ int sd_event_default(sd_event **ret) {

        /* One default loop per thread */
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Record where to reset the thread-local pointer when the
         * loop is freed, and bind the loop to this thread. */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2528
2529 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2530         assert_return(e, -EINVAL);
2531         assert_return(tid, -EINVAL);
2532         assert_return(!event_pid_changed(e), -ECHILD);
2533
2534         if (e->tid != 0) {
2535                 *tid = e->tid;
2536                 return 0;
2537         }
2538
2539         return -ENXIO;
2540 }
2541
/* Enables (b != 0) or disables automatic "WATCHDOG=1" keep-alive
 * notifications to the service manager, driven by a timerfd hooked
 * into the loop's epoll. Returns the resulting watchdog state (1/0)
 * on success, negative errno on failure. If the service manager
 * requested no watchdog, enabling is a quiet no-op and
 * sd_watchdog_enabled()'s result (<= 0) is returned. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* "false": keep $WATCHDOG_USEC set in the environment */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        /* Best effort: the fd is closed right below anyway */
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2593
/* Returns whether watchdog keep-alive notifications are currently
 * enabled for this loop (see sd_event_set_watchdog()). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}