chiark / gitweb /
sd-event: allow naming event sources
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* All kinds of event sources we know. The five SOURCE_TIME_* entries must
 * stay recognizable via EVENT_SOURCE_IS_TIME() below. */
typedef enum EventSourceType {
        SOURCE_IO,                  /* an fd watched via epoll */
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,              /* a signal watched via signalfd */
        SOURCE_CHILD,               /* a child process watched via SIGCHLD */
        SOURCE_DEFER,               /* dispatched on every iteration while enabled */
        SOURCE_POST,
        SOURCE_EXIT,                /* dispatched when the loop is exiting */
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
59
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* A single registered event source. The anonymous union at the bottom
 * carries the per-type state; which member is valid is determined by
 * 'type'. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* the loop we are attached to; NULL after source_disconnect() */
        void *userdata;
        sd_event_handler_t prepare; /* optional callback invoked before polling */

        char *name;                 /* optional source name, owned and freed by us */

        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF, SD_EVENT_ON or SD_EVENT_ONESHOT */
        bool pending:1;             /* currently queued in event->pending */
        bool dispatching:1;
        bool floating:1;            /* holds no ref on the loop; reaped in event_free() */

        int64_t priority;           /* lower values dispatch first */
        unsigned pending_index;     /* our index in event->pending */
        unsigned prepare_index;     /* our index in event->prepare */
        unsigned pending_iteration; /* loop iteration in which we became pending */
        unsigned prepare_iteration; /* loop iteration in which we were last prepared */

        LIST_FIELDS(sd_event_source, sources); /* linkage in event->sources */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;   /* EPOLL* mask we want to be notified about */
                        uint32_t revents;
                        bool registered:1; /* fd currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* elapse time plus coalescing slack */
                        unsigned earliest_index; /* index in clock_data->earliest */
                        unsigned latest_index;   /* index in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig; /* the signal number watched */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;   /* the child watched */
                        int options; /* WEXITED/WSTOPPED/WCONTINUED mask */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index in event->exit */
                } exit;
        };
};
122
/* Per-clock bookkeeping: the timerfd plus the two scheduling prioqs. */
struct clock_data {
        int fd; /* timerfd for this clock, or -1 if not opened yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* initialized to USEC_INFINITY, i.e. nothing scheduled yet */

        bool needs_rearm:1; /* set whenever the prioqs changed and the timerfd must be reprogrammed */
};
139
/* The event loop object proper: the epoll instance, all attached sources
 * and the per-clock timer state. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;    /* the central epoll instance */
        int signal_fd;   /* signalfd covering 'sigset', or -1 if not set up */
        int watchdog_fd; /* fd used for watchdog support, or -1 */

        Prioq *pending;  /* sources with an undispatched event, see pending_prioq_compare() */
        Prioq *prepare;  /* sources with a prepare callback, see prepare_prioq_compare() */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb; /* per-boot wakeup-coalescing offset, see initialize_perturb() */

        sigset_t sigset;                  /* set of signals the signalfd should cover */
        sd_event_source **signal_sources; /* indexed by signal number, _NSIG entries */

        Hashmap *child_sources;           /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit; /* SOURCE_EXIT sources, see exit_prioq_compare() */

        pid_t original_pid; /* pid at creation time, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1; /* set when child sources may need a waitid() pass */
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* back pointer cleared in event_free() */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources); /* all sources attached to this loop */
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
/* Destroy an event loop object: disconnect all remaining sources, close
 * all fds and free all auxiliary structures. Invoked from sd_event_unref()
 * when the refcount hits zero and from sd_event_new()'s failure path. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be attached here: non-floating
         * ones hold a reference on the loop (see source_new()), so the
         * refcount could not have dropped to zero while any existed. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Invalidate the cached default-loop pointer, if we are it */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
402 _public_ int sd_event_new(sd_event** ret) {
403         sd_event *e;
404         int r;
405
406         assert_return(ret, -EINVAL);
407
408         e = new0(sd_event, 1);
409         if (!e)
410                 return -ENOMEM;
411
412         e->n_ref = 1;
413         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
414         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
415         e->original_pid = getpid();
416         e->perturb = USEC_INFINITY;
417
418         assert_se(sigemptyset(&e->sigset) == 0);
419
420         e->pending = prioq_new(pending_prioq_compare);
421         if (!e->pending) {
422                 r = -ENOMEM;
423                 goto fail;
424         }
425
426         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427         if (e->epoll_fd < 0) {
428                 r = -errno;
429                 goto fail;
430         }
431
432         *ret = e;
433         return 0;
434
435 fail:
436         event_free(e);
437         return r;
438 }
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating am event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
/* Detach an event source from its loop: deregister it from the kernel
 * and from all queues and lookup tables, then sever the source<->loop
 * link. The source object itself stays allocated; freeing it is the job
 * of source_free()/the refcount. Safe to call more than once. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return; /* already disconnected */

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Take the source out of both scheduling queues and make
                 * sure the timerfd gets reprogrammed */
                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        /* Keep SIGCHLD in the signalfd mask as long as
                         * enabled child sources still rely on it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        /* Keep SIGCHLD in the signalfd mask if a signal
                         * source still watches it explicitly */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources hold a reference on the loop, drop it now */
        if (!s->floating)
                sd_event_unref(event);
}
685
/* Deallocate an event source: first detach it from its loop, then free
 * the name string (owned by the source) and the object itself. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->name);
        free(s);
}
693
694 static int source_set_pending(sd_event_source *s, bool b) {
695         int r;
696
697         assert(s);
698         assert(s->type != SOURCE_EXIT);
699
700         if (s->pending == b)
701                 return 0;
702
703         s->pending = b;
704
705         if (b) {
706                 s->pending_iteration = s->event->iteration;
707
708                 r = prioq_put(s->event->pending, s, &s->pending_index);
709                 if (r < 0) {
710                         s->pending = false;
711                         return r;
712                 }
713         } else
714                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
715
716         if (EVENT_SOURCE_IS_TIME(s->type)) {
717                 struct clock_data *d;
718
719                 d = event_get_clock_data(s->event, s->type);
720                 assert(d);
721
722                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
723                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
724                 d->needs_rearm = true;
725         }
726
727         return 0;
728 }
729
730 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
731         sd_event_source *s;
732
733         assert(e);
734
735         s = new0(sd_event_source, 1);
736         if (!s)
737                 return NULL;
738
739         s->n_ref = 1;
740         s->event = e;
741         s->floating = floating;
742         s->type = type;
743         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
744
745         if (!floating)
746                 sd_event_ref(e);
747
748         LIST_PREPEND(sources, e->sources, s);
749         e->n_sources ++;
750
751         return s;
752 }
753
754 _public_ int sd_event_add_io(
755                 sd_event *e,
756                 sd_event_source **ret,
757                 int fd,
758                 uint32_t events,
759                 sd_event_io_handler_t callback,
760                 void *userdata) {
761
762         sd_event_source *s;
763         int r;
764
765         assert_return(e, -EINVAL);
766         assert_return(fd >= 0, -EINVAL);
767         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
768         assert_return(callback, -EINVAL);
769         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
770         assert_return(!event_pid_changed(e), -ECHILD);
771
772         s = source_new(e, !ret, SOURCE_IO);
773         if (!s)
774                 return -ENOMEM;
775
776         s->io.fd = fd;
777         s->io.events = events;
778         s->io.callback = callback;
779         s->userdata = userdata;
780         s->enabled = SD_EVENT_ON;
781
782         r = source_io_register(s, s->enabled, events);
783         if (r < 0) {
784                 source_free(s);
785                 return r;
786         }
787
788         if (ret)
789                 *ret = s;
790
791         return 0;
792 }
793
794 static void initialize_perturb(sd_event *e) {
795         sd_id128_t bootid = {};
796
797         /* When we sleep for longer, we try to realign the wakeup to
798            the same time wihtin each minute/second/250ms, so that
799            events all across the system can be coalesced into a single
800            CPU wakeup. However, let's take some system-specific
801            randomness for this value, so that in a network of systems
802            with synced clocks timer events are distributed a
803            bit. Here, we calculate a perturbation usec offset from the
804            boot ID. */
805
806         if (_likely_(e->perturb != USEC_INFINITY))
807                 return;
808
809         if (sd_id128_get_boot(&bootid) >= 0)
810                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
811 }
812
813 static int event_setup_timer_fd(
814                 sd_event *e,
815                 struct clock_data *d,
816                 clockid_t clock) {
817
818         struct epoll_event ev = {};
819         int r, fd;
820
821         assert(e);
822         assert(d);
823
824         if (_likely_(d->fd >= 0))
825                 return 0;
826
827         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
828         if (fd < 0)
829                 return -errno;
830
831         ev.events = EPOLLIN;
832         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
833
834         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
835         if (r < 0) {
836                 safe_close(fd);
837                 return -errno;
838         }
839
840         d->fd = fd;
841         return 0;
842 }
843
/* Add a timer event source on the given clock, elapsing at 'usec' with a
 * coalescing window of 'accuracy' (0 selects DEFAULT_ACCURACY_USEC). The
 * source is created in SD_EVENT_ONESHOT mode. If 'ret' is NULL the source
 * is created floating, i.e. owned by the loop. Returns 0 on success,
 * negative errno-style error on failure (-ENOTSUP for unsupported
 * clocks). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the two scheduling prioqs for this clock */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ... and the timerfd backing it */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The timerfd needs reprogramming now that a new entry exists */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
919
920 static int event_update_signal_fd(sd_event *e) {
921         struct epoll_event ev = {};
922         bool add_to_epoll;
923         int r;
924
925         assert(e);
926
927         add_to_epoll = e->signal_fd < 0;
928
929         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
930         if (r < 0)
931                 return -errno;
932
933         e->signal_fd = r;
934
935         if (!add_to_epoll)
936                 return 0;
937
938         ev.events = EPOLLIN;
939         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
940
941         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
942         if (r < 0) {
943                 e->signal_fd = safe_close(e->signal_fd);
944                 return -errno;
945         }
946
947         return 0;
948 }
949
950 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
951         assert(s);
952
953         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
954 }
955
956 _public_ int sd_event_add_signal(
957                 sd_event *e,
958                 sd_event_source **ret,
959                 int sig,
960                 sd_event_signal_handler_t callback,
961                 void *userdata) {
962
963         sd_event_source *s;
964         sigset_t ss;
965         int r;
966
967         assert_return(e, -EINVAL);
968         assert_return(sig > 0, -EINVAL);
969         assert_return(sig < _NSIG, -EINVAL);
970         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
971         assert_return(!event_pid_changed(e), -ECHILD);
972
973         if (!callback)
974                 callback = signal_exit_callback;
975
976         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
977         if (r < 0)
978                 return -errno;
979
980         if (!sigismember(&ss, sig))
981                 return -EBUSY;
982
983         if (!e->signal_sources) {
984                 e->signal_sources = new0(sd_event_source*, _NSIG);
985                 if (!e->signal_sources)
986                         return -ENOMEM;
987         } else if (e->signal_sources[sig])
988                 return -EBUSY;
989
990         s = source_new(e, !ret, SOURCE_SIGNAL);
991         if (!s)
992                 return -ENOMEM;
993
994         s->signal.sig = sig;
995         s->signal.callback = callback;
996         s->userdata = userdata;
997         s->enabled = SD_EVENT_ON;
998
999         e->signal_sources[sig] = s;
1000         assert_se(sigaddset(&e->sigset, sig) == 0);
1001
1002         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
1003                 r = event_update_signal_fd(e);
1004                 if (r < 0) {
1005                         source_free(s);
1006                         return r;
1007                 }
1008         }
1009
1010         if (ret)
1011                 *ret = s;
1012
1013         return 0;
1014 }
1015
/* Add a child-process event source for 'pid', triggered via SIGCHLD.
 * 'options' is a non-empty combination of WEXITED/WSTOPPED/WCONTINUED.
 * Only one source per pid is allowed. The source is created in
 * SD_EVENT_ONESHOT mode; if 'ret' is NULL it is created floating, i.e.
 * owned by the loop. Returns 0 on success, negative errno-style error on
 * failure. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
        if (r < 0)
                return r;

        /* Only one watcher per child */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* On the failure paths below, source_free() undoes this counter
         * again via source_disconnect(), since s->enabled is not OFF */
        e->n_enabled_child_sources ++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        /* The signalfd may already cover SIGCHLD on behalf of an explicit
         * signal source */
        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Remember to check for already-dead children on the next run */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1077
1078 _public_ int sd_event_add_defer(
1079                 sd_event *e,
1080                 sd_event_source **ret,
1081                 sd_event_handler_t callback,
1082                 void *userdata) {
1083
1084         sd_event_source *s;
1085         int r;
1086
1087         assert_return(e, -EINVAL);
1088         assert_return(callback, -EINVAL);
1089         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1090         assert_return(!event_pid_changed(e), -ECHILD);
1091
1092         s = source_new(e, !ret, SOURCE_DEFER);
1093         if (!s)
1094                 return -ENOMEM;
1095
1096         s->defer.callback = callback;
1097         s->userdata = userdata;
1098         s->enabled = SD_EVENT_ONESHOT;
1099
1100         r = source_set_pending(s, true);
1101         if (r < 0) {
1102                 source_free(s);
1103                 return r;
1104         }
1105
1106         if (ret)
1107                 *ret = s;
1108
1109         return 0;
1110 }
1111
1112 _public_ int sd_event_add_post(
1113                 sd_event *e,
1114                 sd_event_source **ret,
1115                 sd_event_handler_t callback,
1116                 void *userdata) {
1117
1118         sd_event_source *s;
1119         int r;
1120
1121         assert_return(e, -EINVAL);
1122         assert_return(callback, -EINVAL);
1123         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1124         assert_return(!event_pid_changed(e), -ECHILD);
1125
1126         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1127         if (r < 0)
1128                 return r;
1129
1130         s = source_new(e, !ret, SOURCE_POST);
1131         if (!s)
1132                 return -ENOMEM;
1133
1134         s->post.callback = callback;
1135         s->userdata = userdata;
1136         s->enabled = SD_EVENT_ON;
1137
1138         r = set_put(e->post_sources, s);
1139         if (r < 0) {
1140                 source_free(s);
1141                 return r;
1142         }
1143
1144         if (ret)
1145                 *ret = s;
1146
1147         return 0;
1148 }
1149
1150 _public_ int sd_event_add_exit(
1151                 sd_event *e,
1152                 sd_event_source **ret,
1153                 sd_event_handler_t callback,
1154                 void *userdata) {
1155
1156         sd_event_source *s;
1157         int r;
1158
1159         assert_return(e, -EINVAL);
1160         assert_return(callback, -EINVAL);
1161         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1162         assert_return(!event_pid_changed(e), -ECHILD);
1163
1164         if (!e->exit) {
1165                 e->exit = prioq_new(exit_prioq_compare);
1166                 if (!e->exit)
1167                         return -ENOMEM;
1168         }
1169
1170         s = source_new(e, !ret, SOURCE_EXIT);
1171         if (!s)
1172                 return -ENOMEM;
1173
1174         s->exit.callback = callback;
1175         s->userdata = userdata;
1176         s->exit.prioq_index = PRIOQ_IDX_NULL;
1177         s->enabled = SD_EVENT_ONESHOT;
1178
1179         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1180         if (r < 0) {
1181                 source_free(s);
1182                 return r;
1183         }
1184
1185         if (ret)
1186                 *ret = s;
1187
1188         return 0;
1189 }
1190
/* Increments the reference count of the event source by one and returns
 * it; returns NULL if s is NULL. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1199
/* Drops one reference. When the last reference goes away the source is
 * normally freed immediately; if we are currently inside the source's
 * own dispatch callback, the actual free is deferred (see the comment
 * below). Always returns NULL, so callers can write
 * "s = sd_event_source_unref(s);". NULL input is a no-op. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1228
1229 _public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
1230         char *new_name = NULL;
1231
1232         assert_return(s, -EINVAL);
1233
1234         if (name) {
1235                 new_name = strdup(name);
1236                 if (!new_name)
1237                         return -ENOMEM;
1238         }
1239
1240         free(s->name);
1241         s->name = new_name;
1242
1243         return 0;
1244 }
1245
/* Retrieves the name previously set with sd_event_source_set_name().
 * *name may be set to NULL if no name was ever assigned; the returned
 * string remains owned by the event source. */
_public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
        assert_return(s, -EINVAL);
        assert_return(name, -EINVAL);

        *name = s->name;

        return 0;
}
1254
/* Returns the event loop this source is attached to (no new reference
 * is taken), or NULL if s is NULL. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1260
/* Returns whether the source is currently queued for dispatching
 * (> 0 pending, 0 not). Exit sources do not use the pending machinery,
 * hence -EDOM for them. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1269
/* Returns the file descriptor an IO event source watches, or -EDOM if
 * the source is not an IO source. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1277
/* Replaces the file descriptor watched by an IO event source. If the
 * source is enabled, the new fd is registered with epoll first and the
 * old one dropped only afterwards, so a registration failure leaves the
 * old fd fully functional. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered with epoll; just remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back to the previous, still-registered fd. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd from epoll; an error here
                 * is deliberately ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1313
/* Stores the epoll event mask configured for an IO source in *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1323
/* Changes the epoll event mask of an IO event source. Only the listed
 * EPOLL* flags are permitted. If the source is enabled, the kernel
 * registration is updated immediately; any pending state is cleared
 * since already-queued revents may not match the new mask. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
1348
/* Stores the events that actually triggered in *revents. Only valid
 * while the source is pending, hence -ENODATA otherwise. */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1359
/* Returns the UNIX signal number a signal event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1367
1368 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1369         assert_return(s, -EINVAL);
1370         assert_return(!event_pid_changed(s->event), -ECHILD);
1371
1372         return s->priority;
1373 }
1374
/* Changes the dispatch priority of the event source. Every priority
 * queue the source currently sits in (pending, prepare, and for exit
 * sources the exit queue) is reshuffled so ordering stays consistent. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1396
/* Stores the enablement state (SD_EVENT_OFF/ON/ONESHOT) in *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1405
/* Changes the enablement state of an event source to SD_EVENT_OFF,
 * SD_EVENT_ON or SD_EVENT_ONESHOT. Each source type needs its own
 * bookkeeping on the transition: IO sources (de)register with epoll,
 * timer sources are reshuffled in their clock's queues and the clock is
 * flagged for rearming, signal/child sources update the loop's signal
 * mask and signalfd, exit sources are reshuffled in the exit queue.
 * Finally the pending/prepare queues are reshuffled since enablement
 * affects their ordering. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the mask if child sources still
                         * depend on it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Only drop SIGCHLD if no plain signal source
                         * still wants it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only count and (re)arm SIGCHLD on an actual
                         * OFF -> ON/ONESHOT transition. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1559
/* Stores the absolute expiry time of a timer source in *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1569
/* Reschedules a timer source to fire at the absolute time 'usec' (in
 * the source's clock). Pending state is cleared, both the earliest and
 * latest queues are reshuffled, and the clock is flagged for rearming
 * so event_arm_timer() reprograms the timerfd. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1592
/* Stores the timer source's coalescing accuracy (in usec) in *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1602
/* Changes the coalescing accuracy of a timer source; 0 selects the
 * default (DEFAULT_ACCURACY_USEC). Only the "latest" queue needs
 * reshuffling, since accuracy only affects the latest acceptable
 * wakeup time (see event_arm_timer()). */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1627
/* Stores the clockid_t corresponding to the timer source's type in
 * *clock (mapping done by event_source_type_to_clock()). */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1637
/* Stores the PID a child event source watches in *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1647
/* Installs (or, with callback == NULL, removes) a preparation callback
 * for the source. Sources with a prepare callback are tracked in the
 * e->prepare priority queue; the source is added on the NULL -> non-NULL
 * transition and removed on the reverse one. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one callback for another needs no queue changes. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1679
/* Returns the userdata pointer associated with the source, or NULL if
 * s is NULL. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1685
/* Replaces the userdata pointer of the source and returns the previous
 * value (NULL is both a valid argument and a valid previous value). */
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
1696
1697 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1698         usec_t c;
1699         assert(e);
1700         assert(a <= b);
1701
1702         if (a <= 0)
1703                 return 0;
1704
1705         if (b <= a + 1)
1706                 return a;
1707
1708         initialize_perturb(e);
1709
1710         /*
1711           Find a good time to wake up again between times a and b. We
1712           have two goals here:
1713
1714           a) We want to wake up as seldom as possible, hence prefer
1715              later times over earlier times.
1716
1717           b) But if we have to wake up, then let's make sure to
1718              dispatch as much as possible on the entire system.
1719
1720           We implement this by waking up everywhere at the same time
1721           within any given minute if we can, synchronised via the
1722           perturbation value determined from the boot ID. If we can't,
1723           then we try to find the same spot in every 10s, then 1s and
1724           then 250ms step. Otherwise, we pick the last possible time
1725           to wake up.
1726         */
1727
1728         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1729         if (c >= b) {
1730                 if (_unlikely_(c < USEC_PER_MINUTE))
1731                         return b;
1732
1733                 c -= USEC_PER_MINUTE;
1734         }
1735
1736         if (c >= a)
1737                 return c;
1738
1739         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1740         if (c >= b) {
1741                 if (_unlikely_(c < USEC_PER_SEC*10))
1742                         return b;
1743
1744                 c -= USEC_PER_SEC*10;
1745         }
1746
1747         if (c >= a)
1748                 return c;
1749
1750         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1751         if (c >= b) {
1752                 if (_unlikely_(c < USEC_PER_SEC))
1753                         return b;
1754
1755                 c -= USEC_PER_SEC;
1756         }
1757
1758         if (c >= a)
1759                 return c;
1760
1761         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1762         if (c >= b) {
1763                 if (_unlikely_(c < USEC_PER_MSEC*250))
1764                         return b;
1765
1766                 c -= USEC_PER_MSEC*250;
1767         }
1768
1769         if (c >= a)
1770                 return c;
1771
1772         return b;
1773 }
1774
1775 static int event_arm_timer(
1776                 sd_event *e,
1777                 struct clock_data *d) {
1778
1779         struct itimerspec its = {};
1780         sd_event_source *a, *b;
1781         usec_t t;
1782         int r;
1783
1784         assert(e);
1785         assert(d);
1786
1787         if (!d->needs_rearm)
1788                 return 0;
1789         else
1790                 d->needs_rearm = false;
1791
1792         a = prioq_peek(d->earliest);
1793         if (!a || a->enabled == SD_EVENT_OFF) {
1794
1795                 if (d->fd < 0)
1796                         return 0;
1797
1798                 if (d->next == USEC_INFINITY)
1799                         return 0;
1800
1801                 /* disarm */
1802                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1803                 if (r < 0)
1804                         return r;
1805
1806                 d->next = USEC_INFINITY;
1807                 return 0;
1808         }
1809
1810         b = prioq_peek(d->latest);
1811         assert_se(b && b->enabled != SD_EVENT_OFF);
1812
1813         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1814         if (d->next == t)
1815                 return 0;
1816
1817         assert_se(d->fd >= 0);
1818
1819         if (t == 0) {
1820                 /* We don' want to disarm here, just mean some time looooong ago. */
1821                 its.it_value.tv_sec = 0;
1822                 its.it_value.tv_nsec = 1;
1823         } else
1824                 timespec_store(&its.it_value, t);
1825
1826         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1827         if (r < 0)
1828                 return -errno;
1829
1830         d->next = t;
1831         return 0;
1832 }
1833
1834 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1835         assert(e);
1836         assert(s);
1837         assert(s->type == SOURCE_IO);
1838
1839         /* If the event source was already pending, we just OR in the
1840          * new revents, otherwise we reset the value. The ORing is
1841          * necessary to handle EPOLLONESHOT events properly where
1842          * readability might happen independently of writability, and
1843          * we need to keep track of both */
1844
1845         if (s->pending)
1846                 s->io.revents |= revents;
1847         else
1848                 s->io.revents = revents;
1849
1850         return source_set_pending(s, true);
1851 }
1852
1853 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1854         uint64_t x;
1855         ssize_t ss;
1856
1857         assert(e);
1858         assert(fd >= 0);
1859
1860         assert_return(events == EPOLLIN, -EIO);
1861
1862         ss = read(fd, &x, sizeof(x));
1863         if (ss < 0) {
1864                 if (errno == EAGAIN || errno == EINTR)
1865                         return 0;
1866
1867                 return -errno;
1868         }
1869
1870         if (_unlikely_(ss != sizeof(x)))
1871                 return -EIO;
1872
1873         if (next)
1874                 *next = USEC_INFINITY;
1875
1876         return 0;
1877 }
1878
1879 static int process_timer(
1880                 sd_event *e,
1881                 usec_t n,
1882                 struct clock_data *d) {
1883
1884         sd_event_source *s;
1885         int r;
1886
1887         assert(e);
1888         assert(d);
1889
1890         for (;;) {
1891                 s = prioq_peek(d->earliest);
1892                 if (!s ||
1893                     s->time.next > n ||
1894                     s->enabled == SD_EVENT_OFF ||
1895                     s->pending)
1896                         break;
1897
1898                 r = source_set_pending(s, true);
1899                 if (r < 0)
1900                         return r;
1901
1902                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1903                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1904                 d->needs_rearm = true;
1905         }
1906
1907         return 0;
1908 }
1909
/* Polls every registered child source with waitid(WNOHANG) and marks
 * those with a state change as pending. Dead children are left
 * unreaped (WNOWAIT) so the dispatch callback still sees the zombie;
 * see the rationale comment below. Returns 0 on success, negative
 * errno-style code on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued, nothing further to learn right now. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only when the caller asked for WEXITED, so a
                 * dead child stays reapable for the callback. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid != 0 means waitid() actually found a state change. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1976
1977 static int process_signal(sd_event *e, uint32_t events) {
1978         bool read_one = false;
1979         int r;
1980
1981         assert(e);
1982
1983         assert_return(events == EPOLLIN, -EIO);
1984
1985         for (;;) {
1986                 struct signalfd_siginfo si;
1987                 ssize_t ss;
1988                 sd_event_source *s = NULL;
1989
1990                 ss = read(e->signal_fd, &si, sizeof(si));
1991                 if (ss < 0) {
1992                         if (errno == EAGAIN || errno == EINTR)
1993                                 return read_one;
1994
1995                         return -errno;
1996                 }
1997
1998                 if (_unlikely_(ss != sizeof(si)))
1999                         return -EIO;
2000
2001                 read_one = true;
2002
2003                 if (si.ssi_signo == SIGCHLD) {
2004                         r = process_child(e);
2005                         if (r < 0)
2006                                 return r;
2007                         if (r > 0)
2008                                 continue;
2009                 }
2010
2011                 if (e->signal_sources)
2012                         s = e->signal_sources[si.ssi_signo];
2013
2014                 if (!s)
2015                         continue;
2016
2017                 s->signal.siginfo = si;
2018                 r = source_set_pending(s, true);
2019                 if (r < 0)
2020                         return r;
2021         }
2022 }
2023
/* Runs the callback of a single event source and does the surrounding
 * housekeeping: clearing the pending flag, waking post sources,
 * applying ONESHOT semantics, and freeing the source if the callback
 * dropped its last reference. Returns 1 on success, negative
 * errno-style error if the housekeeping itself fails (a failing
 * callback only gets the source disabled, see bottom). */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER sources stay pending until explicitly disabled, and
         * EXIT sources are dispatched off the exit prioq instead; all
         * other types are unqueued before their callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback runs, so
         * the callback may re-enable them to rearm. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* Flag that we are inside the user callback; sd_event_source_unref()
         * checks this to defer freeing until we return. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine this before the callback runs, since the
                 * callback itself may reap the child. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* If the callback dropped the last reference, free the source
         * now that we are out of its callback; otherwise a failing
         * callback merely gets the source switched off. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2125
2126 static int event_prepare(sd_event *e) {
2127         int r;
2128
2129         assert(e);
2130
2131         for (;;) {
2132                 sd_event_source *s;
2133
2134                 s = prioq_peek(e->prepare);
2135                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2136                         break;
2137
2138                 s->prepare_iteration = e->iteration;
2139                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2140                 if (r < 0)
2141                         return r;
2142
2143                 assert(s->prepare);
2144
2145                 s->dispatching = true;
2146                 r = s->prepare(s, s->userdata);
2147                 s->dispatching = false;
2148
2149                 if (r < 0)
2150                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2151
2152                 if (s->n_ref == 0)
2153                         source_free(s);
2154                 else if (r < 0)
2155                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2156         }
2157
2158         return 0;
2159 }
2160
2161 static int dispatch_exit(sd_event *e) {
2162         sd_event_source *p;
2163         int r;
2164
2165         assert(e);
2166
2167         p = prioq_peek(e->exit);
2168         if (!p || p->enabled == SD_EVENT_OFF) {
2169                 e->state = SD_EVENT_FINISHED;
2170                 return 0;
2171         }
2172
2173         sd_event_ref(e);
2174         e->iteration++;
2175         e->state = SD_EVENT_EXITING;
2176
2177         r = source_dispatch(p);
2178
2179         e->state = SD_EVENT_PASSIVE;
2180         sd_event_unref(e);
2181
2182         return r;
2183 }
2184
2185 static sd_event_source* event_next_pending(sd_event *e) {
2186         sd_event_source *p;
2187
2188         assert(e);
2189
2190         p = prioq_peek(e->pending);
2191         if (!p)
2192                 return NULL;
2193
2194         if (p->enabled == SD_EVENT_OFF)
2195                 return NULL;
2196
2197         return p;
2198 }
2199
2200 static int arm_watchdog(sd_event *e) {
2201         struct itimerspec its = {};
2202         usec_t t;
2203         int r;
2204
2205         assert(e);
2206         assert(e->watchdog_fd >= 0);
2207
2208         t = sleep_between(e,
2209                           e->watchdog_last + (e->watchdog_period / 2),
2210                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2211
2212         timespec_store(&its.it_value, t);
2213
2214         /* Make sure we never set the watchdog to 0, which tells the
2215          * kernel to disable it. */
2216         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2217                 its.it_value.tv_nsec = 1;
2218
2219         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2220         if (r < 0)
2221                 return -errno;
2222
2223         return 0;
2224 }
2225
2226 static int process_watchdog(sd_event *e) {
2227         assert(e);
2228
2229         if (!e->watchdog)
2230                 return 0;
2231
2232         /* Don't notify watchdog too often */
2233         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2234                 return 0;
2235
2236         sd_notify(false, "WATCHDOG=1");
2237         e->watchdog_last = e->timestamp.monotonic;
2238
2239         return arm_watchdog(e);
2240 }
2241
2242 _public_ int sd_event_prepare(sd_event *e) {
2243         int r;
2244
2245         assert_return(e, -EINVAL);
2246         assert_return(!event_pid_changed(e), -ECHILD);
2247         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2248         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2249
2250         if (e->exit_requested)
2251                 goto pending;
2252
2253         e->iteration++;
2254
2255         r = event_prepare(e);
2256         if (r < 0)
2257                 return r;
2258
2259         r = event_arm_timer(e, &e->realtime);
2260         if (r < 0)
2261                 return r;
2262
2263         r = event_arm_timer(e, &e->boottime);
2264         if (r < 0)
2265                 return r;
2266
2267         r = event_arm_timer(e, &e->monotonic);
2268         if (r < 0)
2269                 return r;
2270
2271         r = event_arm_timer(e, &e->realtime_alarm);
2272         if (r < 0)
2273                 return r;
2274
2275         r = event_arm_timer(e, &e->boottime_alarm);
2276         if (r < 0)
2277                 return r;
2278
2279         if (event_next_pending(e) || e->need_process_child)
2280                 goto pending;
2281
2282         e->state = SD_EVENT_PREPARED;
2283
2284         return 0;
2285
2286 pending:
2287         e->state = SD_EVENT_PREPARED;
2288         r = sd_event_wait(e, 0);
2289         if (r == 0)
2290                 e->state = SD_EVENT_PREPARED;
2291
2292         return r;
2293 }
2294
/* Second stage of an iteration: blocks in epoll_wait() for up to
 * `timeout` usec ((uint64_t) -1 means forever), then flushes all
 * internal fds (timers, signalfd, watchdog) and turns kernel events
 * into pending sources. Returns 1 and enters SD_EVENT_PENDING if
 * something is ready to dispatch, 0 (back to SD_EVENT_PASSIVE) if not,
 * negative errno-style error on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Event buffer on the stack, capped at EPOLL_QUEUE_MAX entries;
         * epoll delivers the rest on the next iteration. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* usec timeout rounded up to ms for epoll_wait(). */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Treat signal interruption like a wakeup. */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wakeup timestamps; these feed the timer dispatch
         * below and sd_event_now(). */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Internal fds carry a small enum in data.ptr; anything else
         * is a real sd_event_source pointer for an IO source. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark all elapsed timer sources pending, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2395
2396 _public_ int sd_event_dispatch(sd_event *e) {
2397         sd_event_source *p;
2398         int r;
2399
2400         assert_return(e, -EINVAL);
2401         assert_return(!event_pid_changed(e), -ECHILD);
2402         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2403         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2404
2405         if (e->exit_requested)
2406                 return dispatch_exit(e);
2407
2408         p = event_next_pending(e);
2409         if (p) {
2410                 sd_event_ref(e);
2411
2412                 e->state = SD_EVENT_RUNNING;
2413                 r = source_dispatch(p);
2414                 e->state = SD_EVENT_PASSIVE;
2415
2416                 sd_event_unref(e);
2417
2418                 return r;
2419         }
2420
2421         e->state = SD_EVENT_PASSIVE;
2422
2423         return 1;
2424 }
2425
2426 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2427         int r;
2428
2429         assert_return(e, -EINVAL);
2430         assert_return(!event_pid_changed(e), -ECHILD);
2431         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2432         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2433
2434         r = sd_event_prepare(e);
2435         if (r > 0)
2436                 return sd_event_dispatch(e);
2437         else if (r < 0)
2438                 return r;
2439
2440         r = sd_event_wait(e, timeout);
2441         if (r > 0)
2442                 return sd_event_dispatch(e);
2443         else
2444                 return r;
2445 }
2446
2447 _public_ int sd_event_loop(sd_event *e) {
2448         int r;
2449
2450         assert_return(e, -EINVAL);
2451         assert_return(!event_pid_changed(e), -ECHILD);
2452         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2453
2454         sd_event_ref(e);
2455
2456         while (e->state != SD_EVENT_FINISHED) {
2457                 r = sd_event_run(e, (uint64_t) -1);
2458                 if (r < 0)
2459                         goto finish;
2460         }
2461
2462         r = e->exit_code;
2463
2464 finish:
2465         sd_event_unref(e);
2466         return r;
2467 }
2468
2469 _public_ int sd_event_get_fd(sd_event *e) {
2470
2471         assert_return(e, -EINVAL);
2472         assert_return(!event_pid_changed(e), -ECHILD);
2473
2474         return e->epoll_fd;
2475 }
2476
2477 _public_ int sd_event_get_state(sd_event *e) {
2478         assert_return(e, -EINVAL);
2479         assert_return(!event_pid_changed(e), -ECHILD);
2480
2481         return e->state;
2482 }
2483
2484 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2485         assert_return(e, -EINVAL);
2486         assert_return(code, -EINVAL);
2487         assert_return(!event_pid_changed(e), -ECHILD);
2488
2489         if (!e->exit_requested)
2490                 return -ENODATA;
2491
2492         *code = e->exit_code;
2493         return 0;
2494 }
2495
2496 _public_ int sd_event_exit(sd_event *e, int code) {
2497         assert_return(e, -EINVAL);
2498         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2499         assert_return(!event_pid_changed(e), -ECHILD);
2500
2501         e->exit_requested = true;
2502         e->exit_code = code;
2503
2504         return 0;
2505 }
2506
2507 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2508         assert_return(e, -EINVAL);
2509         assert_return(usec, -EINVAL);
2510         assert_return(!event_pid_changed(e), -ECHILD);
2511
2512         /* If we haven't run yet, just get the actual time */
2513         if (!dual_timestamp_is_set(&e->timestamp))
2514                 return -ENODATA;
2515
2516         switch (clock) {
2517
2518         case CLOCK_REALTIME:
2519         case CLOCK_REALTIME_ALARM:
2520                 *usec = e->timestamp.realtime;
2521                 break;
2522
2523         case CLOCK_MONOTONIC:
2524                 *usec = e->timestamp.monotonic;
2525                 break;
2526
2527         case CLOCK_BOOTTIME:
2528         case CLOCK_BOOTTIME_ALARM:
2529                 *usec = e->timestamp_boottime;
2530                 break;
2531         }
2532
2533         return 0;
2534 }
2535
2536 _public_ int sd_event_default(sd_event **ret) {
2537
2538         static thread_local sd_event *default_event = NULL;
2539         sd_event *e = NULL;
2540         int r;
2541
2542         if (!ret)
2543                 return !!default_event;
2544
2545         if (default_event) {
2546                 *ret = sd_event_ref(default_event);
2547                 return 0;
2548         }
2549
2550         r = sd_event_new(&e);
2551         if (r < 0)
2552                 return r;
2553
2554         e->default_event_ptr = &default_event;
2555         e->tid = gettid();
2556         default_event = e;
2557
2558         *ret = e;
2559         return 1;
2560 }
2561
2562 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2563         assert_return(e, -EINVAL);
2564         assert_return(tid, -EINVAL);
2565         assert_return(!event_pid_changed(e), -ECHILD);
2566
2567         if (e->tid != 0) {
2568                 *tid = e->tid;
2569                 return 0;
2570         }
2571
2572         return -ENXIO;
2573 }
2574
2575 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2576         int r;
2577
2578         assert_return(e, -EINVAL);
2579         assert_return(!event_pid_changed(e), -ECHILD);
2580
2581         if (e->watchdog == !!b)
2582                 return e->watchdog;
2583
2584         if (b) {
2585                 struct epoll_event ev = {};
2586
2587                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2588                 if (r <= 0)
2589                         return r;
2590
2591                 /* Issue first ping immediately */
2592                 sd_notify(false, "WATCHDOG=1");
2593                 e->watchdog_last = now(CLOCK_MONOTONIC);
2594
2595                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2596                 if (e->watchdog_fd < 0)
2597                         return -errno;
2598
2599                 r = arm_watchdog(e);
2600                 if (r < 0)
2601                         goto fail;
2602
2603                 ev.events = EPOLLIN;
2604                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2605
2606                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2607                 if (r < 0) {
2608                         r = -errno;
2609                         goto fail;
2610                 }
2611
2612         } else {
2613                 if (e->watchdog_fd >= 0) {
2614                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2615                         e->watchdog_fd = safe_close(e->watchdog_fd);
2616                 }
2617         }
2618
2619         e->watchdog = !!b;
2620         return e->watchdog;
2621
2622 fail:
2623         e->watchdog_fd = safe_close(e->watchdog_fd);
2624         return r;
2625 }
2626
2627 _public_ int sd_event_get_watchdog(sd_event *e) {
2628         assert_return(e, -EINVAL);
2629         assert_return(!event_pid_changed(e), -ECHILD);
2630
2631         return e->watchdog;
2632 }