chiark / gitweb /
sd-event: make it easy to bind signal handling to event loop exits
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36
37 #include "sd-event.h"
38
39 #define EPOLL_QUEUE_MAX 512U
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Discriminator for the per-type union inside sd_event_source */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        /* NOTE(review): "_SOUFCE_" looks like a typo for "_SOURCE_". Left
         * as-is because code outside this view may reference the misspelled
         * identifier; rename file-wide in a separate change. */
        _SOUFCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
57
/* True for the four timer source types that are backed by a struct clock_data */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
/* One event source registered with an event loop. Which member of the
 * trailing union is valid is determined by "type". */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;             /* the loop this source belongs to (referenced) */
        void *userdata;
        sd_event_handler_t prepare;  /* optional callback invoked before polling */

        EventSourceType type:5;
        int enabled:3;               /* SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;              /* queued in event->pending? */
        bool dispatching:1;

        int64_t priority;            /* lower values dispatch first */
        unsigned pending_index;      /* index in event->pending prioq */
        unsigned prepare_index;      /* index in event->prepare prioq */
        unsigned pending_iteration;  /* loop iteration when it became pending */
        unsigned prepare_iteration;  /* loop iteration when it was last prepared */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;          /* epoll events we subscribed to */
                        uint32_t revents;         /* events seen on last wakeup */
                        bool registered:1;        /* currently in the epoll set? */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;    /* earliest time and allowed slack */
                        unsigned earliest_index;  /* index in clock_data->earliest */
                        unsigned latest_index;    /* index in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;              /* WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;     /* index in event->exit prioq */
                } exit;
        };
};
115
/* Per-clock timer state: one timerfd plus two scheduling queues. */
struct clock_data {
        int fd; /* timerfd for this clock, or -1 if not created yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* next wakeup currently armed on fd */
};
130
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;    /* central poll fd everything is dispatched from */
        int signal_fd;   /* signalfd for "sigset", or -1 */
        int watchdog_fd; /* timerfd for watchdog keep-alive, or -1 */

        Prioq *pending;  /* sources with an undispatched event, ordered for dispatch */
        Prioq *prepare;  /* sources with a prepare callback */

        /* timerfd_create() only supports these four clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;  /* boot-ID-derived offset for timer coalescing, see initialize_perturb() */

        sigset_t sigset;                  /* signals routed through signal_fd */
        sd_event_source **signal_sources; /* indexed by signal number, _NSIG entries */

        Hashmap *child_sources;           /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;     /* SOURCE_EXIT sources, run when the loop exits */

        pid_t original_pid; /* PID the loop was created in, to detect fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* cleared on free so it doesn't dangle */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources; /* attached sources; must be 0 when freeing */
};
181
182 static int pending_prioq_compare(const void *a, const void *b) {
183         const sd_event_source *x = a, *y = b;
184
185         assert(x->pending);
186         assert(y->pending);
187
188         /* Enabled ones first */
189         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190                 return -1;
191         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192                 return 1;
193
194         /* Lower priority values first */
195         if (x->priority < y->priority)
196                 return -1;
197         if (x->priority > y->priority)
198                 return 1;
199
200         /* Older entries first */
201         if (x->pending_iteration < y->pending_iteration)
202                 return -1;
203         if (x->pending_iteration > y->pending_iteration)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int prepare_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->prepare);
219         assert(y->prepare);
220
221         /* Move most recently prepared ones last, so that we can stop
222          * preparing as soon as we hit one that has already been
223          * prepared in the current iteration */
224         if (x->prepare_iteration < y->prepare_iteration)
225                 return -1;
226         if (x->prepare_iteration > y->prepare_iteration)
227                 return 1;
228
229         /* Enabled ones first */
230         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
231                 return -1;
232         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233                 return 1;
234
235         /* Lower priority values first */
236         if (x->priority < y->priority)
237                 return -1;
238         if (x->priority > y->priority)
239                 return 1;
240
241         /* Stability for the rest */
242         if (x < y)
243                 return -1;
244         if (x > y)
245                 return 1;
246
247         return 0;
248 }
249
250 static int earliest_time_prioq_compare(const void *a, const void *b) {
251         const sd_event_source *x = a, *y = b;
252
253         assert(EVENT_SOURCE_IS_TIME(x->type));
254         assert(x->type == y->type);
255
256         /* Enabled ones first */
257         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
258                 return -1;
259         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260                 return 1;
261
262         /* Move the pending ones to the end */
263         if (!x->pending && y->pending)
264                 return -1;
265         if (x->pending && !y->pending)
266                 return 1;
267
268         /* Order by time */
269         if (x->time.next < y->time.next)
270                 return -1;
271         if (x->time.next > y->time.next)
272                 return 1;
273
274         /* Stability for the rest */
275         if (x < y)
276                 return -1;
277         if (x > y)
278                 return 1;
279
280         return 0;
281 }
282
283 static int latest_time_prioq_compare(const void *a, const void *b) {
284         const sd_event_source *x = a, *y = b;
285
286         assert(EVENT_SOURCE_IS_TIME(x->type));
287         assert(x->type == y->type);
288
289         /* Enabled ones first */
290         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
291                 return -1;
292         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293                 return 1;
294
295         /* Move the pending ones to the end */
296         if (!x->pending && y->pending)
297                 return -1;
298         if (x->pending && !y->pending)
299                 return 1;
300
301         /* Order by time */
302         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
303                 return -1;
304         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
305                 return 1;
306
307         /* Stability for the rest */
308         if (x < y)
309                 return -1;
310         if (x > y)
311                 return 1;
312
313         return 0;
314 }
315
316 static int exit_prioq_compare(const void *a, const void *b) {
317         const sd_event_source *x = a, *y = b;
318
319         assert(x->type == SOURCE_EXIT);
320         assert(y->type == SOURCE_EXIT);
321
322         /* Enabled ones first */
323         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324                 return -1;
325         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326                 return 1;
327
328         /* Lower priority values first */
329         if (x->priority < y->priority)
330                 return -1;
331         if (x->priority > y->priority)
332                 return 1;
333
334         /* Stability for the rest */
335         if (x < y)
336                 return -1;
337         if (x > y)
338                 return 1;
339
340         return 0;
341 }
342
343 static void free_clock_data(struct clock_data *d) {
344         assert(d);
345
346         safe_close(d->fd);
347         prioq_free(d->earliest);
348         prioq_free(d->latest);
349 }
350
/* Destroy an event loop object. All sources must already have been freed
 * (n_sources == 0); called from sd_event_unref() when the last reference
 * is dropped, and from sd_event_new() on construction failure. */
static void event_free(sd_event *e) {
        assert(e);
        assert(e->n_sources == 0);

        /* Unhook the registered default-event pointer so it doesn't dangle */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
377
378 _public_ int sd_event_new(sd_event** ret) {
379         sd_event *e;
380         int r;
381
382         assert_return(ret, -EINVAL);
383
384         e = new0(sd_event, 1);
385         if (!e)
386                 return -ENOMEM;
387
388         e->n_ref = 1;
389         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
390         e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
391         e->original_pid = getpid();
392         e->perturb = (usec_t) -1;
393
394         assert_se(sigemptyset(&e->sigset) == 0);
395
396         e->pending = prioq_new(pending_prioq_compare);
397         if (!e->pending) {
398                 r = -ENOMEM;
399                 goto fail;
400         }
401
402         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
403         if (e->epoll_fd < 0) {
404                 r = -errno;
405                 goto fail;
406         }
407
408         *ret = e;
409         return 0;
410
411 fail:
412         event_free(e);
413         return r;
414 }
415
416 _public_ sd_event* sd_event_ref(sd_event *e) {
417         assert_return(e, NULL);
418
419         assert(e->n_ref >= 1);
420         e->n_ref++;
421
422         return e;
423 }
424
425 _public_ sd_event* sd_event_unref(sd_event *e) {
426
427         if (!e)
428                 return NULL;
429
430         assert(e->n_ref >= 1);
431         e->n_ref--;
432
433         if (e->n_ref <= 0)
434                 event_free(e);
435
436         return NULL;
437 }
438
439 static bool event_pid_changed(sd_event *e) {
440         assert(e);
441
442         /* We don't support people creating am event loop and keeping
443          * it around over a fork(). Let's complain. */
444
445         return e->original_pid != getpid();
446 }
447
448 static int source_io_unregister(sd_event_source *s) {
449         int r;
450
451         assert(s);
452         assert(s->type == SOURCE_IO);
453
454         if (!s->io.registered)
455                 return 0;
456
457         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
458         if (r < 0)
459                 return -errno;
460
461         s->io.registered = false;
462         return 0;
463 }
464
465 static int source_io_register(
466                 sd_event_source *s,
467                 int enabled,
468                 uint32_t events) {
469
470         struct epoll_event ev = {};
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475         assert(enabled != SD_EVENT_OFF);
476
477         ev.events = events;
478         ev.data.ptr = s;
479
480         if (enabled == SD_EVENT_ONESHOT)
481                 ev.events |= EPOLLONESHOT;
482
483         if (s->io.registered)
484                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
485         else
486                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
487
488         if (r < 0)
489                 return -errno;
490
491         s->io.registered = true;
492
493         return 0;
494 }
495
496 static clockid_t event_source_type_to_clock(EventSourceType t) {
497
498         switch (t) {
499
500         case SOURCE_TIME_REALTIME:
501                 return CLOCK_REALTIME;
502
503         case SOURCE_TIME_MONOTONIC:
504                 return CLOCK_MONOTONIC;
505
506         case SOURCE_TIME_REALTIME_ALARM:
507                 return CLOCK_REALTIME_ALARM;
508
509         case SOURCE_TIME_BOOTTIME_ALARM:
510                 return CLOCK_BOOTTIME_ALARM;
511
512         default:
513                 return (clockid_t) -1;
514         }
515 }
516
517 static EventSourceType clock_to_event_source_type(clockid_t clock) {
518
519         switch (clock) {
520
521         case CLOCK_REALTIME:
522                 return SOURCE_TIME_REALTIME;
523
524         case CLOCK_MONOTONIC:
525                 return SOURCE_TIME_MONOTONIC;
526
527         case CLOCK_REALTIME_ALARM:
528                 return SOURCE_TIME_REALTIME_ALARM;
529
530         case CLOCK_BOOTTIME_ALARM:
531                 return SOURCE_TIME_BOOTTIME_ALARM;
532
533         default:
534                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
535         }
536 }
537
538 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
539         assert(e);
540
541         switch (t) {
542
543         case SOURCE_TIME_REALTIME:
544                 return &e->realtime;
545
546         case SOURCE_TIME_MONOTONIC:
547                 return &e->monotonic;
548
549         case SOURCE_TIME_REALTIME_ALARM:
550                 return &e->realtime_alarm;
551
552         case SOURCE_TIME_BOOTTIME_ALARM:
553                 return &e->boottime_alarm;
554
555         default:
556                 return NULL;
557         }
558 }
559
/* Tear down an event source: detach it from all of the loop's data
 * structures according to its type, remove it from the pending/prepare
 * queues, and drop its reference on the loop. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                assert(s->event->n_sources > 0);

                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        /* Drop the source from both scheduling queues of its clock */
                        prioq_remove(d->earliest, s, &s->time.earliest_index);
                        prioq_remove(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the mask while enabled child
                                 * sources still need it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the mask while an explicit
                                 * SIGCHLD signal source still exists */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_POST:
                        set_remove(s->event->post_sources, s);
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                s->event->n_sources--;
                sd_event_unref(s->event);
        }

        free(s);
}
642
/* Flip a source's pending flag, keeping the loop's pending prioq and —
 * for timer sources — both clock prioqs consistent. Returns 0 on
 * success or if nothing changed, negative errno-style code on failure. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT); /* exit sources live in their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when the source became pending, so equal-priority
                 * sources dispatch in arrival order */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back the flag so state stays consistent */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                /* The time prioq comparators sort pending sources last, so a
                 * pending flip changes this source's position in both queues */
                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
677
678 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
679         sd_event_source *s;
680
681         assert(e);
682
683         s = new0(sd_event_source, 1);
684         if (!s)
685                 return NULL;
686
687         s->n_ref = 1;
688         s->event = sd_event_ref(e);
689         s->type = type;
690         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
691
692         e->n_sources ++;
693
694         return s;
695 }
696
697 _public_ int sd_event_add_io(
698                 sd_event *e,
699                 sd_event_source **ret,
700                 int fd,
701                 uint32_t events,
702                 sd_event_io_handler_t callback,
703                 void *userdata) {
704
705         sd_event_source *s;
706         int r;
707
708         assert_return(e, -EINVAL);
709         assert_return(fd >= 0, -EINVAL);
710         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
711         assert_return(callback, -EINVAL);
712         assert_return(ret, -EINVAL);
713         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
714         assert_return(!event_pid_changed(e), -ECHILD);
715
716         s = source_new(e, SOURCE_IO);
717         if (!s)
718                 return -ENOMEM;
719
720         s->io.fd = fd;
721         s->io.events = events;
722         s->io.callback = callback;
723         s->userdata = userdata;
724         s->enabled = SD_EVENT_ON;
725
726         r = source_io_register(s, s->enabled, events);
727         if (r < 0) {
728                 source_free(s);
729                 return -errno;
730         }
731
732         *ret = s;
733         return 0;
734 }
735
736 static void initialize_perturb(sd_event *e) {
737         sd_id128_t bootid = {};
738
739         /* When we sleep for longer, we try to realign the wakeup to
740            the same time wihtin each minute/second/250ms, so that
741            events all across the system can be coalesced into a single
742            CPU wakeup. However, let's take some system-specific
743            randomness for this value, so that in a network of systems
744            with synced clocks timer events are distributed a
745            bit. Here, we calculate a perturbation usec offset from the
746            boot ID. */
747
748         if (_likely_(e->perturb != (usec_t) -1))
749                 return;
750
751         if (sd_id128_get_boot(&bootid) >= 0)
752                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
753 }
754
755 static int event_setup_timer_fd(
756                 sd_event *e,
757                 struct clock_data *d,
758                 clockid_t clock) {
759
760         struct epoll_event ev = {};
761         int r, fd;
762
763         assert(e);
764         assert(d);
765
766         if (_likely_(d->fd >= 0))
767                 return 0;
768
769         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
770         if (fd < 0)
771                 return -errno;
772
773         ev.events = EPOLLIN;
774         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
775
776         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
777         if (r < 0) {
778                 safe_close(fd);
779                 return -errno;
780         }
781
782         d->fd = fd;
783         return 0;
784 }
785
/* Add a timer event source on the given clock, to fire at "usec" with up
 * to "accuracy" microseconds of slack (0 selects DEFAULT_ACCURACY_USEC).
 * The source starts out as SD_EVENT_ONESHOT. Returns 0 on success and
 * stores the new source in *ret; negative errno-style code on failure
 * (-ENOTSUP for clocks without timerfd support). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate this clock's scheduling queues and timerfd on
         * first use */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        /* source_free() removes the source from whichever prioqs it made
         * it into */
        source_free(s);
        return r;
}
858
859 static int event_update_signal_fd(sd_event *e) {
860         struct epoll_event ev = {};
861         bool add_to_epoll;
862         int r;
863
864         assert(e);
865
866         add_to_epoll = e->signal_fd < 0;
867
868         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
869         if (r < 0)
870                 return -errno;
871
872         e->signal_fd = r;
873
874         if (!add_to_epoll)
875                 return 0;
876
877         ev.events = EPOLLIN;
878         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
879
880         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
881         if (r < 0) {
882                 e->signal_fd = safe_close(e->signal_fd);
883                 return -errno;
884         }
885
886         return 0;
887 }
888
889 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
890         assert(s);
891
892         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
893 }
894
895 _public_ int sd_event_add_signal(
896                 sd_event *e,
897                 sd_event_source **ret,
898                 int sig,
899                 sd_event_signal_handler_t callback,
900                 void *userdata) {
901
902         sd_event_source *s;
903         sigset_t ss;
904         int r;
905
906         assert_return(e, -EINVAL);
907         assert_return(sig > 0, -EINVAL);
908         assert_return(sig < _NSIG, -EINVAL);
909         assert_return(ret, -EINVAL);
910         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
911         assert_return(!event_pid_changed(e), -ECHILD);
912
913         if (!callback)
914                 callback = signal_exit_callback;
915
916         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
917         if (r < 0)
918                 return -errno;
919
920         if (!sigismember(&ss, sig))
921                 return -EBUSY;
922
923         if (!e->signal_sources) {
924                 e->signal_sources = new0(sd_event_source*, _NSIG);
925                 if (!e->signal_sources)
926                         return -ENOMEM;
927         } else if (e->signal_sources[sig])
928                 return -EBUSY;
929
930         s = source_new(e, SOURCE_SIGNAL);
931         if (!s)
932                 return -ENOMEM;
933
934         s->signal.sig = sig;
935         s->signal.callback = callback;
936         s->userdata = userdata;
937         s->enabled = SD_EVENT_ON;
938
939         e->signal_sources[sig] = s;
940         assert_se(sigaddset(&e->sigset, sig) == 0);
941
942         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
943                 r = event_update_signal_fd(e);
944                 if (r < 0) {
945                         source_free(s);
946                         return r;
947                 }
948         }
949
950         *ret = s;
951         return 0;
952 }
953
954 _public_ int sd_event_add_child(
955                 sd_event *e,
956                 sd_event_source **ret,
957                 pid_t pid,
958                 int options,
959                 sd_event_child_handler_t callback,
960                 void *userdata) {
961
962         sd_event_source *s;
963         int r;
964
965         assert_return(e, -EINVAL);
966         assert_return(pid > 1, -EINVAL);
967         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
968         assert_return(options != 0, -EINVAL);
969         assert_return(callback, -EINVAL);
970         assert_return(ret, -EINVAL);
971         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
972         assert_return(!event_pid_changed(e), -ECHILD);
973
974         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
975         if (r < 0)
976                 return r;
977
978         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
979                 return -EBUSY;
980
981         s = source_new(e, SOURCE_CHILD);
982         if (!s)
983                 return -ENOMEM;
984
985         s->child.pid = pid;
986         s->child.options = options;
987         s->child.callback = callback;
988         s->userdata = userdata;
989         s->enabled = SD_EVENT_ONESHOT;
990
991         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
992         if (r < 0) {
993                 source_free(s);
994                 return r;
995         }
996
997         e->n_enabled_child_sources ++;
998
999         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1000
1001         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
1002                 r = event_update_signal_fd(e);
1003                 if (r < 0) {
1004                         source_free(s);
1005                         return -errno;
1006                 }
1007         }
1008
1009         e->need_process_child = true;
1010
1011         *ret = s;
1012         return 0;
1013 }
1014
1015 _public_ int sd_event_add_defer(
1016                 sd_event *e,
1017                 sd_event_source **ret,
1018                 sd_event_handler_t callback,
1019                 void *userdata) {
1020
1021         sd_event_source *s;
1022         int r;
1023
1024         assert_return(e, -EINVAL);
1025         assert_return(callback, -EINVAL);
1026         assert_return(ret, -EINVAL);
1027         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1028         assert_return(!event_pid_changed(e), -ECHILD);
1029
1030         s = source_new(e, SOURCE_DEFER);
1031         if (!s)
1032                 return -ENOMEM;
1033
1034         s->defer.callback = callback;
1035         s->userdata = userdata;
1036         s->enabled = SD_EVENT_ONESHOT;
1037
1038         r = source_set_pending(s, true);
1039         if (r < 0) {
1040                 source_free(s);
1041                 return r;
1042         }
1043
1044         *ret = s;
1045         return 0;
1046 }
1047
1048 _public_ int sd_event_add_post(
1049                 sd_event *e,
1050                 sd_event_source **ret,
1051                 sd_event_handler_t callback,
1052                 void *userdata) {
1053
1054         sd_event_source *s;
1055         int r;
1056
1057         assert_return(e, -EINVAL);
1058         assert_return(callback, -EINVAL);
1059         assert_return(ret, -EINVAL);
1060         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1061         assert_return(!event_pid_changed(e), -ECHILD);
1062
1063         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1064         if (r < 0)
1065                 return r;
1066
1067         s = source_new(e, SOURCE_POST);
1068         if (!s)
1069                 return -ENOMEM;
1070
1071         s->post.callback = callback;
1072         s->userdata = userdata;
1073         s->enabled = SD_EVENT_ON;
1074
1075         r = set_put(e->post_sources, s);
1076         if (r < 0) {
1077                 source_free(s);
1078                 return r;
1079         }
1080
1081         *ret = s;
1082         return 0;
1083 }
1084
1085 _public_ int sd_event_add_exit(
1086                 sd_event *e,
1087                 sd_event_source **ret,
1088                 sd_event_handler_t callback,
1089                 void *userdata) {
1090
1091         sd_event_source *s;
1092         int r;
1093
1094         assert_return(e, -EINVAL);
1095         assert_return(callback, -EINVAL);
1096         assert_return(ret, -EINVAL);
1097         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1098         assert_return(!event_pid_changed(e), -ECHILD);
1099
1100         if (!e->exit) {
1101                 e->exit = prioq_new(exit_prioq_compare);
1102                 if (!e->exit)
1103                         return -ENOMEM;
1104         }
1105
1106         s = source_new(e, SOURCE_EXIT);
1107         if (!s)
1108                 return -ENOMEM;
1109
1110         s->exit.callback = callback;
1111         s->userdata = userdata;
1112         s->exit.prioq_index = PRIOQ_IDX_NULL;
1113         s->enabled = SD_EVENT_ONESHOT;
1114
1115         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1116         if (r < 0) {
1117                 source_free(s);
1118                 return r;
1119         }
1120
1121         *ret = s;
1122         return 0;
1123 }
1124
1125 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1126         assert_return(s, NULL);
1127
1128         assert(s->n_ref >= 1);
1129         s->n_ref++;
1130
1131         return s;
1132 }
1133
/* Drops one reference from the event source; tolerates NULL. Always
 * returns NULL so callers can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        /* Only IO sources hold an epoll registration
                         * that must be dropped right now; other types
                         * can simply wait. NOTE(review): the final
                         * source_free() for this case presumably
                         * happens after dispatch completes — confirm in
                         * source_dispatch()/its caller. */
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1160
1161 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1162         assert_return(s, NULL);
1163
1164         return s->event;
1165 }
1166
1167 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1168         assert_return(s, -EINVAL);
1169         assert_return(s->type != SOURCE_EXIT, -EDOM);
1170         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1171         assert_return(!event_pid_changed(s->event), -ECHILD);
1172
1173         return s->pending;
1174 }
1175
1176 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1177         assert_return(s, -EINVAL);
1178         assert_return(s->type == SOURCE_IO, -EDOM);
1179         assert_return(!event_pid_changed(s->event), -ECHILD);
1180
1181         return s->io.fd;
1182 }
1183
/* Replaces the file descriptor an IO event source watches. The old fd is
 * not closed; ownership stays with the caller. Returns 0 on success or a
 * negative errno-style error. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* Same fd? Nothing to do. */
        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered with epoll while disabled: just record
                 * the new fd for the next enable. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first; on failure roll back to the
                 * old fd, which is still in the epoll set. */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Only after the new fd is safely registered do we drop
                 * the old one from epoll (result intentionally ignored). */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1219
1220 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1221         assert_return(s, -EINVAL);
1222         assert_return(events, -EINVAL);
1223         assert_return(s->type == SOURCE_IO, -EDOM);
1224         assert_return(!event_pid_changed(s->event), -ECHILD);
1225
1226         *events = s->io.events;
1227         return 0;
1228 }
1229
1230 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1231         int r;
1232
1233         assert_return(s, -EINVAL);
1234         assert_return(s->type == SOURCE_IO, -EDOM);
1235         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1236         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1237         assert_return(!event_pid_changed(s->event), -ECHILD);
1238
1239         if (s->io.events == events)
1240                 return 0;
1241
1242         if (s->enabled != SD_EVENT_OFF) {
1243                 r = source_io_register(s, s->enabled, events);
1244                 if (r < 0)
1245                         return r;
1246         }
1247
1248         s->io.events = events;
1249         source_set_pending(s, false);
1250
1251         return 0;
1252 }
1253
1254 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1255         assert_return(s, -EINVAL);
1256         assert_return(revents, -EINVAL);
1257         assert_return(s->type == SOURCE_IO, -EDOM);
1258         assert_return(s->pending, -ENODATA);
1259         assert_return(!event_pid_changed(s->event), -ECHILD);
1260
1261         *revents = s->io.revents;
1262         return 0;
1263 }
1264
1265 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1266         assert_return(s, -EINVAL);
1267         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1268         assert_return(!event_pid_changed(s->event), -ECHILD);
1269
1270         return s->signal.sig;
1271 }
1272
1273 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1274         assert_return(s, -EINVAL);
1275         assert_return(!event_pid_changed(s->event), -ECHILD);
1276
1277         return s->priority;
1278 }
1279
1280 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1281         assert_return(s, -EINVAL);
1282         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1283         assert_return(!event_pid_changed(s->event), -ECHILD);
1284
1285         if (s->priority == priority)
1286                 return 0;
1287
1288         s->priority = priority;
1289
1290         if (s->pending)
1291                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1292
1293         if (s->prepare)
1294                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1295
1296         if (s->type == SOURCE_EXIT)
1297                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1298
1299         return 0;
1300 }
1301
1302 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1303         assert_return(s, -EINVAL);
1304         assert_return(m, -EINVAL);
1305         assert_return(!event_pid_changed(s->event), -ECHILD);
1306
1307         *m = s->enabled;
1308         return 0;
1309 }
1310
/* Switches an event source between SD_EVENT_OFF, SD_EVENT_ON and
 * SD_EVENT_ONESHOT. Besides flipping s->enabled this keeps all derived
 * kernel/queue state in sync: epoll registration for IO sources, the
 * timer priority queues for time sources, the signalfd mask for signal
 * and child sources, and the exit priority queue for exit sources. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* No change? */
        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Drop the epoll registration first; only mark
                         * disabled if that worked. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Disabled timers sort differently; reshuffle
                         * both queues of the matching clock. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the signalfd mask if child
                         * sources still need it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Only drop SIGCHLD from the mask if no explicit
                         * SIGCHLD signal source is installed either. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        /* Nothing but the flag to update. */
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                /* Enabling (SD_EVENT_ON or SD_EVENT_ONESHOT). */
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* Add the signal to the mask unless child sources
                         * already keep SIGCHLD in it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only bump the counter when transitioning from
                         * fully off; ON<->ONESHOT must not recount. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* Enablement also affects the ordering of the generic pending
         * and prepare queues. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1456
1457 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1458         assert_return(s, -EINVAL);
1459         assert_return(usec, -EINVAL);
1460         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1461         assert_return(!event_pid_changed(s->event), -ECHILD);
1462
1463         *usec = s->time.next;
1464         return 0;
1465 }
1466
1467 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1468         struct clock_data *d;
1469
1470         assert_return(s, -EINVAL);
1471         assert_return(usec != (uint64_t) -1, -EINVAL);
1472         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1473         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1474         assert_return(!event_pid_changed(s->event), -ECHILD);
1475
1476         s->time.next = usec;
1477
1478         source_set_pending(s, false);
1479
1480         d = event_get_clock_data(s->event, s->type);
1481         assert(d);
1482
1483         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1484         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1485
1486         return 0;
1487 }
1488
1489 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1490         assert_return(s, -EINVAL);
1491         assert_return(usec, -EINVAL);
1492         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1493         assert_return(!event_pid_changed(s->event), -ECHILD);
1494
1495         *usec = s->time.accuracy;
1496         return 0;
1497 }
1498
1499 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1500         struct clock_data *d;
1501
1502         assert_return(s, -EINVAL);
1503         assert_return(usec != (uint64_t) -1, -EINVAL);
1504         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1505         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1506         assert_return(!event_pid_changed(s->event), -ECHILD);
1507
1508         if (usec == 0)
1509                 usec = DEFAULT_ACCURACY_USEC;
1510
1511         s->time.accuracy = usec;
1512
1513         source_set_pending(s, false);
1514
1515         d = event_get_clock_data(s->event, s->type);
1516         assert(d);
1517
1518         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1519
1520         return 0;
1521 }
1522
1523 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1524         assert_return(s, -EINVAL);
1525         assert_return(clock, -EINVAL);
1526         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1527         assert_return(!event_pid_changed(s->event), -ECHILD);
1528
1529         *clock = event_source_type_to_clock(s->type);
1530         return 0;
1531 }
1532
1533 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1534         assert_return(s, -EINVAL);
1535         assert_return(pid, -EINVAL);
1536         assert_return(s->type == SOURCE_CHILD, -EDOM);
1537         assert_return(!event_pid_changed(s->event), -ECHILD);
1538
1539         *pid = s->child.pid;
1540         return 0;
1541 }
1542
1543 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1544         int r;
1545
1546         assert_return(s, -EINVAL);
1547         assert_return(s->type != SOURCE_EXIT, -EDOM);
1548         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1549         assert_return(!event_pid_changed(s->event), -ECHILD);
1550
1551         if (s->prepare == callback)
1552                 return 0;
1553
1554         if (callback && s->prepare) {
1555                 s->prepare = callback;
1556                 return 0;
1557         }
1558
1559         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1560         if (r < 0)
1561                 return r;
1562
1563         s->prepare = callback;
1564
1565         if (callback) {
1566                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1567                 if (r < 0)
1568                         return r;
1569         } else
1570                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1571
1572         return 0;
1573 }
1574
1575 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1576         assert_return(s, NULL);
1577
1578         return s->userdata;
1579 }
1580
1581 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1582         void *ret;
1583
1584         assert_return(s, NULL);
1585
1586         ret = s->userdata;
1587         s->userdata = userdata;
1588
1589         return ret;
1590 }
1591
/* Picks a wake-up time in the inclusive window [a, b], preferring
 * "synchronised" spots derived from the per-boot perturbation value so
 * that many timers across the system fire together. Falls back to b
 * (the latest permissible time) if no aligned spot fits the window. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* Earliest time is "now or earlier": wake immediately. */
        if (a <= 0)
                return 0;

        /* Window of one microsecond or less: no room to optimise. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Each attempt below: snap to the last perturbed grid point at
         * or before b (stepping back one interval if we overshot b),
         * guarding against unsigned underflow, and accept it if it is
         * still >= a. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Minute grid missed the window; retry with a 10s grid. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Then a 1s grid. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Then a 250ms grid. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No aligned spot fits: wake at the last possible moment. */
        return b;
}
1669
1670 static int event_arm_timer(
1671                 sd_event *e,
1672                 struct clock_data *d) {
1673
1674         struct itimerspec its = {};
1675         sd_event_source *a, *b;
1676         usec_t t;
1677         int r;
1678
1679         assert(e);
1680         assert(d);
1681
1682         a = prioq_peek(d->earliest);
1683         if (!a || a->enabled == SD_EVENT_OFF) {
1684
1685                 if (d->fd < 0)
1686                         return 0;
1687
1688                 if (d->next == (usec_t) -1)
1689                         return 0;
1690
1691                 /* disarm */
1692                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1693                 if (r < 0)
1694                         return r;
1695
1696                 d->next = (usec_t) -1;
1697                 return 0;
1698         }
1699
1700         b = prioq_peek(d->latest);
1701         assert_se(b && b->enabled != SD_EVENT_OFF);
1702
1703         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1704         if (d->next == t)
1705                 return 0;
1706
1707         assert_se(d->fd >= 0);
1708
1709         if (t == 0) {
1710                 /* We don' want to disarm here, just mean some time looooong ago. */
1711                 its.it_value.tv_sec = 0;
1712                 its.it_value.tv_nsec = 1;
1713         } else
1714                 timespec_store(&its.it_value, t);
1715
1716         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1717         if (r < 0)
1718                 return -errno;
1719
1720         d->next = t;
1721         return 0;
1722 }
1723
1724 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1725         assert(e);
1726         assert(s);
1727         assert(s->type == SOURCE_IO);
1728
1729         /* If the event source was already pending, we just OR in the
1730          * new revents, otherwise we reset the value. The ORing is
1731          * necessary to handle EPOLLONESHOT events properly where
1732          * readability might happen independently of writability, and
1733          * we need to keep track of both */
1734
1735         if (s->pending)
1736                 s->io.revents |= revents;
1737         else
1738                 s->io.revents = revents;
1739
1740         return source_set_pending(s, true);
1741 }
1742
1743 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1744         uint64_t x;
1745         ssize_t ss;
1746
1747         assert(e);
1748         assert(fd >= 0);
1749
1750         assert_return(events == EPOLLIN, -EIO);
1751
1752         ss = read(fd, &x, sizeof(x));
1753         if (ss < 0) {
1754                 if (errno == EAGAIN || errno == EINTR)
1755                         return 0;
1756
1757                 return -errno;
1758         }
1759
1760         if (_unlikely_(ss != sizeof(x)))
1761                 return -EIO;
1762
1763         if (next)
1764                 *next = (usec_t) -1;
1765
1766         return 0;
1767 }
1768
1769 static int process_timer(
1770                 sd_event *e,
1771                 usec_t n,
1772                 struct clock_data *d) {
1773
1774         sd_event_source *s;
1775         int r;
1776
1777         assert(e);
1778         assert(d);
1779
1780         for (;;) {
1781                 s = prioq_peek(d->earliest);
1782                 if (!s ||
1783                     s->time.next > n ||
1784                     s->enabled == SD_EVENT_OFF ||
1785                     s->pending)
1786                         break;
1787
1788                 r = source_set_pending(s, true);
1789                 if (r < 0)
1790                         return r;
1791
1792                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1793                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1794         }
1795
1796         return 0;
1797 }
1798
/* Polls every enabled, non-pending child source with waitid(WNOHANG) and
 * marks those whose child changed state as pending. Returns 0 on
 * success, a negative errno-style error otherwise. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* An unprocessed state change is already queued for
                 * this source; don't query again. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only together with WEXITED, so the zombie is
                 * left for the dispatch callback to observe. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when WNOHANG found no state change. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1865
/* Drains the signalfd and marks the matching signal sources as pending.
 * Returns a positive value if at least one signal was read, 0 if the fd
 * was empty, or a negative errno-style error. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* fd drained (or interrupted): report whether we
                         * got anything at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads are always whole siginfo structures. */
                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD may be watched via child sources,
                         * a plain signal source, or both; always poll
                         * the children, then fall through to the signal
                         * source only if one exists. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        /* A signal we never subscribed to: the mask and
                         * the source table are out of sync. */
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1910
/* Runs the callback of a single event source and does the bookkeeping
 * around it: clearing/propagating pending state, honoring ONESHOT mode,
 * and disabling or freeing the source afterwards. Returns 1 on success,
 * a negative errno-style error on failure. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending until explicitly disabled;
         * every other type is un-pended before its callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are switched off before the callback runs, so
         * the callback may re-enable them to request another invocation. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While dispatching, an unref to zero must not free the source
         * under us; source_free() is deferred until after the callback. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Did the child terminate? Then the callback gets the last
                 * look at the siginfo before we reap the process below. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOUFCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* The callback may have dropped the last reference; in that case
         * only the dispatching flag kept the source alive until here. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2011
/* Runs the prepare callbacks of all sources that registered one, in
 * priority order, at most once per event loop iteration. Returns 0 on
 * success, a negative errno-style error on failure. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* The prepare prioq sorts sources already handled this
                 * iteration (and disabled ones) towards the end, so we can
                 * stop as soon as the head no longer qualifies. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp and reshuffle first, so this source moves behind
                 * the still-unprepared ones. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Guard the callback so a dropped reference cannot free the
                 * source while it is still in use; freeing is deferred. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2046
2047 static int dispatch_exit(sd_event *e) {
2048         sd_event_source *p;
2049         int r;
2050
2051         assert(e);
2052
2053         p = prioq_peek(e->exit);
2054         if (!p || p->enabled == SD_EVENT_OFF) {
2055                 e->state = SD_EVENT_FINISHED;
2056                 return 0;
2057         }
2058
2059         sd_event_ref(e);
2060         e->iteration++;
2061         e->state = SD_EVENT_EXITING;
2062
2063         r = source_dispatch(p);
2064
2065         e->state = SD_EVENT_PASSIVE;
2066         sd_event_unref(e);
2067
2068         return r;
2069 }
2070
2071 static sd_event_source* event_next_pending(sd_event *e) {
2072         sd_event_source *p;
2073
2074         assert(e);
2075
2076         p = prioq_peek(e->pending);
2077         if (!p)
2078                 return NULL;
2079
2080         if (p->enabled == SD_EVENT_OFF)
2081                 return NULL;
2082
2083         return p;
2084 }
2085
2086 static int arm_watchdog(sd_event *e) {
2087         struct itimerspec its = {};
2088         usec_t t;
2089         int r;
2090
2091         assert(e);
2092         assert(e->watchdog_fd >= 0);
2093
2094         t = sleep_between(e,
2095                           e->watchdog_last + (e->watchdog_period / 2),
2096                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2097
2098         timespec_store(&its.it_value, t);
2099
2100         /* Make sure we never set the watchdog to 0, which tells the
2101          * kernel to disable it. */
2102         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2103                 its.it_value.tv_nsec = 1;
2104
2105         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2106         if (r < 0)
2107                 return -errno;
2108
2109         return 0;
2110 }
2111
2112 static int process_watchdog(sd_event *e) {
2113         assert(e);
2114
2115         if (!e->watchdog)
2116                 return 0;
2117
2118         /* Don't notify watchdog too often */
2119         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2120                 return 0;
2121
2122         sd_notify(false, "WATCHDOG=1");
2123         e->watchdog_last = e->timestamp.monotonic;
2124
2125         return arm_watchdog(e);
2126 }
2127
/* Runs a single iteration of the event loop: executes prepare
 * callbacks, arms the per-clock timer fds, waits on epoll for at most
 * 'timeout' usec ((uint64_t) -1 means forever), processes whatever woke
 * us up, and dispatches exactly one pending event source. Returns > 0
 * on success, a negative errno-style error on failure. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, only exit sources run. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the loop object for the duration of this iteration. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* (Re-)arm one timerfd per clock for its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* If work is already queued, just poll without blocking. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the usec timeout up to whole milliseconds for epoll. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        /* Cache "now" once, so every source in this iteration agrees. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with a small enum value as their
                 * epoll data pointer; anything else is a real IO source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark all timer sources whose deadline has passed as pending. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch exactly one pending source per iteration, so no source
         * can starve the loop. */
        p = event_next_pending(e);
        if (!p) {
                r = 1;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2243
2244 _public_ int sd_event_loop(sd_event *e) {
2245         int r;
2246
2247         assert_return(e, -EINVAL);
2248         assert_return(!event_pid_changed(e), -ECHILD);
2249         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2250
2251         sd_event_ref(e);
2252
2253         while (e->state != SD_EVENT_FINISHED) {
2254                 r = sd_event_run(e, (uint64_t) -1);
2255                 if (r < 0)
2256                         goto finish;
2257         }
2258
2259         r = e->exit_code;
2260
2261 finish:
2262         sd_event_unref(e);
2263         return r;
2264 }
2265
2266 _public_ int sd_event_get_state(sd_event *e) {
2267         assert_return(e, -EINVAL);
2268         assert_return(!event_pid_changed(e), -ECHILD);
2269
2270         return e->state;
2271 }
2272
2273 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2274         assert_return(e, -EINVAL);
2275         assert_return(code, -EINVAL);
2276         assert_return(!event_pid_changed(e), -ECHILD);
2277
2278         if (!e->exit_requested)
2279                 return -ENODATA;
2280
2281         *code = e->exit_code;
2282         return 0;
2283 }
2284
2285 _public_ int sd_event_exit(sd_event *e, int code) {
2286         assert_return(e, -EINVAL);
2287         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2288         assert_return(!event_pid_changed(e), -ECHILD);
2289
2290         e->exit_requested = true;
2291         e->exit_code = code;
2292
2293         return 0;
2294 }
2295
2296 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2297         assert_return(e, -EINVAL);
2298         assert_return(usec, -EINVAL);
2299         assert_return(!event_pid_changed(e), -ECHILD);
2300
2301         /* If we haven't run yet, just get the actual time */
2302         if (!dual_timestamp_is_set(&e->timestamp))
2303                 return -ENODATA;
2304
2305         switch (clock) {
2306
2307         case CLOCK_REALTIME:
2308         case CLOCK_REALTIME_ALARM:
2309                 *usec = e->timestamp.realtime;
2310                 break;
2311
2312         case CLOCK_MONOTONIC:
2313                 *usec = e->timestamp.monotonic;
2314                 break;
2315
2316         case CLOCK_BOOTTIME_ALARM:
2317                 *usec = e->timestamp_boottime;
2318                 break;
2319         }
2320
2321         return 0;
2322 }
2323
2324 _public_ int sd_event_default(sd_event **ret) {
2325
2326         static thread_local sd_event *default_event = NULL;
2327         sd_event *e = NULL;
2328         int r;
2329
2330         if (!ret)
2331                 return !!default_event;
2332
2333         if (default_event) {
2334                 *ret = sd_event_ref(default_event);
2335                 return 0;
2336         }
2337
2338         r = sd_event_new(&e);
2339         if (r < 0)
2340                 return r;
2341
2342         e->default_event_ptr = &default_event;
2343         e->tid = gettid();
2344         default_event = e;
2345
2346         *ret = e;
2347         return 1;
2348 }
2349
2350 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2351         assert_return(e, -EINVAL);
2352         assert_return(tid, -EINVAL);
2353         assert_return(!event_pid_changed(e), -ECHILD);
2354
2355         if (e->tid != 0) {
2356                 *tid = e->tid;
2357                 return 0;
2358         }
2359
2360         return -ENXIO;
2361 }
2362
/* Enables or disables automatic sd_notify("WATCHDOG=1") pings driven by
 * this event loop via an internal timerfd. Returns the new watchdog
 * state (0 or 1), or a negative errno-style error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Nothing to do if the state does not change. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means the service manager did not request a
                 * watchdog; in that case we leave it off and return 0. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* The watchdog fd is tagged with an enum value, like the
                 * other internal fds, so the epoll loop can recognize it. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: detach and close the timerfd if one exists. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2414
2415 _public_ int sd_event_get_watchdog(sd_event *e) {
2416         assert_return(e, -EINVAL);
2417         assert_return(!event_pid_changed(e), -ECHILD);
2418
2419         return e->watchdog;
2420 }