/* Imported from elogind: src/libsystemd/sd-event/sd-event.c
 * (gitweb snapshot, commit 47970879da162c28d841ffecdcb4449c86f96088) */
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36
37 #include "sd-event.h"
38
/* Maximum number of epoll events dequeued per loop iteration. */
#define EPOLL_QUEUE_MAX 512U

/* Default scheduling slack applied to timer sources that pass accuracy == 0. */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

/* Discriminator for the per-type union inside sd_event_source. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        /* NOTE(review): "_SOUFCE_" is a long-standing typo for "_SOURCE_".
         * Left untouched here since code elsewhere in this file may
         * reference the misspelled identifier — fix both together. */
        _SOUFCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the four timer-driven source types. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
/* One registered event source. The trailing anonymous union holds the
 * per-type state; which member is valid is determined by "type". */
struct sd_event_source {
        unsigned n_ref;              /* reference count of this source */

        sd_event *event;             /* owning loop (holds a reference, see source_new()) */
        void *userdata;              /* opaque pointer handed back to the callback */
        sd_event_handler_t prepare;  /* optional callback; sources with one live in event->prepare */

        EventSourceType type:5;      /* selects the union member below */
        int enabled:3;               /* SD_EVENT_OFF / ON / ONESHOT */
        bool pending:1;              /* queued in event->pending (see source_set_pending()) */
        bool dispatching:1;          /* presumably set while the callback runs — managed elsewhere in this file */

        int64_t priority;            /* lower values dispatch first (see the prioq comparators) */
        unsigned pending_index;      /* index in event->pending prioq */
        unsigned prepare_index;      /* index in event->prepare prioq */
        unsigned pending_iteration;  /* loop iteration at which the source became pending */
        unsigned prepare_iteration;  /* loop iteration at which the source was last prepared */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask of interest */
                        uint32_t revents;   /* epoll events last reported — filled in elsewhere */
                        bool registered:1;  /* currently added to the epoll fd? */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* elapse time and allowed dispatch slack */
                        unsigned earliest_index; /* index in clock_data->earliest */
                        unsigned latest_index;   /* index in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* last signalfd record — filled in elsewhere */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;  /* last waitid() result — filled in elsewhere */
                        pid_t pid;
                        int options;        /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index in event->exit prioq */
                } exit;
        };
};
115
/* Per-clock bookkeeping: one timerfd plus two prioqs of timer sources. */
struct clock_data {
        int fd; /* timerfd for this clock, or -1 until event_setup_timer_fd() ran */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* (usec_t) -1 when unset (see sd_event_new()); armed elapse time otherwise — managed elsewhere */
};
130
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;   /* reference count */

        int epoll_fd;     /* the central epoll instance */
        int signal_fd;    /* signalfd covering "sigset", or -1 (see event_update_signal_fd()) */
        int watchdog_fd;  /* timerfd for the watchdog logic (managed elsewhere in this file), or -1 */

        Prioq *pending;   /* sources with pending == true, ordered by pending_prioq_compare() */
        Prioq *prepare;   /* sources with a prepare callback, ordered by prepare_prioq_compare() */

        /* timerfd_create() only supports these four clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;   /* per-host wakeup-alignment offset, lazily set by initialize_perturb() */

        sigset_t sigset;  /* signals currently routed to signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized array indexed by signal number, NULL until first use */

        Hashmap *child_sources;            /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;  /* child sources not in SD_EVENT_OFF */

        Set *post_sources; /* all SOURCE_POST sources */

        Prioq *exit;       /* SOURCE_EXIT sources, ordered by exit_prioq_compare() */

        pid_t original_pid; /* PID that created the loop, see event_pid_changed() */

        unsigned iteration;        /* loop iteration counter */
        dual_timestamp timestamp;  /* presumably the time of the last wakeup — set elsewhere, confirm */
        usec_t timestamp_boottime;
        int state;                 /* SD_EVENT_* state machine value */

        bool exit_requested:1;     /* sd_event_exit() was called */
        bool need_process_child:1; /* a waitid() sweep over child_sources is due */
        bool watchdog:1;           /* watchdog logic enabled — managed elsewhere */

        int exit_code;             /* value handed to sd_event_exit() */

        pid_t tid;                 /* presumably the owning thread of a default loop — confirm against sd_event_default() */
        sd_event **default_event_ptr; /* slot to clear on destruction, see event_free() */

        usec_t watchdog_last, watchdog_period; /* watchdog pacing state — managed elsewhere */

        unsigned n_sources; /* number of live sources attached to this loop */
};
181
182 static int pending_prioq_compare(const void *a, const void *b) {
183         const sd_event_source *x = a, *y = b;
184
185         assert(x->pending);
186         assert(y->pending);
187
188         /* Enabled ones first */
189         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190                 return -1;
191         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192                 return 1;
193
194         /* Lower priority values first */
195         if (x->priority < y->priority)
196                 return -1;
197         if (x->priority > y->priority)
198                 return 1;
199
200         /* Older entries first */
201         if (x->pending_iteration < y->pending_iteration)
202                 return -1;
203         if (x->pending_iteration > y->pending_iteration)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int prepare_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->prepare);
219         assert(y->prepare);
220
221         /* Move most recently prepared ones last, so that we can stop
222          * preparing as soon as we hit one that has already been
223          * prepared in the current iteration */
224         if (x->prepare_iteration < y->prepare_iteration)
225                 return -1;
226         if (x->prepare_iteration > y->prepare_iteration)
227                 return 1;
228
229         /* Enabled ones first */
230         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
231                 return -1;
232         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233                 return 1;
234
235         /* Lower priority values first */
236         if (x->priority < y->priority)
237                 return -1;
238         if (x->priority > y->priority)
239                 return 1;
240
241         /* Stability for the rest */
242         if (x < y)
243                 return -1;
244         if (x > y)
245                 return 1;
246
247         return 0;
248 }
249
250 static int earliest_time_prioq_compare(const void *a, const void *b) {
251         const sd_event_source *x = a, *y = b;
252
253         assert(EVENT_SOURCE_IS_TIME(x->type));
254         assert(x->type == y->type);
255
256         /* Enabled ones first */
257         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
258                 return -1;
259         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260                 return 1;
261
262         /* Move the pending ones to the end */
263         if (!x->pending && y->pending)
264                 return -1;
265         if (x->pending && !y->pending)
266                 return 1;
267
268         /* Order by time */
269         if (x->time.next < y->time.next)
270                 return -1;
271         if (x->time.next > y->time.next)
272                 return 1;
273
274         /* Stability for the rest */
275         if (x < y)
276                 return -1;
277         if (x > y)
278                 return 1;
279
280         return 0;
281 }
282
283 static int latest_time_prioq_compare(const void *a, const void *b) {
284         const sd_event_source *x = a, *y = b;
285
286         assert(EVENT_SOURCE_IS_TIME(x->type));
287         assert(x->type == y->type);
288
289         /* Enabled ones first */
290         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
291                 return -1;
292         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293                 return 1;
294
295         /* Move the pending ones to the end */
296         if (!x->pending && y->pending)
297                 return -1;
298         if (x->pending && !y->pending)
299                 return 1;
300
301         /* Order by time */
302         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
303                 return -1;
304         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
305                 return 1;
306
307         /* Stability for the rest */
308         if (x < y)
309                 return -1;
310         if (x > y)
311                 return 1;
312
313         return 0;
314 }
315
316 static int exit_prioq_compare(const void *a, const void *b) {
317         const sd_event_source *x = a, *y = b;
318
319         assert(x->type == SOURCE_EXIT);
320         assert(y->type == SOURCE_EXIT);
321
322         /* Enabled ones first */
323         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324                 return -1;
325         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326                 return 1;
327
328         /* Lower priority values first */
329         if (x->priority < y->priority)
330                 return -1;
331         if (x->priority > y->priority)
332                 return 1;
333
334         /* Stability for the rest */
335         if (x < y)
336                 return -1;
337         if (x > y)
338                 return 1;
339
340         return 0;
341 }
342
/* Releases the timerfd and both scheduling prioqs of one clock. Safe on
 * a clock_data that was never used (fd == -1, NULL prioqs), since
 * safe_close() and prioq_free() tolerate those values. */
static void free_clock_data(struct clock_data *d) {
        assert(d);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
350
/* Destroys the loop object and everything it owns. Must only run once
 * all sources are gone (asserted below); reached via sd_event_unref(). */
static void event_free(sd_event *e) {
        assert(e);
        assert(e->n_sources == 0);

        /* Unhook us from the default-event pointer, if we are the default */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
377
378 _public_ int sd_event_new(sd_event** ret) {
379         sd_event *e;
380         int r;
381
382         assert_return(ret, -EINVAL);
383
384         e = new0(sd_event, 1);
385         if (!e)
386                 return -ENOMEM;
387
388         e->n_ref = 1;
389         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
390         e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
391         e->original_pid = getpid();
392         e->perturb = (usec_t) -1;
393
394         assert_se(sigemptyset(&e->sigset) == 0);
395
396         e->pending = prioq_new(pending_prioq_compare);
397         if (!e->pending) {
398                 r = -ENOMEM;
399                 goto fail;
400         }
401
402         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
403         if (e->epoll_fd < 0) {
404                 r = -errno;
405                 goto fail;
406         }
407
408         *ret = e;
409         return 0;
410
411 fail:
412         event_free(e);
413         return r;
414 }
415
416 _public_ sd_event* sd_event_ref(sd_event *e) {
417         assert_return(e, NULL);
418
419         assert(e->n_ref >= 1);
420         e->n_ref++;
421
422         return e;
423 }
424
425 _public_ sd_event* sd_event_unref(sd_event *e) {
426
427         if (!e)
428                 return NULL;
429
430         assert(e->n_ref >= 1);
431         e->n_ref--;
432
433         if (e->n_ref <= 0)
434                 event_free(e);
435
436         return NULL;
437 }
438
439 static bool event_pid_changed(sd_event *e) {
440         assert(e);
441
442         /* We don't support people creating am event loop and keeping
443          * it around over a fork(). Let's complain. */
444
445         return e->original_pid != getpid();
446 }
447
448 static int source_io_unregister(sd_event_source *s) {
449         int r;
450
451         assert(s);
452         assert(s->type == SOURCE_IO);
453
454         if (!s->io.registered)
455                 return 0;
456
457         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
458         if (r < 0)
459                 return -errno;
460
461         s->io.registered = false;
462         return 0;
463 }
464
465 static int source_io_register(
466                 sd_event_source *s,
467                 int enabled,
468                 uint32_t events) {
469
470         struct epoll_event ev = {};
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475         assert(enabled != SD_EVENT_OFF);
476
477         ev.events = events;
478         ev.data.ptr = s;
479
480         if (enabled == SD_EVENT_ONESHOT)
481                 ev.events |= EPOLLONESHOT;
482
483         if (s->io.registered)
484                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
485         else
486                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
487
488         if (r < 0)
489                 return -errno;
490
491         s->io.registered = true;
492
493         return 0;
494 }
495
496 static clockid_t event_source_type_to_clock(EventSourceType t) {
497
498         switch (t) {
499
500         case SOURCE_TIME_REALTIME:
501                 return CLOCK_REALTIME;
502
503         case SOURCE_TIME_MONOTONIC:
504                 return CLOCK_MONOTONIC;
505
506         case SOURCE_TIME_REALTIME_ALARM:
507                 return CLOCK_REALTIME_ALARM;
508
509         case SOURCE_TIME_BOOTTIME_ALARM:
510                 return CLOCK_BOOTTIME_ALARM;
511
512         default:
513                 return (clockid_t) -1;
514         }
515 }
516
517 static EventSourceType clock_to_event_source_type(clockid_t clock) {
518
519         switch (clock) {
520
521         case CLOCK_REALTIME:
522                 return SOURCE_TIME_REALTIME;
523
524         case CLOCK_MONOTONIC:
525                 return SOURCE_TIME_MONOTONIC;
526
527         case CLOCK_REALTIME_ALARM:
528                 return SOURCE_TIME_REALTIME_ALARM;
529
530         case CLOCK_BOOTTIME_ALARM:
531                 return SOURCE_TIME_BOOTTIME_ALARM;
532
533         default:
534                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
535         }
536 }
537
538 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
539         assert(e);
540
541         switch (t) {
542
543         case SOURCE_TIME_REALTIME:
544                 return &e->realtime;
545
546         case SOURCE_TIME_MONOTONIC:
547                 return &e->monotonic;
548
549         case SOURCE_TIME_REALTIME_ALARM:
550                 return &e->realtime_alarm;
551
552         case SOURCE_TIME_BOOTTIME_ALARM:
553                 return &e->boottime_alarm;
554
555         default:
556                 return NULL;
557         }
558 }
559
/* Deallocates a source, first detaching it from all of its loop's
 * bookkeeping structures (epoll, prioqs, sigset, hashmap/set), then
 * dropping the loop reference the source held. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                assert(s->event->n_sources > 0);

                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_remove(d->earliest, s, &s->time.earliest_index);
                        prioq_remove(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the sigset while enabled
                                 * child sources still depend on it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the sigset while an explicit
                                 * SIGCHLD signal source still watches it */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_POST:
                        set_remove(s->event->post_sources, s);
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                s->event->n_sources--;
                sd_event_unref(s->event);
        }

        free(s);
}
642
/* Sets or clears the "pending" (ready to dispatch) flag of a source,
 * keeping the loop's pending prioq — and, for timer sources, both
 * per-clock prioqs — consistent. Returns 0 on success, or a negative
 * error if the source could not be queued (the flag is rolled back). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT); /* exit sources use their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when the source became pending, so equal-priority
                 * sources dispatch in FIFO order */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                /* The time prioq comparators sort pending sources last, so
                 * reshuffle now that the flag changed */
                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
677
678 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
679         sd_event_source *s;
680
681         assert(e);
682
683         s = new0(sd_event_source, 1);
684         if (!s)
685                 return NULL;
686
687         s->n_ref = 1;
688         s->event = sd_event_ref(e);
689         s->type = type;
690         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
691
692         e->n_sources ++;
693
694         return s;
695 }
696
697 _public_ int sd_event_add_io(
698                 sd_event *e,
699                 sd_event_source **ret,
700                 int fd,
701                 uint32_t events,
702                 sd_event_io_handler_t callback,
703                 void *userdata) {
704
705         sd_event_source *s;
706         int r;
707
708         assert_return(e, -EINVAL);
709         assert_return(fd >= 0, -EINVAL);
710         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
711         assert_return(callback, -EINVAL);
712         assert_return(ret, -EINVAL);
713         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
714         assert_return(!event_pid_changed(e), -ECHILD);
715
716         s = source_new(e, SOURCE_IO);
717         if (!s)
718                 return -ENOMEM;
719
720         s->io.fd = fd;
721         s->io.events = events;
722         s->io.callback = callback;
723         s->userdata = userdata;
724         s->enabled = SD_EVENT_ON;
725
726         r = source_io_register(s, s->enabled, events);
727         if (r < 0) {
728                 source_free(s);
729                 return -errno;
730         }
731
732         *ret = s;
733         return 0;
734 }
735
736 static void initialize_perturb(sd_event *e) {
737         sd_id128_t bootid = {};
738
739         /* When we sleep for longer, we try to realign the wakeup to
740            the same time wihtin each minute/second/250ms, so that
741            events all across the system can be coalesced into a single
742            CPU wakeup. However, let's take some system-specific
743            randomness for this value, so that in a network of systems
744            with synced clocks timer events are distributed a
745            bit. Here, we calculate a perturbation usec offset from the
746            boot ID. */
747
748         if (_likely_(e->perturb != (usec_t) -1))
749                 return;
750
751         if (sd_id128_get_boot(&bootid) >= 0)
752                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
753 }
754
755 static int event_setup_timer_fd(
756                 sd_event *e,
757                 struct clock_data *d,
758                 clockid_t clock) {
759
760         struct epoll_event ev = {};
761         int r, fd;
762
763         assert(e);
764         assert(d);
765
766         if (_likely_(d->fd >= 0))
767                 return 0;
768
769         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
770         if (fd < 0)
771                 return -errno;
772
773         ev.events = EPOLLIN;
774         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
775
776         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
777         if (r < 0) {
778                 safe_close(fd);
779                 return -errno;
780         }
781
782         d->fd = fd;
783         return 0;
784 }
785
/* Adds a timer source on the given clock, elapsing at time "usec"
 * (presumably absolute on that clock — confirm against the dispatch
 * code) with "accuracy" of allowed slack (0 selects
 * DEFAULT_ACCURACY_USEC). The per-clock prioqs and timerfd are created
 * lazily on first use. The source starts out as SD_EVENT_ONESHOT.
 *
 * Returns 0 and stores the source in *ret on success, or a negative
 * errno-style error (-ENOTSUP for clocks we do not support). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the two scheduling prioqs for this clock */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* Lazily create the timerfd for this clock */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        source_free(s);
        return r;
}
858
859 static int event_update_signal_fd(sd_event *e) {
860         struct epoll_event ev = {};
861         bool add_to_epoll;
862         int r;
863
864         assert(e);
865
866         add_to_epoll = e->signal_fd < 0;
867
868         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
869         if (r < 0)
870                 return -errno;
871
872         e->signal_fd = r;
873
874         if (!add_to_epoll)
875                 return 0;
876
877         ev.events = EPOLLIN;
878         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
879
880         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
881         if (r < 0) {
882                 e->signal_fd = safe_close(e->signal_fd);
883                 return -errno;
884         }
885
886         return 0;
887 }
888
889 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
890         assert(s);
891
892         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
893 }
894
895 _public_ int sd_event_add_signal(
896                 sd_event *e,
897                 sd_event_source **ret,
898                 int sig,
899                 sd_event_signal_handler_t callback,
900                 void *userdata) {
901
902         sd_event_source *s;
903         sigset_t ss;
904         int r;
905
906         assert_return(e, -EINVAL);
907         assert_return(sig > 0, -EINVAL);
908         assert_return(sig < _NSIG, -EINVAL);
909         assert_return(ret, -EINVAL);
910         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
911         assert_return(!event_pid_changed(e), -ECHILD);
912
913         if (!callback)
914                 callback = signal_exit_callback;
915
916         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
917         if (r < 0)
918                 return -errno;
919
920         if (!sigismember(&ss, sig))
921                 return -EBUSY;
922
923         if (!e->signal_sources) {
924                 e->signal_sources = new0(sd_event_source*, _NSIG);
925                 if (!e->signal_sources)
926                         return -ENOMEM;
927         } else if (e->signal_sources[sig])
928                 return -EBUSY;
929
930         s = source_new(e, SOURCE_SIGNAL);
931         if (!s)
932                 return -ENOMEM;
933
934         s->signal.sig = sig;
935         s->signal.callback = callback;
936         s->userdata = userdata;
937         s->enabled = SD_EVENT_ON;
938
939         e->signal_sources[sig] = s;
940         assert_se(sigaddset(&e->sigset, sig) == 0);
941
942         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
943                 r = event_update_signal_fd(e);
944                 if (r < 0) {
945                         source_free(s);
946                         return r;
947                 }
948         }
949
950         *ret = s;
951         return 0;
952 }
953
954 _public_ int sd_event_add_child(
955                 sd_event *e,
956                 sd_event_source **ret,
957                 pid_t pid,
958                 int options,
959                 sd_event_child_handler_t callback,
960                 void *userdata) {
961
962         sd_event_source *s;
963         int r;
964
965         assert_return(e, -EINVAL);
966         assert_return(pid > 1, -EINVAL);
967         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
968         assert_return(options != 0, -EINVAL);
969         assert_return(callback, -EINVAL);
970         assert_return(ret, -EINVAL);
971         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
972         assert_return(!event_pid_changed(e), -ECHILD);
973
974         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
975         if (r < 0)
976                 return r;
977
978         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
979                 return -EBUSY;
980
981         s = source_new(e, SOURCE_CHILD);
982         if (!s)
983                 return -ENOMEM;
984
985         s->child.pid = pid;
986         s->child.options = options;
987         s->child.callback = callback;
988         s->userdata = userdata;
989         s->enabled = SD_EVENT_ONESHOT;
990
991         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
992         if (r < 0) {
993                 source_free(s);
994                 return r;
995         }
996
997         e->n_enabled_child_sources ++;
998
999         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1000
1001         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
1002                 r = event_update_signal_fd(e);
1003                 if (r < 0) {
1004                         source_free(s);
1005                         return -errno;
1006                 }
1007         }
1008
1009         e->need_process_child = true;
1010
1011         *ret = s;
1012         return 0;
1013 }
1014
1015 _public_ int sd_event_add_defer(
1016                 sd_event *e,
1017                 sd_event_source **ret,
1018                 sd_event_handler_t callback,
1019                 void *userdata) {
1020
1021         sd_event_source *s;
1022         int r;
1023
1024         assert_return(e, -EINVAL);
1025         assert_return(callback, -EINVAL);
1026         assert_return(ret, -EINVAL);
1027         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1028         assert_return(!event_pid_changed(e), -ECHILD);
1029
1030         s = source_new(e, SOURCE_DEFER);
1031         if (!s)
1032                 return -ENOMEM;
1033
1034         s->defer.callback = callback;
1035         s->userdata = userdata;
1036         s->enabled = SD_EVENT_ONESHOT;
1037
1038         r = source_set_pending(s, true);
1039         if (r < 0) {
1040                 source_free(s);
1041                 return r;
1042         }
1043
1044         *ret = s;
1045         return 0;
1046 }
1047
1048 _public_ int sd_event_add_post(
1049                 sd_event *e,
1050                 sd_event_source **ret,
1051                 sd_event_handler_t callback,
1052                 void *userdata) {
1053
1054         sd_event_source *s;
1055         int r;
1056
1057         assert_return(e, -EINVAL);
1058         assert_return(callback, -EINVAL);
1059         assert_return(ret, -EINVAL);
1060         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1061         assert_return(!event_pid_changed(e), -ECHILD);
1062
1063         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1064         if (r < 0)
1065                 return r;
1066
1067         s = source_new(e, SOURCE_POST);
1068         if (!s)
1069                 return -ENOMEM;
1070
1071         s->post.callback = callback;
1072         s->userdata = userdata;
1073         s->enabled = SD_EVENT_ON;
1074
1075         r = set_put(e->post_sources, s);
1076         if (r < 0) {
1077                 source_free(s);
1078                 return r;
1079         }
1080
1081         *ret = s;
1082         return 0;
1083 }
1084
1085 _public_ int sd_event_add_exit(
1086                 sd_event *e,
1087                 sd_event_source **ret,
1088                 sd_event_handler_t callback,
1089                 void *userdata) {
1090
1091         sd_event_source *s;
1092         int r;
1093
1094         assert_return(e, -EINVAL);
1095         assert_return(callback, -EINVAL);
1096         assert_return(ret, -EINVAL);
1097         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1098         assert_return(!event_pid_changed(e), -ECHILD);
1099
1100         if (!e->exit) {
1101                 e->exit = prioq_new(exit_prioq_compare);
1102                 if (!e->exit)
1103                         return -ENOMEM;
1104         }
1105
1106         s = source_new(e, SOURCE_EXIT);
1107         if (!s)
1108                 return -ENOMEM;
1109
1110         s->exit.callback = callback;
1111         s->userdata = userdata;
1112         s->exit.prioq_index = PRIOQ_IDX_NULL;
1113         s->enabled = SD_EVENT_ONESHOT;
1114
1115         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1116         if (r < 0) {
1117                 source_free(s);
1118                 return r;
1119         }
1120
1121         *ret = s;
1122         return 0;
1123 }
1124
1125 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1126         assert_return(s, NULL);
1127
1128         assert(s->n_ref >= 1);
1129         s->n_ref++;
1130
1131         return s;
1132 }
1133
1134 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1135
1136         if (!s)
1137                 return NULL;
1138
1139         assert(s->n_ref >= 1);
1140         s->n_ref--;
1141
1142         if (s->n_ref <= 0) {
1143                 /* Here's a special hack: when we are called from a
1144                  * dispatch handler we won't free the event source
1145                  * immediately, but we will detach the fd from the
1146                  * epoll. This way it is safe for the caller to unref
1147                  * the event source and immediately close the fd, but
1148                  * we still retain a valid event source object after
1149                  * the callback. */
1150
1151                 if (s->dispatching) {
1152                         if (s->type == SOURCE_IO)
1153                                 source_io_unregister(s);
1154                 } else
1155                         source_free(s);
1156         }
1157
1158         return NULL;
1159 }
1160
/* Returns the event loop object this event source is attached to. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1166
/* Returns whether the source is currently pending (triggered and queued
 * for dispatch). Not defined for exit sources (-EDOM). */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1175
/* Returns the file descriptor an IO event source watches. Only valid
 * for SOURCE_IO sources (-EDOM otherwise). */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1183
/* Replaces the file descriptor watched by an IO event source.
 *
 * If the source is disabled the fd is simply swapped. Otherwise the new
 * fd is registered with epoll first and the old one removed only
 * afterwards, so that on registration failure the original watch stays
 * fully intact. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back: keep watching the old fd. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* New fd registered successfully; drop the old one. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1219
/* Stores the configured epoll event mask of an IO source in *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1229
1230 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1231         int r;
1232
1233         assert_return(s, -EINVAL);
1234         assert_return(s->type == SOURCE_IO, -EDOM);
1235         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1236         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1237         assert_return(!event_pid_changed(s->event), -ECHILD);
1238
1239         if (s->io.events == events)
1240                 return 0;
1241
1242         if (s->enabled != SD_EVENT_OFF) {
1243                 r = source_io_register(s, s->enabled, events);
1244                 if (r < 0)
1245                         return r;
1246         }
1247
1248         s->io.events = events;
1249         source_set_pending(s, false);
1250
1251         return 0;
1252 }
1253
/* Stores the epoll revents that triggered this IO source in *revents.
 * Only meaningful while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1264
/* Returns the signal number a signal event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1272
1273 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1274         assert_return(s, -EINVAL);
1275         assert_return(!event_pid_changed(s->event), -ECHILD);
1276
1277         return s->priority;
1278 }
1279
1280 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1281         assert_return(s, -EINVAL);
1282         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1283         assert_return(!event_pid_changed(s->event), -ECHILD);
1284
1285         if (s->priority == priority)
1286                 return 0;
1287
1288         s->priority = priority;
1289
1290         if (s->pending)
1291                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1292
1293         if (s->prepare)
1294                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1295
1296         if (s->type == SOURCE_EXIT)
1297                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1298
1299         return 0;
1300 }
1301
/* Stores the enable state (SD_EVENT_OFF/ON/ONESHOT) in *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1310
/* Switches an event source between SD_EVENT_OFF, SD_EVENT_ON and
 * SD_EVENT_ONESHOT, updating the kernel/queue state that backs each
 * source type (epoll registration, signal mask, clock prioqs, child
 * bookkeeping). */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Detach the fd from the epoll instance. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Reshuffle the clock's queues so a disabled
                         * source no longer influences the wakeup time. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the watched set while enabled
                         * child sources still rely on it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Stop watching SIGCHLD unless an explicit SIGCHLD
                         * signal source still needs it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* (Re-)register the fd with the requested mode. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only count the transition away from OFF;
                         * ON <-> ONESHOT must not bump the counter. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* An enable-state change can affect ordering in the pending and
         * prepare queues, so reshuffle if enqueued. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1460
/* Stores the next absolute trigger time of a time source in *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1470
1471 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1472         struct clock_data *d;
1473
1474         assert_return(s, -EINVAL);
1475         assert_return(usec != (uint64_t) -1, -EINVAL);
1476         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1477         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1478         assert_return(!event_pid_changed(s->event), -ECHILD);
1479
1480         s->time.next = usec;
1481
1482         source_set_pending(s, false);
1483
1484         d = event_get_clock_data(s->event, s->type);
1485         assert(d);
1486
1487         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1488         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1489
1490         return 0;
1491 }
1492
/* Stores the scheduling accuracy (in µs) of a time source in *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1502
1503 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1504         struct clock_data *d;
1505
1506         assert_return(s, -EINVAL);
1507         assert_return(usec != (uint64_t) -1, -EINVAL);
1508         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1509         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1510         assert_return(!event_pid_changed(s->event), -ECHILD);
1511
1512         if (usec == 0)
1513                 usec = DEFAULT_ACCURACY_USEC;
1514
1515         s->time.accuracy = usec;
1516
1517         source_set_pending(s, false);
1518
1519         d = event_get_clock_data(s->event, s->type);
1520         assert(d);
1521
1522         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1523
1524         return 0;
1525 }
1526
/* Stores the clock id (CLOCK_*) backing a time event source in *clock. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1536
/* Stores the PID a child event source watches in *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1546
1547 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1548         int r;
1549
1550         assert_return(s, -EINVAL);
1551         assert_return(s->type != SOURCE_EXIT, -EDOM);
1552         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1553         assert_return(!event_pid_changed(s->event), -ECHILD);
1554
1555         if (s->prepare == callback)
1556                 return 0;
1557
1558         if (callback && s->prepare) {
1559                 s->prepare = callback;
1560                 return 0;
1561         }
1562
1563         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1564         if (r < 0)
1565                 return r;
1566
1567         s->prepare = callback;
1568
1569         if (callback) {
1570                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1571                 if (r < 0)
1572                         return r;
1573         } else
1574                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1575
1576         return 0;
1577 }
1578
/* Returns the opaque userdata pointer attached to the event source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1584
1585 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1586         void *ret;
1587
1588         assert_return(s, NULL);
1589
1590         ret = s->userdata;
1591         s->userdata = userdata;
1592
1593         return ret;
1594 }
1595
/* Picks the wakeup time within the window [a, b], preferring system-wide
 * shared wakeup slots so that timers coalesce (full rationale in the
 * comment below). Falls back to progressively finer granularities, and
 * to b itself if no slot fits. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* Earliest time is "now or earlier": wake up immediately. */
        if (a <= 0)
                return 0;

        /* Window too narrow to do anything clever. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Per-minute slot, shifted by the boot-ID perturbation. Each step
         * below computes the slot at or before b, steps back one period
         * if the slot overshoots b (guarding against underflow), and
         * accepts it if it is still >= a. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* 10s granularity. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* 1s granularity. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* 250ms granularity. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No shared slot fits; wake at the latest permissible time. */
        return b;
}
1673
1674 static int event_arm_timer(
1675                 sd_event *e,
1676                 struct clock_data *d) {
1677
1678         struct itimerspec its = {};
1679         sd_event_source *a, *b;
1680         usec_t t;
1681         int r;
1682
1683         assert(e);
1684         assert(d);
1685
1686         a = prioq_peek(d->earliest);
1687         if (!a || a->enabled == SD_EVENT_OFF) {
1688
1689                 if (d->fd < 0)
1690                         return 0;
1691
1692                 if (d->next == (usec_t) -1)
1693                         return 0;
1694
1695                 /* disarm */
1696                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1697                 if (r < 0)
1698                         return r;
1699
1700                 d->next = (usec_t) -1;
1701                 return 0;
1702         }
1703
1704         b = prioq_peek(d->latest);
1705         assert_se(b && b->enabled != SD_EVENT_OFF);
1706
1707         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1708         if (d->next == t)
1709                 return 0;
1710
1711         assert_se(d->fd >= 0);
1712
1713         if (t == 0) {
1714                 /* We don' want to disarm here, just mean some time looooong ago. */
1715                 its.it_value.tv_sec = 0;
1716                 its.it_value.tv_nsec = 1;
1717         } else
1718                 timespec_store(&its.it_value, t);
1719
1720         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1721         if (r < 0)
1722                 return -errno;
1723
1724         d->next = t;
1725         return 0;
1726 }
1727
1728 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1729         assert(e);
1730         assert(s);
1731         assert(s->type == SOURCE_IO);
1732
1733         /* If the event source was already pending, we just OR in the
1734          * new revents, otherwise we reset the value. The ORing is
1735          * necessary to handle EPOLLONESHOT events properly where
1736          * readability might happen independently of writability, and
1737          * we need to keep track of both */
1738
1739         if (s->pending)
1740                 s->io.revents |= revents;
1741         else
1742                 s->io.revents = revents;
1743
1744         return source_set_pending(s, true);
1745 }
1746
1747 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1748         uint64_t x;
1749         ssize_t ss;
1750
1751         assert(e);
1752         assert(fd >= 0);
1753
1754         assert_return(events == EPOLLIN, -EIO);
1755
1756         ss = read(fd, &x, sizeof(x));
1757         if (ss < 0) {
1758                 if (errno == EAGAIN || errno == EINTR)
1759                         return 0;
1760
1761                 return -errno;
1762         }
1763
1764         if (_unlikely_(ss != sizeof(x)))
1765                 return -EIO;
1766
1767         if (next)
1768                 *next = (usec_t) -1;
1769
1770         return 0;
1771 }
1772
1773 static int process_timer(
1774                 sd_event *e,
1775                 usec_t n,
1776                 struct clock_data *d) {
1777
1778         sd_event_source *s;
1779         int r;
1780
1781         assert(e);
1782         assert(d);
1783
1784         for (;;) {
1785                 s = prioq_peek(d->earliest);
1786                 if (!s ||
1787                     s->time.next > n ||
1788                     s->enabled == SD_EVENT_OFF ||
1789                     s->pending)
1790                         break;
1791
1792                 r = source_set_pending(s, true);
1793                 if (r < 0)
1794                         return r;
1795
1796                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1797                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1798         }
1799
1800         return 0;
1801 }
1802
/* Polls the state of every watched child process (without reaping it)
 * and marks the matching child sources pending. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued for dispatch: nothing more to learn. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* WNOWAIT is only added when WEXITED was requested, so an
                 * exited child stays a zombie for the callback to inspect. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when there was no state change to report. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1869
/* Drains the signalfd and marks the matching signal sources pending.
 * Returns > 0 if at least one siginfo was read, 0 if the fd was empty,
 * negative errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* Fully drained (or interrupted): report whether
                         * anything was read at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd delivers whole siginfo structures only. */
                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD may be watched for child sources even
                         * without an explicit signal source, so always
                         * poll the children here. NOTE(review): the
                         * r > 0 skip presumably means process_child
                         * consumed the event — confirm against its
                         * return contract. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1914
/* Dispatch a single event source: run its user callback (selected by
 * source type), honouring oneshot semantics, and handle the source's
 * lifecycle afterwards (free it if the last user reference is gone,
 * disable it if the callback failed). Returns 1 on success, or a
 * negative errno-style code if the pending/enabled bookkeeping fails;
 * note that callback errors are NOT propagated, only logged. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Clear the pending flag before dispatching — except for defer
         * and exit sources, which stay pending until explicitly turned
         * off and hence fire on every iteration while enabled. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Oneshot sources are disabled before the callback runs, so
         * the callback itself may safely re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* Flag that user code is running; presumably unref during the
         * callback only drops n_ref and defers freeing to the n_ref
         * check below — confirm against sd_event_source_unref(). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* CLD_EXITED/KILLED/DUMPED mean the child died and is
                 * now a zombie; the stop/continue codes leave it alive
                 * and there is nothing to reap. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        /* NOTE(review): "_SOUFCE_" is a typo in the enum definition itself; kept to match it */
        case _SOUFCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* If the callback dropped the last user reference, free the
         * source now; otherwise, disable it if the callback failed. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2015
2016 static int event_prepare(sd_event *e) {
2017         int r;
2018
2019         assert(e);
2020
2021         for (;;) {
2022                 sd_event_source *s;
2023
2024                 s = prioq_peek(e->prepare);
2025                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2026                         break;
2027
2028                 s->prepare_iteration = e->iteration;
2029                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2030                 if (r < 0)
2031                         return r;
2032
2033                 assert(s->prepare);
2034
2035                 s->dispatching = true;
2036                 r = s->prepare(s, s->userdata);
2037                 s->dispatching = false;
2038
2039                 if (r < 0)
2040                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2041
2042                 if (s->n_ref == 0)
2043                         source_free(s);
2044                 else if (r < 0)
2045                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2046         }
2047
2048         return 0;
2049 }
2050
2051 static int dispatch_exit(sd_event *e) {
2052         sd_event_source *p;
2053         int r;
2054
2055         assert(e);
2056
2057         p = prioq_peek(e->exit);
2058         if (!p || p->enabled == SD_EVENT_OFF) {
2059                 e->state = SD_EVENT_FINISHED;
2060                 return 0;
2061         }
2062
2063         sd_event_ref(e);
2064         e->iteration++;
2065         e->state = SD_EVENT_EXITING;
2066
2067         r = source_dispatch(p);
2068
2069         e->state = SD_EVENT_PASSIVE;
2070         sd_event_unref(e);
2071
2072         return r;
2073 }
2074
2075 static sd_event_source* event_next_pending(sd_event *e) {
2076         sd_event_source *p;
2077
2078         assert(e);
2079
2080         p = prioq_peek(e->pending);
2081         if (!p)
2082                 return NULL;
2083
2084         if (p->enabled == SD_EVENT_OFF)
2085                 return NULL;
2086
2087         return p;
2088 }
2089
2090 static int arm_watchdog(sd_event *e) {
2091         struct itimerspec its = {};
2092         usec_t t;
2093         int r;
2094
2095         assert(e);
2096         assert(e->watchdog_fd >= 0);
2097
2098         t = sleep_between(e,
2099                           e->watchdog_last + (e->watchdog_period / 2),
2100                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2101
2102         timespec_store(&its.it_value, t);
2103
2104         /* Make sure we never set the watchdog to 0, which tells the
2105          * kernel to disable it. */
2106         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2107                 its.it_value.tv_nsec = 1;
2108
2109         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2110         if (r < 0)
2111                 return -errno;
2112
2113         return 0;
2114 }
2115
2116 static int process_watchdog(sd_event *e) {
2117         assert(e);
2118
2119         if (!e->watchdog)
2120                 return 0;
2121
2122         /* Don't notify watchdog too often */
2123         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2124                 return 0;
2125
2126         sd_notify(false, "WATCHDOG=1");
2127         e->watchdog_last = e->timestamp.monotonic;
2128
2129         return arm_watchdog(e);
2130 }
2131
/* Run one iteration of the event loop: run prepare callbacks, arm the
 * per-clock timerfds, wait in epoll up to 'timeout' µs ((uint64_t) -1
 * means wait forever), flush/queue whatever the kernel reported, and
 * dispatch a single pending event source. Returns a positive value on
 * a useful iteration (dispatch_exit() may return 0 once the loop is
 * finished), or a negative errno-style code on failure. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once sd_event_exit() was called, only exit sources run */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the event object while callbacks run */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Arm one timerfd per clock for its earliest pending timer */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* If there's already work queued, don't block in epoll at all */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* Cap the on-stack epoll buffer (newa = alloca) at a sane size */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* epoll takes ms; round the µs timeout up so we never wake early */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* An interrupted wait still counts as a successful
                 * (if uneventful) iteration */
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        /* Cache "now" for this iteration; sd_event_now() reports these */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds carry a small-int type tag as data.ptr;
                 * anything else is a real IO source pointer. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark elapsed timer sources pending, per clock */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch exactly one pending source per iteration */
        p = event_next_pending(e);
        if (!p) {
                r = 1;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2247
2248 _public_ int sd_event_loop(sd_event *e) {
2249         int r;
2250
2251         assert_return(e, -EINVAL);
2252         assert_return(!event_pid_changed(e), -ECHILD);
2253         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2254
2255         sd_event_ref(e);
2256
2257         while (e->state != SD_EVENT_FINISHED) {
2258                 r = sd_event_run(e, (uint64_t) -1);
2259                 if (r < 0)
2260                         goto finish;
2261         }
2262
2263         r = e->exit_code;
2264
2265 finish:
2266         sd_event_unref(e);
2267         return r;
2268 }
2269
/* Returns the loop's current state (the SD_EVENT_PASSIVE/RUNNING/
 * EXITING/FINISHED values are the ones set in this file). */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2276
2277 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2278         assert_return(e, -EINVAL);
2279         assert_return(code, -EINVAL);
2280         assert_return(!event_pid_changed(e), -ECHILD);
2281
2282         if (!e->exit_requested)
2283                 return -ENODATA;
2284
2285         *code = e->exit_code;
2286         return 0;
2287 }
2288
2289 _public_ int sd_event_exit(sd_event *e, int code) {
2290         assert_return(e, -EINVAL);
2291         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2292         assert_return(!event_pid_changed(e), -ECHILD);
2293
2294         e->exit_requested = true;
2295         e->exit_code = code;
2296
2297         return 0;
2298 }
2299
2300 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2301         assert_return(e, -EINVAL);
2302         assert_return(usec, -EINVAL);
2303         assert_return(!event_pid_changed(e), -ECHILD);
2304
2305         /* If we haven't run yet, just get the actual time */
2306         if (!dual_timestamp_is_set(&e->timestamp))
2307                 return -ENODATA;
2308
2309         switch (clock) {
2310
2311         case CLOCK_REALTIME:
2312         case CLOCK_REALTIME_ALARM:
2313                 *usec = e->timestamp.realtime;
2314                 break;
2315
2316         case CLOCK_MONOTONIC:
2317                 *usec = e->timestamp.monotonic;
2318                 break;
2319
2320         case CLOCK_BOOTTIME_ALARM:
2321                 *usec = e->timestamp_boottime;
2322                 break;
2323         }
2324
2325         return 0;
2326 }
2327
2328 _public_ int sd_event_default(sd_event **ret) {
2329
2330         static thread_local sd_event *default_event = NULL;
2331         sd_event *e = NULL;
2332         int r;
2333
2334         if (!ret)
2335                 return !!default_event;
2336
2337         if (default_event) {
2338                 *ret = sd_event_ref(default_event);
2339                 return 0;
2340         }
2341
2342         r = sd_event_new(&e);
2343         if (r < 0)
2344                 return r;
2345
2346         e->default_event_ptr = &default_event;
2347         e->tid = gettid();
2348         default_event = e;
2349
2350         *ret = e;
2351         return 1;
2352 }
2353
2354 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2355         assert_return(e, -EINVAL);
2356         assert_return(tid, -EINVAL);
2357         assert_return(!event_pid_changed(e), -ECHILD);
2358
2359         if (e->tid != 0) {
2360                 *tid = e->tid;
2361                 return 0;
2362         }
2363
2364         return -ENXIO;
2365 }
2366
/* Enable or disable automatic watchdog notification: when enabled and
 * the service manager requested a watchdog (sd_watchdog_enabled()),
 * the loop periodically sends WATCHDOG=1, driven by a timerfd hooked
 * into the epoll instance. Returns the resulting watchdog state
 * (0 or 1) on success, a negative errno-style code on failure. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* No change requested */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means the service manager didn't request a
                 * watchdog; report that without treating it as error */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd so sd_event_run() recognizes it */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Tear down: unregister and close the timerfd */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2418
/* Returns whether automatic watchdog notification is currently
 * enabled (see sd_event_set_watchdog()). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}