chiark / gitweb /
silence warnings
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36
37 #include "sd-event.h"
38
/* Upper bound on the number of epoll events pulled out per loop iteration */
#define EPOLL_QUEUE_MAX 512U
/* Timer slack applied when the caller passes an accuracy of 0 to sd_event_add_time() */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Discriminator for the per-type union in sd_event_source. The
 * SOURCE_TIME_* values each correspond to one kernel clock (see
 * event_source_type_to_clock()). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        /* Keep the old misspelled name as an alias so that any existing
         * references elsewhere continue to compile */
        _SOUFCE_EVENT_SOURCE_TYPE_MAX = _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
57
/* True for every event source type that is driven by one of the supported clocks */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
/* One registered event source. 'type' selects which member of the
 * trailing anonymous union is valid. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* owning loop; holds a reference on it */
        void *userdata;
        sd_event_handler_t prepare; /* optional callback, queued in event->prepare */

        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;             /* queued in event->pending? */
        bool dispatching:1;         /* currently inside its callback? */

        int64_t priority;           /* lower values dispatch first */
        unsigned pending_index;     /* index in event->pending, or PRIOQ_IDX_NULL */
        unsigned prepare_index;     /* index in event->prepare, or PRIOQ_IDX_NULL */
        unsigned pending_iteration; /* loop iteration when it became pending */
        unsigned prepare_iteration; /* loop iteration it was last prepared in */

        /* Per-type state; only the member matching 'type' is used */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask requested */
                        uint32_t revents;   /* epoll events last reported */
                        bool registered:1;  /* fd currently in the epoll set? */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* wanted expiry and allowed slack */
                        unsigned earliest_index; /* index in the clock's earliest prioq */
                        unsigned latest_index;   /* index in the clock's latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* data for the pending delivery */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;  /* data for the pending delivery */
                        pid_t pid;
                        int options;        /* subset of WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* index in event->exit prioq */
                } exit;
        };
};
115
/* Per-clock state: each supported kernel clock gets its own timerfd
 * plus two priority queues of its timer sources. */
struct clock_data {
        /* timerfd for this clock; -1 until lazily created */
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        /* Expiry currently programmed into the timerfd; (usec_t) -1 when unset */
        usec_t next;
};
130
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;      /* the main epoll instance; -1 until created */
        int signal_fd;     /* shared signalfd for signal/child sources; -1 if unused */
        int watchdog_fd;   /* timerfd; presumably drives watchdog pings — logic not in this chunk */

        Prioq *pending;    /* sources with an undelivered event, in dispatch order */
        Prioq *prepare;    /* sources that registered a prepare callback */

        /* timerfd_create() only supports these four clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        /* Boot-ID-derived offset (< 1 min) used to coalesce timer
         * wakeups; (usec_t) -1 until initialize_perturb() ran */
        usec_t perturb;

        sigset_t sigset;                  /* signals currently routed to signal_fd */
        sd_event_source **signal_sources; /* per-signal source table, indexed by signal number */

        Hashmap *child_sources;           /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;   /* pid at creation time, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;   /* child sources may need reaping */
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;   /* where the per-thread default-loop pointer lives */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;   /* number of live sources attached to this loop */
};
181
182 static int pending_prioq_compare(const void *a, const void *b) {
183         const sd_event_source *x = a, *y = b;
184
185         assert(x->pending);
186         assert(y->pending);
187
188         /* Enabled ones first */
189         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190                 return -1;
191         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192                 return 1;
193
194         /* Lower priority values first */
195         if (x->priority < y->priority)
196                 return -1;
197         if (x->priority > y->priority)
198                 return 1;
199
200         /* Older entries first */
201         if (x->pending_iteration < y->pending_iteration)
202                 return -1;
203         if (x->pending_iteration > y->pending_iteration)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int prepare_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->prepare);
219         assert(y->prepare);
220
221         /* Move most recently prepared ones last, so that we can stop
222          * preparing as soon as we hit one that has already been
223          * prepared in the current iteration */
224         if (x->prepare_iteration < y->prepare_iteration)
225                 return -1;
226         if (x->prepare_iteration > y->prepare_iteration)
227                 return 1;
228
229         /* Enabled ones first */
230         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
231                 return -1;
232         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233                 return 1;
234
235         /* Lower priority values first */
236         if (x->priority < y->priority)
237                 return -1;
238         if (x->priority > y->priority)
239                 return 1;
240
241         /* Stability for the rest */
242         if (x < y)
243                 return -1;
244         if (x > y)
245                 return 1;
246
247         return 0;
248 }
249
250 static int earliest_time_prioq_compare(const void *a, const void *b) {
251         const sd_event_source *x = a, *y = b;
252
253         assert(EVENT_SOURCE_IS_TIME(x->type));
254         assert(x->type == y->type);
255
256         /* Enabled ones first */
257         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
258                 return -1;
259         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260                 return 1;
261
262         /* Move the pending ones to the end */
263         if (!x->pending && y->pending)
264                 return -1;
265         if (x->pending && !y->pending)
266                 return 1;
267
268         /* Order by time */
269         if (x->time.next < y->time.next)
270                 return -1;
271         if (x->time.next > y->time.next)
272                 return 1;
273
274         /* Stability for the rest */
275         if (x < y)
276                 return -1;
277         if (x > y)
278                 return 1;
279
280         return 0;
281 }
282
283 static int latest_time_prioq_compare(const void *a, const void *b) {
284         const sd_event_source *x = a, *y = b;
285
286         assert(EVENT_SOURCE_IS_TIME(x->type));
287         assert(x->type == y->type);
288
289         /* Enabled ones first */
290         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
291                 return -1;
292         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293                 return 1;
294
295         /* Move the pending ones to the end */
296         if (!x->pending && y->pending)
297                 return -1;
298         if (x->pending && !y->pending)
299                 return 1;
300
301         /* Order by time */
302         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
303                 return -1;
304         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
305                 return 1;
306
307         /* Stability for the rest */
308         if (x < y)
309                 return -1;
310         if (x > y)
311                 return 1;
312
313         return 0;
314 }
315
316 static int exit_prioq_compare(const void *a, const void *b) {
317         const sd_event_source *x = a, *y = b;
318
319         assert(x->type == SOURCE_EXIT);
320         assert(y->type == SOURCE_EXIT);
321
322         /* Enabled ones first */
323         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324                 return -1;
325         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326                 return 1;
327
328         /* Lower priority values first */
329         if (x->priority < y->priority)
330                 return -1;
331         if (x->priority > y->priority)
332                 return 1;
333
334         /* Stability for the rest */
335         if (x < y)
336                 return -1;
337         if (x > y)
338                 return 1;
339
340         return 0;
341 }
342
343 static void free_clock_data(struct clock_data *d) {
344         assert(d);
345
346         safe_close(d->fd);
347         prioq_free(d->earliest);
348         prioq_free(d->latest);
349 }
350
/* Destroys an event loop object. All event sources must already have
 * been released (n_sources == 0). */
static void event_free(sd_event *e) {
        assert(e);
        assert(e->n_sources == 0);

        /* If this loop was installed as the default, invalidate the
         * pointer holding that default reference */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
377
378 _public_ int sd_event_new(sd_event** ret) {
379         sd_event *e;
380         int r;
381
382         assert_return(ret, -EINVAL);
383
384         e = new0(sd_event, 1);
385         if (!e)
386                 return -ENOMEM;
387
388         e->n_ref = 1;
389         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
390         e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
391         e->original_pid = getpid();
392         e->perturb = (usec_t) -1;
393
394         assert_se(sigemptyset(&e->sigset) == 0);
395
396         e->pending = prioq_new(pending_prioq_compare);
397         if (!e->pending) {
398                 r = -ENOMEM;
399                 goto fail;
400         }
401
402         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
403         if (e->epoll_fd < 0) {
404                 r = -errno;
405                 goto fail;
406         }
407
408         *ret = e;
409         return 0;
410
411 fail:
412         event_free(e);
413         return r;
414 }
415
416 _public_ sd_event* sd_event_ref(sd_event *e) {
417         assert_return(e, NULL);
418
419         assert(e->n_ref >= 1);
420         e->n_ref++;
421
422         return e;
423 }
424
425 _public_ sd_event* sd_event_unref(sd_event *e) {
426
427         if (!e)
428                 return NULL;
429
430         assert(e->n_ref >= 1);
431         e->n_ref--;
432
433         if (e->n_ref <= 0)
434                 event_free(e);
435
436         return NULL;
437 }
438
/* Returns true if the loop is being used from a different process than
 * the one that created it. */
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
447
448 static int source_io_unregister(sd_event_source *s) {
449         int r;
450
451         assert(s);
452         assert(s->type == SOURCE_IO);
453
454         if (!s->io.registered)
455                 return 0;
456
457         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
458         if (r < 0)
459                 return -errno;
460
461         s->io.registered = false;
462         return 0;
463 }
464
465 static int source_io_register(
466                 sd_event_source *s,
467                 int enabled,
468                 uint32_t events) {
469
470         struct epoll_event ev = {};
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475         assert(enabled != SD_EVENT_OFF);
476
477         ev.events = events;
478         ev.data.ptr = s;
479
480         if (enabled == SD_EVENT_ONESHOT)
481                 ev.events |= EPOLLONESHOT;
482
483         if (s->io.registered)
484                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
485         else
486                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
487
488         if (r < 0)
489                 return -errno;
490
491         s->io.registered = true;
492
493         return 0;
494 }
495
496 static clockid_t event_source_type_to_clock(EventSourceType t) {
497
498         switch (t) {
499
500         case SOURCE_TIME_REALTIME:
501                 return CLOCK_REALTIME;
502
503         case SOURCE_TIME_MONOTONIC:
504                 return CLOCK_MONOTONIC;
505
506         case SOURCE_TIME_REALTIME_ALARM:
507                 return CLOCK_REALTIME_ALARM;
508
509         case SOURCE_TIME_BOOTTIME_ALARM:
510                 return CLOCK_BOOTTIME_ALARM;
511
512         default:
513                 return (clockid_t) -1;
514         }
515 }
516
517 static EventSourceType clock_to_event_source_type(clockid_t clock) {
518
519         switch (clock) {
520
521         case CLOCK_REALTIME:
522                 return SOURCE_TIME_REALTIME;
523
524         case CLOCK_MONOTONIC:
525                 return SOURCE_TIME_MONOTONIC;
526
527         case CLOCK_REALTIME_ALARM:
528                 return SOURCE_TIME_REALTIME_ALARM;
529
530         case CLOCK_BOOTTIME_ALARM:
531                 return SOURCE_TIME_BOOTTIME_ALARM;
532
533         default:
534                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
535         }
536 }
537
538 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
539         assert(e);
540
541         switch (t) {
542
543         case SOURCE_TIME_REALTIME:
544                 return &e->realtime;
545
546         case SOURCE_TIME_MONOTONIC:
547                 return &e->monotonic;
548
549         case SOURCE_TIME_REALTIME_ALARM:
550                 return &e->realtime_alarm;
551
552         case SOURCE_TIME_BOOTTIME_ALARM:
553                 return &e->boottime_alarm;
554
555         default:
556                 return NULL;
557         }
558 }
559
/* Releases an event source: detaches it from all per-type data
 * structures of its loop, drops the loop reference it holds, and frees
 * the object. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                assert(s->event->n_sources > 0);

                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        /* Drop the source from both scheduling queues of its clock */
                        prioq_remove(d->earliest, s, &s->time.earliest_index);
                        prioq_remove(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the watched mask if enabled
                                 * child sources still rely on it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the watched mask if an
                                 * explicit SIGCHLD signal source still exists */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_POST:
                        set_remove(s->event->post_sources, s);
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                /* Shared bookkeeping, regardless of type */
                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                s->event->n_sources--;
                sd_event_unref(s->event);
        }

        free(s);
}
642
/* Marks a source as pending (has an undelivered event) or not, keeping
 * the pending prioq — and, for time sources, the per-clock prioqs — in
 * sync. Returns 0 on success or if nothing changed, negative on OOM. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when it became pending; used as a FIFO tie
                 * breaker within the same priority */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* The clock prioqs sort pending sources to the end, so
                 * their position changes with the pending flag */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
677
678 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
679         sd_event_source *s;
680
681         assert(e);
682
683         s = new0(sd_event_source, 1);
684         if (!s)
685                 return NULL;
686
687         s->n_ref = 1;
688         s->event = sd_event_ref(e);
689         s->type = type;
690         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
691
692         e->n_sources ++;
693
694         return s;
695 }
696
697 _public_ int sd_event_add_io(
698                 sd_event *e,
699                 sd_event_source **ret,
700                 int fd,
701                 uint32_t events,
702                 sd_event_io_handler_t callback,
703                 void *userdata) {
704
705         sd_event_source *s;
706         int r;
707
708         assert_return(e, -EINVAL);
709         assert_return(fd >= 0, -EINVAL);
710         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
711         assert_return(callback, -EINVAL);
712         assert_return(ret, -EINVAL);
713         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
714         assert_return(!event_pid_changed(e), -ECHILD);
715
716         s = source_new(e, SOURCE_IO);
717         if (!s)
718                 return -ENOMEM;
719
720         s->io.fd = fd;
721         s->io.events = events;
722         s->io.callback = callback;
723         s->userdata = userdata;
724         s->enabled = SD_EVENT_ON;
725
726         r = source_io_register(s, s->enabled, events);
727         if (r < 0) {
728                 source_free(s);
729                 return -errno;
730         }
731
732         *ret = s;
733         return 0;
734 }
735
/* Lazily computes e->perturb, a boot-ID-derived offset below one minute
 * used to spread coalesced timer wakeups between machines. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != (usec_t) -1))
                return;

        /* On failure the perturbation stays unset and we retry next time */
        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
754
755 static int event_setup_timer_fd(
756                 sd_event *e,
757                 struct clock_data *d,
758                 clockid_t clock) {
759
760         struct epoll_event ev = {};
761         int r, fd;
762
763         assert(e);
764         assert(d);
765
766         if (_likely_(d->fd >= 0))
767                 return 0;
768
769         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
770         if (fd < 0)
771                 return -errno;
772
773         ev.events = EPOLLIN;
774         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
775
776         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
777         if (r < 0) {
778                 safe_close(fd);
779                 return -errno;
780         }
781
782         d->fd = fd;
783         return 0;
784 }
785
/* Adds a timer event source on the given clock. 'usec' is the earliest
 * wanted wake-up time, 'accuracy' the extra slack allowed after it (0
 * selects DEFAULT_ACCURACY_USEC). The new source starts out in
 * SD_EVENT_ONESHOT mode. Returns 0 on success, -ENOTSUP for clocks
 * without timerfd support, other negative errno-style codes otherwise. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Reject clocks we cannot drive via timerfd */
        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the per-clock scheduling queues and timerfd */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* Queue the source in both per-clock prioqs */
        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        source_free(s);
        return r;
}
858
859 static int event_update_signal_fd(sd_event *e) {
860         struct epoll_event ev = {};
861         bool add_to_epoll;
862         int r;
863
864         assert(e);
865
866         add_to_epoll = e->signal_fd < 0;
867
868         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
869         if (r < 0)
870                 return -errno;
871
872         e->signal_fd = r;
873
874         if (!add_to_epoll)
875                 return 0;
876
877         ev.events = EPOLLIN;
878         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
879
880         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
881         if (r < 0) {
882                 e->signal_fd = safe_close(e->signal_fd);
883                 return -errno;
884         }
885
886         return 0;
887 }
888
889 _public_ int sd_event_add_signal(
890                 sd_event *e,
891                 sd_event_source **ret,
892                 int sig,
893                 sd_event_signal_handler_t callback,
894                 void *userdata) {
895
896         sd_event_source *s;
897         sigset_t ss;
898         int r;
899
900         assert_return(e, -EINVAL);
901         assert_return(sig > 0, -EINVAL);
902         assert_return(sig < _NSIG, -EINVAL);
903         assert_return(callback, -EINVAL);
904         assert_return(ret, -EINVAL);
905         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
906         assert_return(!event_pid_changed(e), -ECHILD);
907
908         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
909         if (r < 0)
910                 return -errno;
911
912         if (!sigismember(&ss, sig))
913                 return -EBUSY;
914
915         if (!e->signal_sources) {
916                 e->signal_sources = new0(sd_event_source*, _NSIG);
917                 if (!e->signal_sources)
918                         return -ENOMEM;
919         } else if (e->signal_sources[sig])
920                 return -EBUSY;
921
922         s = source_new(e, SOURCE_SIGNAL);
923         if (!s)
924                 return -ENOMEM;
925
926         s->signal.sig = sig;
927         s->signal.callback = callback;
928         s->userdata = userdata;
929         s->enabled = SD_EVENT_ON;
930
931         e->signal_sources[sig] = s;
932         assert_se(sigaddset(&e->sigset, sig) == 0);
933
934         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
935                 r = event_update_signal_fd(e);
936                 if (r < 0) {
937                         source_free(s);
938                         return r;
939                 }
940         }
941
942         *ret = s;
943         return 0;
944 }
945
946 _public_ int sd_event_add_child(
947                 sd_event *e,
948                 sd_event_source **ret,
949                 pid_t pid,
950                 int options,
951                 sd_event_child_handler_t callback,
952                 void *userdata) {
953
954         sd_event_source *s;
955         int r;
956
957         assert_return(e, -EINVAL);
958         assert_return(pid > 1, -EINVAL);
959         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
960         assert_return(options != 0, -EINVAL);
961         assert_return(callback, -EINVAL);
962         assert_return(ret, -EINVAL);
963         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
964         assert_return(!event_pid_changed(e), -ECHILD);
965
966         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
967         if (r < 0)
968                 return r;
969
970         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
971                 return -EBUSY;
972
973         s = source_new(e, SOURCE_CHILD);
974         if (!s)
975                 return -ENOMEM;
976
977         s->child.pid = pid;
978         s->child.options = options;
979         s->child.callback = callback;
980         s->userdata = userdata;
981         s->enabled = SD_EVENT_ONESHOT;
982
983         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
984         if (r < 0) {
985                 source_free(s);
986                 return r;
987         }
988
989         e->n_enabled_child_sources ++;
990
991         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
992
993         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
994                 r = event_update_signal_fd(e);
995                 if (r < 0) {
996                         source_free(s);
997                         return -errno;
998                 }
999         }
1000
1001         e->need_process_child = true;
1002
1003         *ret = s;
1004         return 0;
1005 }
1006
1007 _public_ int sd_event_add_defer(
1008                 sd_event *e,
1009                 sd_event_source **ret,
1010                 sd_event_handler_t callback,
1011                 void *userdata) {
1012
1013         sd_event_source *s;
1014         int r;
1015
1016         assert_return(e, -EINVAL);
1017         assert_return(callback, -EINVAL);
1018         assert_return(ret, -EINVAL);
1019         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1020         assert_return(!event_pid_changed(e), -ECHILD);
1021
1022         s = source_new(e, SOURCE_DEFER);
1023         if (!s)
1024                 return -ENOMEM;
1025
1026         s->defer.callback = callback;
1027         s->userdata = userdata;
1028         s->enabled = SD_EVENT_ONESHOT;
1029
1030         r = source_set_pending(s, true);
1031         if (r < 0) {
1032                 source_free(s);
1033                 return r;
1034         }
1035
1036         *ret = s;
1037         return 0;
1038 }
1039
1040 _public_ int sd_event_add_post(
1041                 sd_event *e,
1042                 sd_event_source **ret,
1043                 sd_event_handler_t callback,
1044                 void *userdata) {
1045
1046         sd_event_source *s;
1047         int r;
1048
1049         assert_return(e, -EINVAL);
1050         assert_return(callback, -EINVAL);
1051         assert_return(ret, -EINVAL);
1052         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1053         assert_return(!event_pid_changed(e), -ECHILD);
1054
1055         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1056         if (r < 0)
1057                 return r;
1058
1059         s = source_new(e, SOURCE_POST);
1060         if (!s)
1061                 return -ENOMEM;
1062
1063         s->post.callback = callback;
1064         s->userdata = userdata;
1065         s->enabled = SD_EVENT_ON;
1066
1067         r = set_put(e->post_sources, s);
1068         if (r < 0) {
1069                 source_free(s);
1070                 return r;
1071         }
1072
1073         *ret = s;
1074         return 0;
1075 }
1076
1077 _public_ int sd_event_add_exit(
1078                 sd_event *e,
1079                 sd_event_source **ret,
1080                 sd_event_handler_t callback,
1081                 void *userdata) {
1082
1083         sd_event_source *s;
1084         int r;
1085
1086         assert_return(e, -EINVAL);
1087         assert_return(callback, -EINVAL);
1088         assert_return(ret, -EINVAL);
1089         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1090         assert_return(!event_pid_changed(e), -ECHILD);
1091
1092         if (!e->exit) {
1093                 e->exit = prioq_new(exit_prioq_compare);
1094                 if (!e->exit)
1095                         return -ENOMEM;
1096         }
1097
1098         s = source_new(e, SOURCE_EXIT);
1099         if (!s)
1100                 return -ENOMEM;
1101
1102         s->exit.callback = callback;
1103         s->userdata = userdata;
1104         s->exit.prioq_index = PRIOQ_IDX_NULL;
1105         s->enabled = SD_EVENT_ONESHOT;
1106
1107         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1108         if (r < 0) {
1109                 source_free(s);
1110                 return r;
1111         }
1112
1113         *ret = s;
1114         return 0;
1115 }
1116
/* Takes an additional reference on the event source and returns it. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1125
/* Drops one reference; frees the source when the last reference is
 * gone, except while the source is being dispatched (see below).
 * Always returns NULL so callers can write `s = sd_event_source_unref(s)`. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1152
/* Returns the event loop this source is attached to (no new reference). */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1158
/* Returns whether the source is currently pending dispatch (> 0 if so).
 * Exit sources have no pending state and yield -EDOM. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1167
/* Returns the file descriptor an I/O source watches; -EDOM for other types. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1175
1176 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1177         int r;
1178
1179         assert_return(s, -EINVAL);
1180         assert_return(fd >= 0, -EINVAL);
1181         assert_return(s->type == SOURCE_IO, -EDOM);
1182         assert_return(!event_pid_changed(s->event), -ECHILD);
1183
1184         if (s->io.fd == fd)
1185                 return 0;
1186
1187         if (s->enabled == SD_EVENT_OFF) {
1188                 s->io.fd = fd;
1189                 s->io.registered = false;
1190         } else {
1191                 int saved_fd;
1192
1193                 saved_fd = s->io.fd;
1194                 assert(s->io.registered);
1195
1196                 s->io.fd = fd;
1197                 s->io.registered = false;
1198
1199                 r = source_io_register(s, s->enabled, s->io.events);
1200                 if (r < 0) {
1201                         s->io.fd = saved_fd;
1202                         s->io.registered = true;
1203                         return r;
1204                 }
1205
1206                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1207         }
1208
1209         return 0;
1210 }
1211
/* Stores the EPOLL* event mask an I/O source is subscribed to in *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1221
1222 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1223         int r;
1224
1225         assert_return(s, -EINVAL);
1226         assert_return(s->type == SOURCE_IO, -EDOM);
1227         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1228         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1229         assert_return(!event_pid_changed(s->event), -ECHILD);
1230
1231         if (s->io.events == events)
1232                 return 0;
1233
1234         if (s->enabled != SD_EVENT_OFF) {
1235                 r = source_io_register(s, s->enabled, events);
1236                 if (r < 0)
1237                         return r;
1238         }
1239
1240         s->io.events = events;
1241         source_set_pending(s, false);
1242
1243         return 0;
1244 }
1245
/* Stores the events that actually triggered (only valid while the
 * source is pending, hence the -ENODATA check) in *revents. */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1256
/* Returns the signal number a signal source watches; -EDOM for other types. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1264
1265 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1266         assert_return(s, -EINVAL);
1267         assert_return(!event_pid_changed(s->event), -ECHILD);
1268
1269         return s->priority;
1270 }
1271
1272 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1273         assert_return(s, -EINVAL);
1274         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1275         assert_return(!event_pid_changed(s->event), -ECHILD);
1276
1277         if (s->priority == priority)
1278                 return 0;
1279
1280         s->priority = priority;
1281
1282         if (s->pending)
1283                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1284
1285         if (s->prepare)
1286                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1287
1288         if (s->type == SOURCE_EXIT)
1289                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1290
1291         return 0;
1292 }
1293
/* Stores the enable state (SD_EVENT_ON/OFF/ONESHOT) in *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1302
/* Changes the enable state of a source to SD_EVENT_OFF, SD_EVENT_ON or
 * SD_EVENT_ONESHOT. Beyond flipping the flag this keeps all derived
 * state in sync: epoll registration for I/O sources, the per-clock
 * prioqs for timer sources, the signal mask/signalfd for signal and
 * child sources, and the exit prioq for exit sources. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* No state change? Nothing to do. */
        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Unregister from epoll first; only mark
                         * disabled once that succeeded. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Enablement takes part in the prioq sort
                         * order, hence reshuffle after changing it. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the mask if enabled child
                         * sources still rely on it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Only drop SIGCHLD from the mask if no
                         * explicit signal source watches it too. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        /* No kernel-side state to touch. */
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register with epoll first, so a failure
                         * leaves the source untouched. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* Mirrors the disable path: if child sources
                         * are active, SIGCHLD is presumably in the
                         * mask already. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only bump the counter when actually moving
                         * from disabled to enabled; ON <-> ONESHOT
                         * transitions leave it alone. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* Enablement also affects ordering in the pending and prepare
         * queues. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1448
/* Stores the absolute expiry time of a timer source in *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1458
/* Updates the absolute expiry time of a timer source and requeues it in
 * the per-clock earliest/latest priority queues. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* Any queued expiry refers to the old deadline; drop it. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);

        return 0;
}
1480
/* Stores the coalescing accuracy (in µs) of a timer source in *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1490
/* Changes the coalescing accuracy of a timer source; 0 selects the
 * default (DEFAULT_ACCURACY_USEC). */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        /* Accuracy only influences the "latest" sort key, so only that
         * queue needs a reshuffle. */
        prioq_reshuffle(d->latest, s, &s->time.latest_index);

        return 0;
}
1514
/* Stores the clockid a timer source runs on (derived from its type) in *clock. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1524
/* Stores the PID a child source watches in *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1534
1535 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1536         int r;
1537
1538         assert_return(s, -EINVAL);
1539         assert_return(s->type != SOURCE_EXIT, -EDOM);
1540         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1541         assert_return(!event_pid_changed(s->event), -ECHILD);
1542
1543         if (s->prepare == callback)
1544                 return 0;
1545
1546         if (callback && s->prepare) {
1547                 s->prepare = callback;
1548                 return 0;
1549         }
1550
1551         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1552         if (r < 0)
1553                 return r;
1554
1555         s->prepare = callback;
1556
1557         if (callback) {
1558                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1559                 if (r < 0)
1560                         return r;
1561         } else
1562                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1563
1564         return 0;
1565 }
1566
/* Returns the opaque userdata pointer attached to the source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1572
/* Replaces the userdata pointer, returning the previous value. */
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
1583
/* Picks the actual wake-up time within the allowed window [a, b],
 * preferring system-wide coalesced wake-up slots (see the comment
 * below). Returns 0 when the deadline already elapsed (a == 0), a when
 * the window is too small to play games with, and b as last resort. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, so this only triggers for a == 0. */
        if (a <= 0)
                return 0;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Per-minute slot (perturb is the per-boot offset). */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Per-10s slot. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Per-second slot. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Per-250ms slot. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No shared slot fits the window; wake as late as allowed. */
        return b;
}
1661
1662 static int event_arm_timer(
1663                 sd_event *e,
1664                 struct clock_data *d) {
1665
1666         struct itimerspec its = {};
1667         sd_event_source *a, *b;
1668         usec_t t;
1669         int r;
1670
1671         assert(e);
1672         assert(d);
1673
1674         a = prioq_peek(d->earliest);
1675         if (!a || a->enabled == SD_EVENT_OFF) {
1676
1677                 if (d->fd < 0)
1678                         return 0;
1679
1680                 if (d->next == (usec_t) -1)
1681                         return 0;
1682
1683                 /* disarm */
1684                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1685                 if (r < 0)
1686                         return r;
1687
1688                 d->next = (usec_t) -1;
1689                 return 0;
1690         }
1691
1692         b = prioq_peek(d->latest);
1693         assert_se(b && b->enabled != SD_EVENT_OFF);
1694
1695         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1696         if (d->next == t)
1697                 return 0;
1698
1699         assert_se(d->fd >= 0);
1700
1701         if (t == 0) {
1702                 /* We don' want to disarm here, just mean some time looooong ago. */
1703                 its.it_value.tv_sec = 0;
1704                 its.it_value.tv_nsec = 1;
1705         } else
1706                 timespec_store(&its.it_value, t);
1707
1708         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1709         if (r < 0)
1710                 return -errno;
1711
1712         d->next = t;
1713         return 0;
1714 }
1715
/* Records the revents epoll reported for an I/O source and marks the
 * source pending for dispatch. */
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
1734
1735 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1736         uint64_t x;
1737         ssize_t ss;
1738
1739         assert(e);
1740         assert(fd >= 0);
1741
1742         assert_return(events == EPOLLIN, -EIO);
1743
1744         ss = read(fd, &x, sizeof(x));
1745         if (ss < 0) {
1746                 if (errno == EAGAIN || errno == EINTR)
1747                         return 0;
1748
1749                 return -errno;
1750         }
1751
1752         if (_unlikely_(ss != sizeof(x)))
1753                 return -EIO;
1754
1755         if (next)
1756                 *next = (usec_t) -1;
1757
1758         return 0;
1759 }
1760
/* Marks as pending every enabled timer source on clock 'd' whose
 * deadline is at or before the current time 'n'. Sources are pulled off
 * the top of the "earliest" prioq until the first non-elapsed one. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                /* Stop at the first source that hasn't elapsed, is
                 * disabled, or is already queued. */
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                /* Pending state is part of the sort keys; requeue so
                 * the next peek sees the next candidate. */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
1790
/* Polls every watched child process with waitid(WNOHANG) and marks the
 * corresponding sources pending if the child's state changed. Children
 * are deliberately not reaped here (WNOWAIT), see comment below. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued from a previous state change. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only if the caller asked for WEXITED, so the
                 * zombie survives until the callback ran. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid != 0 means a state change was reported. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1857
/* Drains the signalfd and marks the matching signal sources pending.
 * SIGCHLD additionally triggers child-source processing. Returns > 0 if
 * at least one signal was read, 0 if the fd was empty, negative errno
 * on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* Queue drained (or interrupted): report
                         * whether anything was read. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads are always whole records. */
                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD may be in the mask solely because of
                         * child sources; handle those first, and only
                         * fall through if a signal source exists too. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        /* A signal we never subscribed to: the mask and
                         * the source table are out of sync. */
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1902
/* Dispatches one event source: clears its pending state (where
 * appropriate), marks all post sources pending, honours ONESHOT mode,
 * and invokes the user callback matching the source's type.
 *
 * Returns 1 on success — even when the user callback fails, in which
 * case the source is merely disabled — or a negative errno for
 * internal bookkeeping errors. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending, so that they fire again
         * on every iteration until explicitly disabled. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Disable ONESHOT sources before running the callback, so the
         * callback may re-enable the source if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Decide before the callback whether the child really
                 * terminated (rather than being stopped/continued), so
                 * that we know whether to reap it afterwards. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOUFCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* The callback may have dropped the last user reference; only
         * now, after dispatching finished, is it safe to free. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2003
2004 static int event_prepare(sd_event *e) {
2005         int r;
2006
2007         assert(e);
2008
2009         for (;;) {
2010                 sd_event_source *s;
2011
2012                 s = prioq_peek(e->prepare);
2013                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2014                         break;
2015
2016                 s->prepare_iteration = e->iteration;
2017                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2018                 if (r < 0)
2019                         return r;
2020
2021                 assert(s->prepare);
2022
2023                 s->dispatching = true;
2024                 r = s->prepare(s, s->userdata);
2025                 s->dispatching = false;
2026
2027                 if (r < 0)
2028                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2029
2030                 if (s->n_ref == 0)
2031                         source_free(s);
2032                 else if (r < 0)
2033                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2034         }
2035
2036         return 0;
2037 }
2038
2039 static int dispatch_exit(sd_event *e) {
2040         sd_event_source *p;
2041         int r;
2042
2043         assert(e);
2044
2045         p = prioq_peek(e->exit);
2046         if (!p || p->enabled == SD_EVENT_OFF) {
2047                 e->state = SD_EVENT_FINISHED;
2048                 return 0;
2049         }
2050
2051         sd_event_ref(e);
2052         e->iteration++;
2053         e->state = SD_EVENT_EXITING;
2054
2055         r = source_dispatch(p);
2056
2057         e->state = SD_EVENT_PASSIVE;
2058         sd_event_unref(e);
2059
2060         return r;
2061 }
2062
2063 static sd_event_source* event_next_pending(sd_event *e) {
2064         sd_event_source *p;
2065
2066         assert(e);
2067
2068         p = prioq_peek(e->pending);
2069         if (!p)
2070                 return NULL;
2071
2072         if (p->enabled == SD_EVENT_OFF)
2073                 return NULL;
2074
2075         return p;
2076 }
2077
2078 static int arm_watchdog(sd_event *e) {
2079         struct itimerspec its = {};
2080         usec_t t;
2081         int r;
2082
2083         assert(e);
2084         assert(e->watchdog_fd >= 0);
2085
2086         t = sleep_between(e,
2087                           e->watchdog_last + (e->watchdog_period / 2),
2088                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2089
2090         timespec_store(&its.it_value, t);
2091
2092         /* Make sure we never set the watchdog to 0, which tells the
2093          * kernel to disable it. */
2094         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2095                 its.it_value.tv_nsec = 1;
2096
2097         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2098         if (r < 0)
2099                 return -errno;
2100
2101         return 0;
2102 }
2103
2104 static int process_watchdog(sd_event *e) {
2105         assert(e);
2106
2107         if (!e->watchdog)
2108                 return 0;
2109
2110         /* Don't notify watchdog too often */
2111         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2112                 return 0;
2113
2114         sd_notify(false, "WATCHDOG=1");
2115         e->watchdog_last = e->timestamp.monotonic;
2116
2117         return arm_watchdog(e);
2118 }
2119
/* Runs a single iteration of the event loop: prepares sources, arms
 * the per-clock timer fds, waits in epoll up to 'timeout' µs
 * ((uint64_t) -1 blocks indefinitely), processes whatever woke us up,
 * and dispatches exactly one pending event source.
 *
 * Returns 1 on success (including "nothing pending"), the
 * dispatch_exit() result when an exit was requested, or a negative
 * errno on failure. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, only exit sources are run. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the loop so a callback cannot free it mid-iteration. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Arm one timerfd per clock for its earliest pending timer. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* Something is already dispatchable — don't block in epoll. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the µs timeout up to whole ms for epoll_wait(). */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Route each epoll event to its handler: internal fds carry an
         * enum tag in data.ptr, I/O sources carry the source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark elapsed timer sources pending, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch (at most) one pending source per iteration. */
        p = event_next_pending(e);
        if (!p) {
                r = 1;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2235
2236 _public_ int sd_event_loop(sd_event *e) {
2237         int r;
2238
2239         assert_return(e, -EINVAL);
2240         assert_return(!event_pid_changed(e), -ECHILD);
2241         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2242
2243         sd_event_ref(e);
2244
2245         while (e->state != SD_EVENT_FINISHED) {
2246                 r = sd_event_run(e, (uint64_t) -1);
2247                 if (r < 0)
2248                         goto finish;
2249         }
2250
2251         r = e->exit_code;
2252
2253 finish:
2254         sd_event_unref(e);
2255         return r;
2256 }
2257
2258 _public_ int sd_event_get_state(sd_event *e) {
2259         assert_return(e, -EINVAL);
2260         assert_return(!event_pid_changed(e), -ECHILD);
2261
2262         return e->state;
2263 }
2264
2265 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2266         assert_return(e, -EINVAL);
2267         assert_return(code, -EINVAL);
2268         assert_return(!event_pid_changed(e), -ECHILD);
2269
2270         if (!e->exit_requested)
2271                 return -ENODATA;
2272
2273         *code = e->exit_code;
2274         return 0;
2275 }
2276
2277 _public_ int sd_event_exit(sd_event *e, int code) {
2278         assert_return(e, -EINVAL);
2279         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2280         assert_return(!event_pid_changed(e), -ECHILD);
2281
2282         e->exit_requested = true;
2283         e->exit_code = code;
2284
2285         return 0;
2286 }
2287
2288 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2289         assert_return(e, -EINVAL);
2290         assert_return(usec, -EINVAL);
2291         assert_return(!event_pid_changed(e), -ECHILD);
2292
2293         /* If we haven't run yet, just get the actual time */
2294         if (!dual_timestamp_is_set(&e->timestamp))
2295                 return -ENODATA;
2296
2297         switch (clock) {
2298
2299         case CLOCK_REALTIME:
2300         case CLOCK_REALTIME_ALARM:
2301                 *usec = e->timestamp.realtime;
2302                 break;
2303
2304         case CLOCK_MONOTONIC:
2305                 *usec = e->timestamp.monotonic;
2306                 break;
2307
2308         case CLOCK_BOOTTIME_ALARM:
2309                 *usec = e->timestamp_boottime;
2310                 break;
2311         }
2312
2313         return 0;
2314 }
2315
2316 _public_ int sd_event_default(sd_event **ret) {
2317
2318         static thread_local sd_event *default_event = NULL;
2319         sd_event *e = NULL;
2320         int r;
2321
2322         if (!ret)
2323                 return !!default_event;
2324
2325         if (default_event) {
2326                 *ret = sd_event_ref(default_event);
2327                 return 0;
2328         }
2329
2330         r = sd_event_new(&e);
2331         if (r < 0)
2332                 return r;
2333
2334         e->default_event_ptr = &default_event;
2335         e->tid = gettid();
2336         default_event = e;
2337
2338         *ret = e;
2339         return 1;
2340 }
2341
2342 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2343         assert_return(e, -EINVAL);
2344         assert_return(tid, -EINVAL);
2345         assert_return(!event_pid_changed(e), -ECHILD);
2346
2347         if (e->tid != 0) {
2348                 *tid = e->tid;
2349                 return 0;
2350         }
2351
2352         return -ENXIO;
2353 }
2354
/* Enables (b != 0) or disables automatic "WATCHDOG=1" keep-alive
 * notifications driven by a timerfd hooked into the loop's epoll.
 * Only takes effect when sd_watchdog_enabled() reports a watchdog
 * period. Returns the new watchdog state (0/1) on success, negative
 * errno on failure. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? Nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means no watchdog was requested of us; report
                 * that to the caller without enabling anything. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd so sd_event_run() recognizes it. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Best-effort teardown: closing the fd detaches it
                 * from epoll anyway, so the DEL result is ignored. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2406
2407 _public_ int sd_event_get_watchdog(sd_event *e) {
2408         assert_return(e, -EINVAL);
2409         assert_return(!event_pid_changed(e), -ECHILD);
2410
2411         return e->watchdog;
2412 }