chiark / gitweb /
d6a3d1c3f405d2d4c3534635d08ddf875c697a40
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36
37 #include "sd-event.h"
38
39 #define EPOLL_QUEUE_MAX 512U
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Discriminator for the per-type union in sd_event_source, and for the
 * epoll data.ptr markers used for the shared timer/signal/watchdog fds. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        /* Backward-compatible alias for the historical misspelling, in
         * case other translation units still reference the old name. */
        _SOUFCE_EVENT_SOURCE_TYPE_MAX = _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
/* One event source attached to an event loop. Holds a reference on its
 * sd_event ('event'), which source_free() drops. Which union member is
 * valid is selected by 'type'. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* owning loop (referenced) */
        void *userdata;
        sd_event_handler_t prepare; /* optional callback invoked before polling */

        EventSourceType type:5;     /* discriminator for the union below */
        int enabled:3;              /* SD_EVENT_OFF / ON / ONESHOT */
        bool pending:1;             /* currently queued in event->pending */
        bool dispatching:1;         /* currently executing its callback */

        int64_t priority;           /* lower values dispatch first */
        unsigned pending_index;     /* slot in event->pending prioq (PRIOQ_IDX_NULL when unset) */
        unsigned prepare_index;     /* slot in event->prepare prioq */
        unsigned pending_iteration; /* loop iteration when the source became pending */
        unsigned prepare_iteration; /* loop iteration when prepare() last ran */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;     /* EPOLL* mask requested */
                        uint32_t revents;    /* EPOLL* mask last seen */
                        bool registered:1;   /* fd currently added to epoll */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* earliest fire time, and allowed slack */
                        unsigned earliest_index; /* slot in clock_data->earliest prioq */
                        unsigned latest_index;   /* slot in clock_data->latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* filled when the signal is read */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;   /* filled by waitid() when the child changes state */
                        pid_t pid;
                        int options;         /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* slot in event->exit prioq */
                } exit;
        };
};
115
/* Per-clock state: one timerfd plus two scheduling queues. */
struct clock_data {
        int fd; /* timerfd for this clock, -1 until lazily created */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* time the timerfd is currently armed for, (usec_t) -1 if unset */
};
130
/* The event loop object itself. Reference counted; freed when the last
 * reference is dropped and no sources remain. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;    /* main poll fd; all other fds are registered here */
        int signal_fd;   /* shared signalfd for all signal/child sources, -1 until needed */
        int watchdog_fd; /* timerfd used for sd_watchdog keep-alive pings, -1 until needed */

        Prioq *pending;  /* sources with an undelivered event, by priority */
        Prioq *prepare;  /* sources with a prepare() callback */

        /* timerfd_create() only supports these four clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb; /* per-boot random offset for coalescing wakeups; (usec_t) -1 until initialized */

        sigset_t sigset;                 /* signals currently routed through signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized array indexed by signal number, lazily allocated */

        Hashmap *child_sources;          /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* how many child sources are not SD_EVENT_OFF */

        Set *post_sources;               /* SOURCE_POST sources, run after each dispatch */

        Prioq *exit;                     /* SOURCE_EXIT sources, run on loop exit */

        pid_t original_pid;              /* pid at creation time, to detect use across fork() */

        unsigned iteration;              /* monotonically increasing loop-iteration counter */
        dual_timestamp timestamp;        /* realtime+monotonic snapshot of the current wakeup */
        usec_t timestamp_boottime;       /* boottime snapshot of the current wakeup */
        int state;                       /* SD_EVENT_* loop state machine */

        bool exit_requested:1;
        bool need_process_child:1;       /* SIGCHLD seen, waitid() pass still owed */
        bool watchdog:1;                 /* watchdog keep-alive enabled */

        int exit_code;                   /* value returned by sd_event_loop() on exit */

        pid_t tid;                       /* thread that owns this loop, if attached */
        sd_event **default_event_ptr;    /* back-pointer to the per-thread default slot, cleared on free */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;              /* live sources attached; must be 0 at free time */
};
181
182 static int pending_prioq_compare(const void *a, const void *b) {
183         const sd_event_source *x = a, *y = b;
184
185         assert(x->pending);
186         assert(y->pending);
187
188         /* Enabled ones first */
189         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190                 return -1;
191         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192                 return 1;
193
194         /* Lower priority values first */
195         if (x->priority < y->priority)
196                 return -1;
197         if (x->priority > y->priority)
198                 return 1;
199
200         /* Older entries first */
201         if (x->pending_iteration < y->pending_iteration)
202                 return -1;
203         if (x->pending_iteration > y->pending_iteration)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int prepare_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->prepare);
219         assert(y->prepare);
220
221         /* Move most recently prepared ones last, so that we can stop
222          * preparing as soon as we hit one that has already been
223          * prepared in the current iteration */
224         if (x->prepare_iteration < y->prepare_iteration)
225                 return -1;
226         if (x->prepare_iteration > y->prepare_iteration)
227                 return 1;
228
229         /* Enabled ones first */
230         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
231                 return -1;
232         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233                 return 1;
234
235         /* Lower priority values first */
236         if (x->priority < y->priority)
237                 return -1;
238         if (x->priority > y->priority)
239                 return 1;
240
241         /* Stability for the rest */
242         if (x < y)
243                 return -1;
244         if (x > y)
245                 return 1;
246
247         return 0;
248 }
249
250 static int earliest_time_prioq_compare(const void *a, const void *b) {
251         const sd_event_source *x = a, *y = b;
252
253         assert(EVENT_SOURCE_IS_TIME(x->type));
254         assert(x->type == y->type);
255
256         /* Enabled ones first */
257         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
258                 return -1;
259         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260                 return 1;
261
262         /* Move the pending ones to the end */
263         if (!x->pending && y->pending)
264                 return -1;
265         if (x->pending && !y->pending)
266                 return 1;
267
268         /* Order by time */
269         if (x->time.next < y->time.next)
270                 return -1;
271         if (x->time.next > y->time.next)
272                 return 1;
273
274         /* Stability for the rest */
275         if (x < y)
276                 return -1;
277         if (x > y)
278                 return 1;
279
280         return 0;
281 }
282
283 static int latest_time_prioq_compare(const void *a, const void *b) {
284         const sd_event_source *x = a, *y = b;
285
286         assert(EVENT_SOURCE_IS_TIME(x->type));
287         assert(x->type == y->type);
288
289         /* Enabled ones first */
290         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
291                 return -1;
292         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293                 return 1;
294
295         /* Move the pending ones to the end */
296         if (!x->pending && y->pending)
297                 return -1;
298         if (x->pending && !y->pending)
299                 return 1;
300
301         /* Order by time */
302         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
303                 return -1;
304         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
305                 return 1;
306
307         /* Stability for the rest */
308         if (x < y)
309                 return -1;
310         if (x > y)
311                 return 1;
312
313         return 0;
314 }
315
316 static int exit_prioq_compare(const void *a, const void *b) {
317         const sd_event_source *x = a, *y = b;
318
319         assert(x->type == SOURCE_EXIT);
320         assert(y->type == SOURCE_EXIT);
321
322         /* Enabled ones first */
323         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324                 return -1;
325         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326                 return 1;
327
328         /* Lower priority values first */
329         if (x->priority < y->priority)
330                 return -1;
331         if (x->priority > y->priority)
332                 return 1;
333
334         /* Stability for the rest */
335         if (x < y)
336                 return -1;
337         if (x > y)
338                 return 1;
339
340         return 0;
341 }
342
343 static void free_clock_data(struct clock_data *d) {
344         assert(d);
345
346         safe_close(d->fd);
347         prioq_free(d->earliest);
348         prioq_free(d->latest);
349 }
350
351 static void event_free(sd_event *e) {
352         assert(e);
353         assert(e->n_sources == 0);
354
355         if (e->default_event_ptr)
356                 *(e->default_event_ptr) = NULL;
357
358         safe_close(e->epoll_fd);
359         safe_close(e->signal_fd);
360         safe_close(e->watchdog_fd);
361
362         free_clock_data(&e->realtime);
363         free_clock_data(&e->monotonic);
364         free_clock_data(&e->realtime_alarm);
365         free_clock_data(&e->boottime_alarm);
366
367         prioq_free(e->pending);
368         prioq_free(e->prepare);
369         prioq_free(e->exit);
370
371         free(e->signal_sources);
372
373         hashmap_free(e->child_sources);
374         set_free(e->post_sources);
375         free(e);
376 }
377
378 _public_ int sd_event_new(sd_event** ret) {
379         sd_event *e;
380         int r;
381
382         assert_return(ret, -EINVAL);
383
384         e = new0(sd_event, 1);
385         if (!e)
386                 return -ENOMEM;
387
388         e->n_ref = 1;
389         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
390         e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
391         e->original_pid = getpid();
392         e->perturb = (usec_t) -1;
393
394         assert_se(sigemptyset(&e->sigset) == 0);
395
396         e->pending = prioq_new(pending_prioq_compare);
397         if (!e->pending) {
398                 r = -ENOMEM;
399                 goto fail;
400         }
401
402         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
403         if (e->epoll_fd < 0) {
404                 r = -errno;
405                 goto fail;
406         }
407
408         *ret = e;
409         return 0;
410
411 fail:
412         event_free(e);
413         return r;
414 }
415
416 _public_ sd_event* sd_event_ref(sd_event *e) {
417         assert_return(e, NULL);
418
419         assert(e->n_ref >= 1);
420         e->n_ref++;
421
422         return e;
423 }
424
425 _public_ sd_event* sd_event_unref(sd_event *e) {
426
427         if (!e)
428                 return NULL;
429
430         assert(e->n_ref >= 1);
431         e->n_ref--;
432
433         if (e->n_ref <= 0)
434                 event_free(e);
435
436         return NULL;
437 }
438
439 static bool event_pid_changed(sd_event *e) {
440         assert(e);
441
442         /* We don't support people creating am event loop and keeping
443          * it around over a fork(). Let's complain. */
444
445         return e->original_pid != getpid();
446 }
447
448 static int source_io_unregister(sd_event_source *s) {
449         int r;
450
451         assert(s);
452         assert(s->type == SOURCE_IO);
453
454         if (!s->io.registered)
455                 return 0;
456
457         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
458         if (r < 0)
459                 return -errno;
460
461         s->io.registered = false;
462         return 0;
463 }
464
465 static int source_io_register(
466                 sd_event_source *s,
467                 int enabled,
468                 uint32_t events) {
469
470         struct epoll_event ev = {};
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475         assert(enabled != SD_EVENT_OFF);
476
477         ev.events = events;
478         ev.data.ptr = s;
479
480         if (enabled == SD_EVENT_ONESHOT)
481                 ev.events |= EPOLLONESHOT;
482
483         if (s->io.registered)
484                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
485         else
486                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
487
488         if (r < 0)
489                 return -errno;
490
491         s->io.registered = true;
492
493         return 0;
494 }
495
496 static clockid_t event_source_type_to_clock(EventSourceType t) {
497
498         switch (t) {
499
500         case SOURCE_TIME_REALTIME:
501                 return CLOCK_REALTIME;
502
503         case SOURCE_TIME_MONOTONIC:
504                 return CLOCK_MONOTONIC;
505
506         case SOURCE_TIME_REALTIME_ALARM:
507                 return CLOCK_REALTIME_ALARM;
508
509         case SOURCE_TIME_BOOTTIME_ALARM:
510                 return CLOCK_BOOTTIME_ALARM;
511
512         default:
513                 return (clockid_t) -1;
514         }
515 }
516
517 static EventSourceType clock_to_event_source_type(clockid_t clock) {
518
519         switch (clock) {
520
521         case CLOCK_REALTIME:
522                 return SOURCE_TIME_REALTIME;
523
524         case CLOCK_MONOTONIC:
525                 return SOURCE_TIME_MONOTONIC;
526
527         case CLOCK_REALTIME_ALARM:
528                 return SOURCE_TIME_REALTIME_ALARM;
529
530         case CLOCK_BOOTTIME_ALARM:
531                 return SOURCE_TIME_BOOTTIME_ALARM;
532
533         default:
534                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
535         }
536 }
537
538 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
539         assert(e);
540
541         switch (t) {
542
543         case SOURCE_TIME_REALTIME:
544                 return &e->realtime;
545
546         case SOURCE_TIME_MONOTONIC:
547                 return &e->monotonic;
548
549         case SOURCE_TIME_REALTIME_ALARM:
550                 return &e->realtime_alarm;
551
552         case SOURCE_TIME_BOOTTIME_ALARM:
553                 return &e->boottime_alarm;
554
555         default:
556                 return NULL;
557         }
558 }
559
/* Tear down an event source: detach the type-specific registration from
 * the loop, remove it from the pending/prepare queues, drop the loop
 * reference and free it. Safe to call on partially constructed sources
 * (source_new() has run but type-specific setup may not have). */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                assert(s->event->n_sources > 0);

                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        /* Drop from both scheduling queues of the clock */
                        prioq_remove(d->earliest, s, &s->time.earliest_index);
                        prioq_remove(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the mask if child sources still need it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the mask if an explicit signal source still watches it */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_POST:
                        set_remove(s->event->post_sources, s);
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                /* Drop the loop reference taken in source_new() */
                s->event->n_sources--;
                sd_event_unref(s->event);
        }

        free(s);
}
642
/* Mark a source as pending (queued for dispatch) or not, keeping the
 * loop's pending prioq and — for timer sources — both per-clock prioqs
 * consistent. Returns 0 on success or if nothing changed, negative
 * errno if queueing failed (state is rolled back). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT); /* exit sources use their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when the source became pending, for FIFO ordering
                 * among equal-priority pending sources */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Pending timer sources sort to the end of the clock queues,
                 * so reposition this source after the flag change */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
677
678 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
679         sd_event_source *s;
680
681         assert(e);
682
683         s = new0(sd_event_source, 1);
684         if (!s)
685                 return NULL;
686
687         s->n_ref = 1;
688         s->event = sd_event_ref(e);
689         s->type = type;
690         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
691
692         e->n_sources ++;
693
694         return s;
695 }
696
697 _public_ int sd_event_add_io(
698                 sd_event *e,
699                 sd_event_source **ret,
700                 int fd,
701                 uint32_t events,
702                 sd_event_io_handler_t callback,
703                 void *userdata) {
704
705         sd_event_source *s;
706         int r;
707
708         assert_return(e, -EINVAL);
709         assert_return(fd >= 0, -EINVAL);
710         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
711         assert_return(callback, -EINVAL);
712         assert_return(ret, -EINVAL);
713         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
714         assert_return(!event_pid_changed(e), -ECHILD);
715
716         s = source_new(e, SOURCE_IO);
717         if (!s)
718                 return -ENOMEM;
719
720         s->io.fd = fd;
721         s->io.events = events;
722         s->io.callback = callback;
723         s->userdata = userdata;
724         s->enabled = SD_EVENT_ON;
725
726         r = source_io_register(s, s->enabled, events);
727         if (r < 0) {
728                 source_free(s);
729                 return -errno;
730         }
731
732         *ret = s;
733         return 0;
734 }
735
736 static void initialize_perturb(sd_event *e) {
737         sd_id128_t bootid = {};
738
739         /* When we sleep for longer, we try to realign the wakeup to
740            the same time wihtin each minute/second/250ms, so that
741            events all across the system can be coalesced into a single
742            CPU wakeup. However, let's take some system-specific
743            randomness for this value, so that in a network of systems
744            with synced clocks timer events are distributed a
745            bit. Here, we calculate a perturbation usec offset from the
746            boot ID. */
747
748         if (_likely_(e->perturb != (usec_t) -1))
749                 return;
750
751         if (sd_id128_get_boot(&bootid) >= 0)
752                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
753 }
754
755 static int event_setup_timer_fd(
756                 sd_event *e,
757                 struct clock_data *d,
758                 clockid_t clock) {
759
760         struct epoll_event ev = {};
761         int r, fd;
762
763         assert(e);
764         assert(d);
765
766         if (_likely_(d->fd >= 0))
767                 return 0;
768
769         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
770         if (fd < 0)
771                 return -errno;
772
773         ev.events = EPOLLIN;
774         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
775
776         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
777         if (r < 0) {
778                 safe_close(fd);
779                 return -errno;
780         }
781
782         d->fd = fd;
783         return 0;
784 }
785
/* Create a new timer event source on the given clock, firing no earlier
 * than 'usec' and no later than 'usec' + 'accuracy' (0 selects
 * DEFAULT_ACCURACY_USEC). The source is created SD_EVENT_ONESHOT. Lazily
 * allocates the per-clock prioqs and timerfd on first use. On success
 * stores the new source in *ret and returns 0; negative errno on
 * failure. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Reject clocks timerfd does not support */
        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily set up the per-clock scheduling queues ... */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ... and the timerfd backing this clock */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        source_free(s);
        return r;
}
858
859 static int event_update_signal_fd(sd_event *e) {
860         struct epoll_event ev = {};
861         bool add_to_epoll;
862         int r;
863
864         assert(e);
865
866         add_to_epoll = e->signal_fd < 0;
867
868         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
869         if (r < 0)
870                 return -errno;
871
872         e->signal_fd = r;
873
874         if (!add_to_epoll)
875                 return 0;
876
877         ev.events = EPOLLIN;
878         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
879
880         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
881         if (r < 0) {
882                 e->signal_fd = safe_close(e->signal_fd);
883                 return -errno;
884         }
885
886         return 0;
887 }
888
889 _public_ int sd_event_add_signal(
890                 sd_event *e,
891                 sd_event_source **ret,
892                 int sig,
893                 sd_event_signal_handler_t callback,
894                 void *userdata) {
895
896         sd_event_source *s;
897         sigset_t ss;
898         int r;
899
900         assert_return(e, -EINVAL);
901         assert_return(sig > 0, -EINVAL);
902         assert_return(sig < _NSIG, -EINVAL);
903         assert_return(callback, -EINVAL);
904         assert_return(ret, -EINVAL);
905         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
906         assert_return(!event_pid_changed(e), -ECHILD);
907
908         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
909         if (r < 0)
910                 return -errno;
911
912         if (!sigismember(&ss, sig))
913                 return -EBUSY;
914
915         if (!e->signal_sources) {
916                 e->signal_sources = new0(sd_event_source*, _NSIG);
917                 if (!e->signal_sources)
918                         return -ENOMEM;
919         } else if (e->signal_sources[sig])
920                 return -EBUSY;
921
922         s = source_new(e, SOURCE_SIGNAL);
923         if (!s)
924                 return -ENOMEM;
925
926         s->signal.sig = sig;
927         s->signal.callback = callback;
928         s->userdata = userdata;
929         s->enabled = SD_EVENT_ON;
930
931         e->signal_sources[sig] = s;
932         assert_se(sigaddset(&e->sigset, sig) == 0);
933
934         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
935                 r = event_update_signal_fd(e);
936                 if (r < 0) {
937                         source_free(s);
938                         return r;
939                 }
940         }
941
942         *ret = s;
943         return 0;
944 }
945
946 _public_ int sd_event_add_child(
947                 sd_event *e,
948                 sd_event_source **ret,
949                 pid_t pid,
950                 int options,
951                 sd_event_child_handler_t callback,
952                 void *userdata) {
953
954         sd_event_source *s;
955         int r;
956
957         assert_return(e, -EINVAL);
958         assert_return(pid > 1, -EINVAL);
959         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
960         assert_return(options != 0, -EINVAL);
961         assert_return(callback, -EINVAL);
962         assert_return(ret, -EINVAL);
963         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
964         assert_return(!event_pid_changed(e), -ECHILD);
965
966         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
967         if (r < 0)
968                 return r;
969
970         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
971                 return -EBUSY;
972
973         s = source_new(e, SOURCE_CHILD);
974         if (!s)
975                 return -ENOMEM;
976
977         s->child.pid = pid;
978         s->child.options = options;
979         s->child.callback = callback;
980         s->userdata = userdata;
981         s->enabled = SD_EVENT_ONESHOT;
982
983         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
984         if (r < 0) {
985                 source_free(s);
986                 return r;
987         }
988
989         e->n_enabled_child_sources ++;
990
991         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
992
993         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
994                 r = event_update_signal_fd(e);
995                 if (r < 0) {
996                         source_free(s);
997                         return -errno;
998                 }
999         }
1000
1001         e->need_process_child = true;
1002
1003         *ret = s;
1004         return 0;
1005 }
1006
1007 _public_ int sd_event_add_defer(
1008                 sd_event *e,
1009                 sd_event_source **ret,
1010                 sd_event_handler_t callback,
1011                 void *userdata) {
1012
1013         sd_event_source *s;
1014         int r;
1015
1016         assert_return(e, -EINVAL);
1017         assert_return(callback, -EINVAL);
1018         assert_return(ret, -EINVAL);
1019         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1020         assert_return(!event_pid_changed(e), -ECHILD);
1021
1022         s = source_new(e, SOURCE_DEFER);
1023         if (!s)
1024                 return -ENOMEM;
1025
1026         s->defer.callback = callback;
1027         s->userdata = userdata;
1028         s->enabled = SD_EVENT_ONESHOT;
1029
1030         r = source_set_pending(s, true);
1031         if (r < 0) {
1032                 source_free(s);
1033                 return r;
1034         }
1035
1036         *ret = s;
1037         return 0;
1038 }
1039
1040 _public_ int sd_event_add_post(
1041                 sd_event *e,
1042                 sd_event_source **ret,
1043                 sd_event_handler_t callback,
1044                 void *userdata) {
1045
1046         sd_event_source *s;
1047         int r;
1048
1049         assert_return(e, -EINVAL);
1050         assert_return(callback, -EINVAL);
1051         assert_return(ret, -EINVAL);
1052         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1053         assert_return(!event_pid_changed(e), -ECHILD);
1054
1055         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1056         if (r < 0)
1057                 return r;
1058
1059         s = source_new(e, SOURCE_POST);
1060         if (!s)
1061                 return -ENOMEM;
1062
1063         s->post.callback = callback;
1064         s->userdata = userdata;
1065         s->enabled = SD_EVENT_ON;
1066
1067         r = set_put(e->post_sources, s);
1068         if (r < 0) {
1069                 source_free(s);
1070                 return r;
1071         }
1072
1073         *ret = s;
1074         return 0;
1075 }
1076
1077 _public_ int sd_event_add_exit(
1078                 sd_event *e,
1079                 sd_event_source **ret,
1080                 sd_event_handler_t callback,
1081                 void *userdata) {
1082
1083         sd_event_source *s;
1084         int r;
1085
1086         assert_return(e, -EINVAL);
1087         assert_return(callback, -EINVAL);
1088         assert_return(ret, -EINVAL);
1089         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1090         assert_return(!event_pid_changed(e), -ECHILD);
1091
1092         if (!e->exit) {
1093                 e->exit = prioq_new(exit_prioq_compare);
1094                 if (!e->exit)
1095                         return -ENOMEM;
1096         }
1097
1098         s = source_new(e, SOURCE_EXIT);
1099         if (!s)
1100                 return -ENOMEM;
1101
1102         s->exit.callback = callback;
1103         s->userdata = userdata;
1104         s->exit.prioq_index = PRIOQ_IDX_NULL;
1105         s->enabled = SD_EVENT_ONESHOT;
1106
1107         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1108         if (r < 0) {
1109                 source_free(s);
1110                 return r;
1111         }
1112
1113         *ret = s;
1114         return 0;
1115 }
1116
/* Takes an additional reference on the event source; returns the
 * source itself for convenient chaining. NULL-safe via assert_return. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        /* References may only be taken on live objects. */
        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1125
/* Drops a reference to the event source. When the last reference is
 * dropped, the source is freed — unless we are currently inside the
 * source's own dispatch callback, in which case destruction is
 * deferred (see the comment below). Always returns NULL so callers can
 * write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1152
/* Returns the event loop object this event source is attached to. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1158
/* Returns whether the source is currently queued for dispatching
 * (1/0). Not defined for exit sources, which are dispatched from their
 * own priority queue rather than the pending queue. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1167
/* Returns the file descriptor an I/O event source watches. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1175
/* Replaces the file descriptor an I/O event source watches. If the
 * source is enabled, the new fd is registered with epoll before the
 * old one is removed, so a registration failure leaves the source
 * still watching the original fd. The old fd itself is not closed
 * here; that remains the caller's responsibility. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled: nothing registered in the kernel, just
                 * remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first; roll back on failure. */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd from the epoll set. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1211
/* Retrieves the EPOLL* event mask an I/O event source is armed for. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1221
/* Changes the EPOLL* event mask of an I/O event source. Only the flags
 * whitelisted below are accepted. If the source is enabled, the kernel
 * registration is updated first, so failure leaves the old mask in
 * effect. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.events == events)
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        /* Previously collected revents no longer match the new mask,
         * hence drop any pending state. */
        source_set_pending(s, false);

        return 0;
}
1245
/* Retrieves the events that actually triggered on a pending I/O
 * source; only valid while the source is pending. */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1256
/* Returns the signal number a signal event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1264
1265 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1266         assert_return(s, -EINVAL);
1267         assert_return(!event_pid_changed(s->event), -ECHILD);
1268
1269         return s->priority;
1270 }
1271
/* Changes the dispatch priority of an event source and reshuffles
 * every priority queue the source currently sits in so the new
 * ordering takes effect. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1293
/* Retrieves the enablement state of the source: SD_EVENT_OFF,
 * SD_EVENT_ON or SD_EVENT_ONESHOT. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1302
/* Enables or disables an event source. m is one of SD_EVENT_OFF,
 * SD_EVENT_ON, or SD_EVENT_ONESHOT (disabled again after one
 * dispatch). Besides flipping the flag this keeps the kernel-side
 * state in sync: epoll registration for I/O sources, the signal
 * mask/signalfd for signal and child sources, and the relevant
 * priority queues for timer and exit sources. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Unregister first; only update the flag if
                         * that succeeded. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        /* Reposition in the clock's queues so the
                         * disabled source no longer drives the timer. */
                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD blocked if enabled child
                         * sources still rely on it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Drop SIGCHLD from the mask unless an
                         * explicit SIGCHLD signal source still wants
                         * it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register first; only update the flag if that
                         * succeeded. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* SIGCHLD may already be in the mask because
                         * of enabled child sources. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only bump the counter on an actual
                         * OFF -> ON/ONESHOT transition. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1448
/* Retrieves the absolute time (µs, on the source's clock) a timer
 * event source is next scheduled to elapse at. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1458
/* Reschedules a timer event source to elapse at the absolute time usec
 * (on the source's clock). Clears any pending state and reshuffles the
 * clock's earliest/latest queues so the timerfd is rearmed with the
 * new deadline. (uint64_t) -1 is reserved and rejected. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);

        return 0;
}
1480
/* Retrieves the dispatch accuracy (slack) of a timer source, in µs. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1490
/* Sets the dispatch accuracy (slack) of a timer source in µs; 0
 * selects the default (DEFAULT_ACCURACY_USEC, 250ms). A wider accuracy
 * window allows wakeups to be coalesced (see sleep_between()). Only
 * the "latest" queue needs reshuffling, as accuracy only affects the
 * latest admissible wakeup time. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);

        return 0;
}
1514
/* Returns the clock id a timer event source runs on, derived from the
 * source type. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1524
/* Returns the PID a child event source watches. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1534
/* Installs a prepare callback on the source, or removes it when
 * callback is NULL. Sources with a prepare callback are tracked in the
 * event loop's prepare priority queue. Replacing one non-NULL callback
 * with another is a cheap in-place update. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Already queued: just swap the function pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1566
/* Returns the userdata pointer associated with the event source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1572
1573 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1574         void *ret;
1575
1576         assert_return(s, NULL);
1577
1578         ret = s->userdata;
1579         s->userdata = userdata;
1580
1581         return ret;
1582 }
1583
1584 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1585         usec_t c;
1586         assert(e);
1587         assert(a <= b);
1588
1589         if (a <= 0)
1590                 return 0;
1591
1592         if (b <= a + 1)
1593                 return a;
1594
1595         initialize_perturb(e);
1596
1597         /*
1598           Find a good time to wake up again between times a and b. We
1599           have two goals here:
1600
1601           a) We want to wake up as seldom as possible, hence prefer
1602              later times over earlier times.
1603
1604           b) But if we have to wake up, then let's make sure to
1605              dispatch as much as possible on the entire system.
1606
1607           We implement this by waking up everywhere at the same time
1608           within any given minute if we can, synchronised via the
1609           perturbation value determined from the boot ID. If we can't,
1610           then we try to find the same spot in every 10s, then 1s and
1611           then 250ms step. Otherwise, we pick the last possible time
1612           to wake up.
1613         */
1614
1615         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1616         if (c >= b) {
1617                 if (_unlikely_(c < USEC_PER_MINUTE))
1618                         return b;
1619
1620                 c -= USEC_PER_MINUTE;
1621         }
1622
1623         if (c >= a)
1624                 return c;
1625
1626         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1627         if (c >= b) {
1628                 if (_unlikely_(c < USEC_PER_SEC*10))
1629                         return b;
1630
1631                 c -= USEC_PER_SEC*10;
1632         }
1633
1634         if (c >= a)
1635                 return c;
1636
1637         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1638         if (c >= b) {
1639                 if (_unlikely_(c < USEC_PER_SEC))
1640                         return b;
1641
1642                 c -= USEC_PER_SEC;
1643         }
1644
1645         if (c >= a)
1646                 return c;
1647
1648         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1649         if (c >= b) {
1650                 if (_unlikely_(c < USEC_PER_MSEC*250))
1651                         return b;
1652
1653                 c -= USEC_PER_MSEC*250;
1654         }
1655
1656         if (c >= a)
1657                 return c;
1658
1659         return b;
1660 }
1661
1662 static int event_arm_timer(
1663                 sd_event *e,
1664                 struct clock_data *d) {
1665
1666         struct itimerspec its = {};
1667         sd_event_source *a, *b;
1668         usec_t t;
1669         int r;
1670
1671         assert(e);
1672         assert(d);
1673
1674         a = prioq_peek(d->earliest);
1675         if (!a || a->enabled == SD_EVENT_OFF) {
1676
1677                 if (d->fd < 0)
1678                         return 0;
1679
1680                 if (d->next == (usec_t) -1)
1681                         return 0;
1682
1683                 /* disarm */
1684                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1685                 if (r < 0)
1686                         return r;
1687
1688                 d->next = (usec_t) -1;
1689                 return 0;
1690         }
1691
1692         b = prioq_peek(d->latest);
1693         assert_se(b && b->enabled != SD_EVENT_OFF);
1694
1695         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1696         if (d->next == t)
1697                 return 0;
1698
1699         assert_se(d->fd >= 0);
1700
1701         if (t == 0) {
1702                 /* We don' want to disarm here, just mean some time looooong ago. */
1703                 its.it_value.tv_sec = 0;
1704                 its.it_value.tv_nsec = 1;
1705         } else
1706                 timespec_store(&its.it_value, t);
1707
1708         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1709         if (r < 0)
1710                 return -errno;
1711
1712         d->next = t;
1713         return 0;
1714 }
1715
/* Records the epoll events reported for an I/O source and queues the
 * source for dispatch. */
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
1734
1735 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1736         uint64_t x;
1737         ssize_t ss;
1738
1739         assert(e);
1740         assert(fd >= 0);
1741
1742         assert_return(events == EPOLLIN, -EIO);
1743
1744         ss = read(fd, &x, sizeof(x));
1745         if (ss < 0) {
1746                 if (errno == EAGAIN || errno == EINTR)
1747                         return 0;
1748
1749                 return -errno;
1750         }
1751
1752         if (_unlikely_(ss != sizeof(x)))
1753                 return -EIO;
1754
1755         if (next)
1756                 *next = (usec_t) -1;
1757
1758         return 0;
1759 }
1760
/* Marks every timer source on clock d whose deadline is at or before n
 * as pending, then reshuffles the clock's queues to reflect the
 * changed state. Stops at the first source that is not yet due,
 * disabled, or already pending. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
1790
/* Polls each watched child process via waitid(WNOHANG) — without
 * reaping it — and marks the corresponding event source pending if the
 * child's state changed. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1857
/* Drains the signalfd, marking the matching signal source pending for
 * each signal read. SIGCHLD additionally triggers child-source
 * processing (and may have no signal source of its own). Returns > 0
 * if at least one signal was read, 0 if the fd was already empty, or a
 * negative errno-style error. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        /* A signal we never subscribed to must not
                         * show up here. */
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1902
/* Dispatches a single event source: performs the pre-dispatch
 * bookkeeping (pending state, post-source activation, ONESHOT
 * disabling), invokes the type-specific user callback, and handles
 * the aftermath (error logging, refcount-driven destruction).
 * Returns 1 on success, or a negative errno-style error from the
 * bookkeeping steps. Callback errors are not propagated; they only
 * disable the source. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Clear the pending flag before running the callback — except
         * for DEFER and EXIT sources, which keep it set here. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback runs, so the
         * callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* Mark the source as "in dispatch" across the callback; this
         * allows unref during the callback without immediate freeing
         * (see the n_ref == 0 check below). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine before the callback whether the child
                 * terminated (as opposed to being stopped/continued). */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
                /* Watchdog wakeups are handled internally via the epoll
                 * marker and never reach a dispatchable source. */
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* If the last reference was dropped while dispatching, free the
         * source now; otherwise disable it if the callback failed. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2001
/* Runs the prepare callbacks of all enabled sources that have not yet
 * been prepared in the current iteration. Sources are kept in a prioq
 * ordered so that unprepared sources surface first; after stamping a
 * source with the current iteration it is reshuffled to the back.
 * Returns 0 on success, or a negative errno-style error from the
 * prioq bookkeeping. Prepare-callback errors only disable the source. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* Stop once the head of the queue is absent, already
                 * prepared this iteration, or disabled. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp first, then reshuffle, so this source won't be
                 * picked again this iteration. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Guard the callback so an unref inside it defers the
                 * actual free until after it returns. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2036
2037 static int dispatch_exit(sd_event *e) {
2038         sd_event_source *p;
2039         int r;
2040
2041         assert(e);
2042
2043         p = prioq_peek(e->exit);
2044         if (!p || p->enabled == SD_EVENT_OFF) {
2045                 e->state = SD_EVENT_FINISHED;
2046                 return 0;
2047         }
2048
2049         sd_event_ref(e);
2050         e->iteration++;
2051         e->state = SD_EVENT_EXITING;
2052
2053         r = source_dispatch(p);
2054
2055         e->state = SD_EVENT_PASSIVE;
2056         sd_event_unref(e);
2057
2058         return r;
2059 }
2060
2061 static sd_event_source* event_next_pending(sd_event *e) {
2062         sd_event_source *p;
2063
2064         assert(e);
2065
2066         p = prioq_peek(e->pending);
2067         if (!p)
2068                 return NULL;
2069
2070         if (p->enabled == SD_EVENT_OFF)
2071                 return NULL;
2072
2073         return p;
2074 }
2075
2076 static int arm_watchdog(sd_event *e) {
2077         struct itimerspec its = {};
2078         usec_t t;
2079         int r;
2080
2081         assert(e);
2082         assert(e->watchdog_fd >= 0);
2083
2084         t = sleep_between(e,
2085                           e->watchdog_last + (e->watchdog_period / 2),
2086                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2087
2088         timespec_store(&its.it_value, t);
2089
2090         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2091         if (r < 0)
2092                 return -errno;
2093
2094         return 0;
2095 }
2096
2097 static int process_watchdog(sd_event *e) {
2098         assert(e);
2099
2100         if (!e->watchdog)
2101                 return 0;
2102
2103         /* Don't notify watchdog too often */
2104         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2105                 return 0;
2106
2107         sd_notify(false, "WATCHDOG=1");
2108         e->watchdog_last = e->timestamp.monotonic;
2109
2110         return arm_watchdog(e);
2111 }
2112
/* Runs a single iteration of the event loop: runs prepare callbacks,
 * arms the per-clock timerfds, waits in epoll_wait() for up to
 * 'timeout' usec ((uint64_t) -1 means forever), processes whatever
 * woke us, and dispatches the single highest-priority pending source.
 * Returns a positive value on success, or a negative errno-style
 * error. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once exit has been requested, only exit sources run. */
        if (e->exit_requested)
                return dispatch_exit(e);

        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Arm one timerfd per clock for its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* If there is already work queued, don't block in epoll at all. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* Stack-allocate the epoll result buffer, capped at
         * EPOLL_QUEUE_MAX entries. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Convert usec to ms, rounding up so we never wake early. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat interruption as a successful (empty) iteration. */
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        /* Cache the wakeup timestamps; sd_event_now() reports these. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Route each epoll event by the marker stored in data.ptr:
         * small enum values identify internal fds, anything else is a
         * user IO source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark time sources whose deadline has passed as pending. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch at most one source per iteration. */
        p = event_next_pending(e);
        if (!p) {
                r = 1;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2228
2229 _public_ int sd_event_loop(sd_event *e) {
2230         int r;
2231
2232         assert_return(e, -EINVAL);
2233         assert_return(!event_pid_changed(e), -ECHILD);
2234         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2235
2236         sd_event_ref(e);
2237
2238         while (e->state != SD_EVENT_FINISHED) {
2239                 r = sd_event_run(e, (uint64_t) -1);
2240                 if (r < 0)
2241                         goto finish;
2242         }
2243
2244         r = e->exit_code;
2245
2246 finish:
2247         sd_event_unref(e);
2248         return r;
2249 }
2250
2251 _public_ int sd_event_get_state(sd_event *e) {
2252         assert_return(e, -EINVAL);
2253         assert_return(!event_pid_changed(e), -ECHILD);
2254
2255         return e->state;
2256 }
2257
2258 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2259         assert_return(e, -EINVAL);
2260         assert_return(code, -EINVAL);
2261         assert_return(!event_pid_changed(e), -ECHILD);
2262
2263         if (!e->exit_requested)
2264                 return -ENODATA;
2265
2266         *code = e->exit_code;
2267         return 0;
2268 }
2269
2270 _public_ int sd_event_exit(sd_event *e, int code) {
2271         assert_return(e, -EINVAL);
2272         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2273         assert_return(!event_pid_changed(e), -ECHILD);
2274
2275         e->exit_requested = true;
2276         e->exit_code = code;
2277
2278         return 0;
2279 }
2280
2281 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2282         assert_return(e, -EINVAL);
2283         assert_return(usec, -EINVAL);
2284         assert_return(!event_pid_changed(e), -ECHILD);
2285
2286         /* If we haven't run yet, just get the actual time */
2287         if (!dual_timestamp_is_set(&e->timestamp))
2288                 return -ENODATA;
2289
2290         switch (clock) {
2291
2292         case CLOCK_REALTIME:
2293         case CLOCK_REALTIME_ALARM:
2294                 *usec = e->timestamp.realtime;
2295                 break;
2296
2297         case CLOCK_MONOTONIC:
2298                 *usec = e->timestamp.monotonic;
2299                 break;
2300
2301         case CLOCK_BOOTTIME_ALARM:
2302                 *usec = e->timestamp_boottime;
2303                 break;
2304         }
2305
2306         return 0;
2307 }
2308
2309 _public_ int sd_event_default(sd_event **ret) {
2310
2311         static thread_local sd_event *default_event = NULL;
2312         sd_event *e = NULL;
2313         int r;
2314
2315         if (!ret)
2316                 return !!default_event;
2317
2318         if (default_event) {
2319                 *ret = sd_event_ref(default_event);
2320                 return 0;
2321         }
2322
2323         r = sd_event_new(&e);
2324         if (r < 0)
2325                 return r;
2326
2327         e->default_event_ptr = &default_event;
2328         e->tid = gettid();
2329         default_event = e;
2330
2331         *ret = e;
2332         return 1;
2333 }
2334
2335 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2336         assert_return(e, -EINVAL);
2337         assert_return(tid, -EINVAL);
2338         assert_return(!event_pid_changed(e), -ECHILD);
2339
2340         if (e->tid != 0) {
2341                 *tid = e->tid;
2342                 return 0;
2343         }
2344
2345         return -ENXIO;
2346 }
2347
/* Enables (b != 0) or disables (b == 0) automatic watchdog pinging:
 * when enabled, the loop sends "WATCHDOG=1" notifications paced by a
 * CLOCK_MONOTONIC timerfd registered in the epoll instance, using the
 * period reported by sd_watchdog_enabled(). Returns the new watchdog
 * state (0 or 1), 0 if no watchdog was requested for this service, or
 * a negative errno-style error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state — nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means watchdog support was not requested for
                 * this service; propagate that without enabling. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Register the timerfd under the internal WATCHDOG
                 * marker so sd_event_run() can recognize its wakeups. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: unregister and close the timerfd, if any. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Closing the fd also removes it from the epoll set. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2399
2400 _public_ int sd_event_get_watchdog(sd_event *e) {
2401         assert_return(e, -EINVAL);
2402         assert_return(!event_pid_changed(e), -ECHILD);
2403
2404         return e->watchdog;
2405 }