chiark / gitweb /
d6549148bbfe6e090cc4b2d8a7d94ca9bdfa12d3
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* Internal discriminator for the union inside sd_event_source; also used as
 * a tag in epoll data for fds that have no source object (signalfd, timerfds,
 * watchdog). */
typedef enum EventSourceType {
        SOURCE_IO,                  /* an fd watched via epoll */
        SOURCE_TIME_REALTIME,       /* CLOCK_REALTIME timer */
        SOURCE_TIME_BOOTTIME,       /* CLOCK_BOOTTIME timer */
        SOURCE_TIME_MONOTONIC,      /* CLOCK_MONOTONIC timer */
        SOURCE_TIME_REALTIME_ALARM, /* CLOCK_REALTIME_ALARM timer */
        SOURCE_TIME_BOOTTIME_ALARM, /* CLOCK_BOOTTIME_ALARM timer */
        SOURCE_SIGNAL,              /* a signal received via signalfd */
        SOURCE_CHILD,               /* a child process watched via SIGCHLD */
        SOURCE_DEFER,               /* runs on the next loop iteration */
        SOURCE_POST,                /* runs after other sources were dispatched */
        SOURCE_EXIT,                /* runs when the loop is exiting */
        SOURCE_WATCHDOG,            /* internal sd_notify() watchdog wakeup */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
59
/* True for the five timer-based source types, which share the .time union member */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
struct sd_event_source {
        unsigned n_ref;             /* reference count; freed on last unref once disconnected */

        sd_event *event;            /* owning loop; NULL after source_disconnect() */
        void *userdata;             /* opaque pointer handed back to the callback */
        sd_event_handler_t prepare; /* optional per-source prepare callback */

        /* Flag bit-fields, packed to keep the struct small */
        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;             /* queued in event->pending */
        bool dispatching:1;         /* NOTE(review): presumably set while the callback runs — not set in this chunk */
        bool floating:1;            /* owned by the loop itself rather than by a caller reference */

        int64_t priority;           /* lower values are dispatched first */
        unsigned pending_index;     /* index inside event->pending prioq */
        unsigned prepare_index;     /* index inside event->prepare prioq */
        unsigned pending_iteration; /* loop iteration at which the source became pending */
        unsigned prepare_iteration; /* loop iteration at which prepare last ran (see prepare_prioq_compare) */

        LIST_FIELDS(sd_event_source, sources);

        /* Per-type state; the valid member is selected by 'type' */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;   /* epoll event mask requested by the caller */
                        uint32_t revents;  /* event mask reported by the last poll */
                        bool registered:1; /* fd is currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest trigger time, and allowed dispatch slack */
                        unsigned earliest_index; /* index inside the clock's 'earliest' prioq */
                        unsigned latest_index;   /* index inside the clock's 'latest' prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;           /* signal number, also index into event->signal_sources */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;         /* key into event->child_sources */
                        int options;       /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index inside event->exit prioq */
                } exit;
        };
};
120
/* Per-clock timer state; one instance per supported clockid_t in sd_event */
struct clock_data {
        int fd;  /* timerfd for this clock; -1 until the first timer source is added */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* USEC_INFINITY when no wakeup is scheduled (set in sd_event_new()) */

        bool needs_rearm:1; /* prioqs changed, timerfd must be re-armed before the next poll */
};
137
struct sd_event {
        unsigned n_ref;          /* reference count; also bumped by each non-floating source */

        int epoll_fd;            /* central epoll instance all other fds hang off */
        int signal_fd;           /* signalfd for e->sigset; -1 until first signal/child source */
        int watchdog_fd;         /* timerfd driving sd_notify() watchdog pings; -1 if unused */

        Prioq *pending;          /* sources ready to be dispatched (pending_prioq_compare) */
        Prioq *prepare;          /* sources with a prepare callback (prepare_prioq_compare) */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;          /* per-boot random wakeup offset, lazily set by initialize_perturb() */

        sigset_t sigset;         /* signals currently routed through signal_fd */
        sd_event_source **signal_sources; /* array of _NSIG entries, indexed by signal number */

        Hashmap *child_sources;  /* pid (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* counts non-OFF child sources, gates SIGCHLD handling */

        Set *post_sources;       /* all SOURCE_POST sources */

        Prioq *exit;             /* SOURCE_EXIT sources, ordered by exit_prioq_compare */

        pid_t original_pid;      /* pid at creation time, to detect use across fork() */

        unsigned iteration;      /* monotonically increasing loop iteration counter */
        dual_timestamp timestamp;        /* NOTE(review): presumably captured per iteration — not set in this chunk */
        usec_t timestamp_boottime;
        int state;               /* SD_EVENT_* loop state machine value */

        bool exit_requested:1;
        bool need_process_child:1; /* set when waitid() states must be re-checked */
        bool watchdog:1;

        int exit_code;           /* code passed to sd_event_exit() */

        pid_t tid;               /* owning thread when installed as default loop */
        sd_event **default_event_ptr; /* slot to clear when this default loop dies, see event_free() */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;      /* number of sources attached, must be 0 at event_free() */

        LIST_HEAD(sd_event_source, sources);
};
191
192 static void source_disconnect(sd_event_source *s);
193
194 static int pending_prioq_compare(const void *a, const void *b) {
195         const sd_event_source *x = a, *y = b;
196
197         assert(x->pending);
198         assert(y->pending);
199
200         /* Enabled ones first */
201         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
202                 return -1;
203         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
204                 return 1;
205
206         /* Lower priority values first */
207         if (x->priority < y->priority)
208                 return -1;
209         if (x->priority > y->priority)
210                 return 1;
211
212         /* Older entries first */
213         if (x->pending_iteration < y->pending_iteration)
214                 return -1;
215         if (x->pending_iteration > y->pending_iteration)
216                 return 1;
217
218         /* Stability for the rest */
219         if (x < y)
220                 return -1;
221         if (x > y)
222                 return 1;
223
224         return 0;
225 }
226
227 static int prepare_prioq_compare(const void *a, const void *b) {
228         const sd_event_source *x = a, *y = b;
229
230         assert(x->prepare);
231         assert(y->prepare);
232
233         /* Move most recently prepared ones last, so that we can stop
234          * preparing as soon as we hit one that has already been
235          * prepared in the current iteration */
236         if (x->prepare_iteration < y->prepare_iteration)
237                 return -1;
238         if (x->prepare_iteration > y->prepare_iteration)
239                 return 1;
240
241         /* Enabled ones first */
242         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
243                 return -1;
244         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
245                 return 1;
246
247         /* Lower priority values first */
248         if (x->priority < y->priority)
249                 return -1;
250         if (x->priority > y->priority)
251                 return 1;
252
253         /* Stability for the rest */
254         if (x < y)
255                 return -1;
256         if (x > y)
257                 return 1;
258
259         return 0;
260 }
261
262 static int earliest_time_prioq_compare(const void *a, const void *b) {
263         const sd_event_source *x = a, *y = b;
264
265         assert(EVENT_SOURCE_IS_TIME(x->type));
266         assert(x->type == y->type);
267
268         /* Enabled ones first */
269         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
270                 return -1;
271         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
272                 return 1;
273
274         /* Move the pending ones to the end */
275         if (!x->pending && y->pending)
276                 return -1;
277         if (x->pending && !y->pending)
278                 return 1;
279
280         /* Order by time */
281         if (x->time.next < y->time.next)
282                 return -1;
283         if (x->time.next > y->time.next)
284                 return 1;
285
286         /* Stability for the rest */
287         if (x < y)
288                 return -1;
289         if (x > y)
290                 return 1;
291
292         return 0;
293 }
294
295 static int latest_time_prioq_compare(const void *a, const void *b) {
296         const sd_event_source *x = a, *y = b;
297
298         assert(EVENT_SOURCE_IS_TIME(x->type));
299         assert(x->type == y->type);
300
301         /* Enabled ones first */
302         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
303                 return -1;
304         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
305                 return 1;
306
307         /* Move the pending ones to the end */
308         if (!x->pending && y->pending)
309                 return -1;
310         if (x->pending && !y->pending)
311                 return 1;
312
313         /* Order by time */
314         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
315                 return -1;
316         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
317                 return 1;
318
319         /* Stability for the rest */
320         if (x < y)
321                 return -1;
322         if (x > y)
323                 return 1;
324
325         return 0;
326 }
327
328 static int exit_prioq_compare(const void *a, const void *b) {
329         const sd_event_source *x = a, *y = b;
330
331         assert(x->type == SOURCE_EXIT);
332         assert(y->type == SOURCE_EXIT);
333
334         /* Enabled ones first */
335         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
336                 return -1;
337         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338                 return 1;
339
340         /* Lower priority values first */
341         if (x->priority < y->priority)
342                 return -1;
343         if (x->priority > y->priority)
344                 return 1;
345
346         /* Stability for the rest */
347         if (x < y)
348                 return -1;
349         if (x > y)
350                 return 1;
351
352         return 0;
353 }
354
355 static void free_clock_data(struct clock_data *d) {
356         assert(d);
357
358         safe_close(d->fd);
359         prioq_free(d->earliest);
360         prioq_free(d->latest);
361 }
362
/* Tear the loop down: detach all remaining sources, close every fd and free
 * all auxiliary data structures. Called when the last reference is dropped. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be attached at this point: any
         * non-floating source holds a reference on the loop (see
         * source_new()), which would have kept us from getting here. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If installed as a default loop, clear the cached pointer */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
399
/* Allocate a fresh event loop with a single reference and store it in *ret.
 * Returns 0 on success, a negative errno-style code on failure (in which
 * case nothing is allocated). */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Initialize all fds to -1 first, so that event_free() on the
         * failure path below can safely close them */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid(); /* remembered to detect use across fork() */
        e->perturb = USEC_INFINITY; /* lazily computed in initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
437
438 _public_ sd_event* sd_event_ref(sd_event *e) {
439         assert_return(e, NULL);
440
441         assert(e->n_ref >= 1);
442         e->n_ref++;
443
444         return e;
445 }
446
447 _public_ sd_event* sd_event_unref(sd_event *e) {
448
449         if (!e)
450                 return NULL;
451
452         assert(e->n_ref >= 1);
453         e->n_ref--;
454
455         if (e->n_ref <= 0)
456                 event_free(e);
457
458         return NULL;
459 }
460
/* Returns true if the current pid differs from the one that created the
 * loop, i.e. the object was carried across a fork(). */
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
469
470 static int source_io_unregister(sd_event_source *s) {
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475
476         if (!s->io.registered)
477                 return 0;
478
479         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
480         if (r < 0)
481                 return -errno;
482
483         s->io.registered = false;
484         return 0;
485 }
486
487 static int source_io_register(
488                 sd_event_source *s,
489                 int enabled,
490                 uint32_t events) {
491
492         struct epoll_event ev = {};
493         int r;
494
495         assert(s);
496         assert(s->type == SOURCE_IO);
497         assert(enabled != SD_EVENT_OFF);
498
499         ev.events = events;
500         ev.data.ptr = s;
501
502         if (enabled == SD_EVENT_ONESHOT)
503                 ev.events |= EPOLLONESHOT;
504
505         if (s->io.registered)
506                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
507         else
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
509
510         if (r < 0)
511                 return -errno;
512
513         s->io.registered = true;
514
515         return 0;
516 }
517
518 static clockid_t event_source_type_to_clock(EventSourceType t) {
519
520         switch (t) {
521
522         case SOURCE_TIME_REALTIME:
523                 return CLOCK_REALTIME;
524
525         case SOURCE_TIME_BOOTTIME:
526                 return CLOCK_BOOTTIME;
527
528         case SOURCE_TIME_MONOTONIC:
529                 return CLOCK_MONOTONIC;
530
531         case SOURCE_TIME_REALTIME_ALARM:
532                 return CLOCK_REALTIME_ALARM;
533
534         case SOURCE_TIME_BOOTTIME_ALARM:
535                 return CLOCK_BOOTTIME_ALARM;
536
537         default:
538                 return (clockid_t) -1;
539         }
540 }
541
542 static EventSourceType clock_to_event_source_type(clockid_t clock) {
543
544         switch (clock) {
545
546         case CLOCK_REALTIME:
547                 return SOURCE_TIME_REALTIME;
548
549         case CLOCK_BOOTTIME:
550                 return SOURCE_TIME_BOOTTIME;
551
552         case CLOCK_MONOTONIC:
553                 return SOURCE_TIME_MONOTONIC;
554
555         case CLOCK_REALTIME_ALARM:
556                 return SOURCE_TIME_REALTIME_ALARM;
557
558         case CLOCK_BOOTTIME_ALARM:
559                 return SOURCE_TIME_BOOTTIME_ALARM;
560
561         default:
562                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
563         }
564 }
565
566 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
567         assert(e);
568
569         switch (t) {
570
571         case SOURCE_TIME_REALTIME:
572                 return &e->realtime;
573
574         case SOURCE_TIME_BOOTTIME:
575                 return &e->boottime;
576
577         case SOURCE_TIME_MONOTONIC:
578                 return &e->monotonic;
579
580         case SOURCE_TIME_REALTIME_ALARM:
581                 return &e->realtime_alarm;
582
583         case SOURCE_TIME_BOOTTIME_ALARM:
584                 return &e->boottime_alarm;
585
586         default:
587                 return NULL;
588         }
589 }
590
/* Detach a source from its loop: undo all per-type registrations, pull it
 * out of the pending/prepare prioqs and the source list, and drop the
 * loop reference it held (unless floating). Safe to call twice — a
 * disconnected source has s->event == NULL and returns early. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Removing a timer shifts the scheduling window, so the
                 * timerfd must be re-armed */
                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        /* Keep SIGCHLD in the mask if child sources still need it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        /* Drop SIGCHLD from the mask unless an explicit
                         * SIGCHLD signal source still wants it */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        /* Unlink from the loop last: the per-type cleanup above still
         * needed s->event */
        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pin the loop; release that reference now */
        if (!s->floating)
                sd_event_unref(event);
}
683
/* Disconnect the source from its loop (if still attached) and free it. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s);
}
690
/* Flip a source's pending state and keep the prioqs in sync: pending
 * sources live in event->pending, and for timer sources pending-ness also
 * affects their position in the per-clock prioqs. Returns 0 or a negative
 * error (in which case the state is left unchanged). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT); /* exit sources use their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when it became pending, for fair FIFO ordering
                 * among equal-priority sources */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false; /* roll back on OOM */
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* The time comparators sort pending sources to the back,
                 * so re-sort and schedule a timerfd re-arm */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
726
727 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
728         sd_event_source *s;
729
730         assert(e);
731
732         s = new0(sd_event_source, 1);
733         if (!s)
734                 return NULL;
735
736         s->n_ref = 1;
737         s->event = e;
738         s->floating = floating;
739         s->type = type;
740         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
741
742         if (!floating)
743                 sd_event_ref(e);
744
745         LIST_PREPEND(sources, e->sources, s);
746         e->n_sources ++;
747
748         return s;
749 }
750
751 _public_ int sd_event_add_io(
752                 sd_event *e,
753                 sd_event_source **ret,
754                 int fd,
755                 uint32_t events,
756                 sd_event_io_handler_t callback,
757                 void *userdata) {
758
759         sd_event_source *s;
760         int r;
761
762         assert_return(e, -EINVAL);
763         assert_return(fd >= 0, -EINVAL);
764         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
765         assert_return(callback, -EINVAL);
766         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
767         assert_return(!event_pid_changed(e), -ECHILD);
768
769         s = source_new(e, !ret, SOURCE_IO);
770         if (!s)
771                 return -ENOMEM;
772
773         s->io.fd = fd;
774         s->io.events = events;
775         s->io.callback = callback;
776         s->userdata = userdata;
777         s->enabled = SD_EVENT_ON;
778
779         r = source_io_register(s, s->enabled, events);
780         if (r < 0) {
781                 source_free(s);
782                 return -errno;
783         }
784
785         if (ret)
786                 *ret = s;
787
788         return 0;
789 }
790
/* Lazily derive a stable, per-boot random offset (0 .. USEC_PER_MINUTE)
 * from the boot ID, used to perturb coalesced timer wakeups. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return; /* already initialized */

        /* On failure e->perturb stays USEC_INFINITY and we retry next time */
        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
809
810 static int event_setup_timer_fd(
811                 sd_event *e,
812                 struct clock_data *d,
813                 clockid_t clock) {
814
815         struct epoll_event ev = {};
816         int r, fd;
817
818         assert(e);
819         assert(d);
820
821         if (_likely_(d->fd >= 0))
822                 return 0;
823
824         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
825         if (fd < 0)
826                 return -errno;
827
828         ev.events = EPOLLIN;
829         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
830
831         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
832         if (r < 0) {
833                 safe_close(fd);
834                 return -errno;
835         }
836
837         d->fd = fd;
838         return 0;
839 }
840
/* Add a timer source on the given clock that fires once at 'usec', with up
 * to 'accuracy' microseconds of allowed slack (0 selects the 250ms
 * default). If 'ret' is NULL the source is created floating. Returns 0 on
 * success, a negative errno-style code on failure. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP); /* unsupported clock id */

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the per-clock scheduling prioqs */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ... and the timerfd backing this clock */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT; /* timers fire once by default */

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        /* New timer changes the scheduling window, re-arm the timerfd */
        d->needs_rearm = true;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
916
/* (Re)create the signalfd from e->sigset and hook it into the epoll
 * instance. signalfd() with an existing fd merely updates its mask, so the
 * fd is only added to epoll the first time. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        /* First call? Then the new fd must be registered with epoll below */
        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        /* There is no per-source object for the signalfd; tag the epoll
         * entry with the source type instead */
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
946
947 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
948         assert(s);
949
950         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
951 }
952
953 _public_ int sd_event_add_signal(
954                 sd_event *e,
955                 sd_event_source **ret,
956                 int sig,
957                 sd_event_signal_handler_t callback,
958                 void *userdata) {
959
960         sd_event_source *s;
961         sigset_t ss;
962         int r;
963
964         assert_return(e, -EINVAL);
965         assert_return(sig > 0, -EINVAL);
966         assert_return(sig < _NSIG, -EINVAL);
967         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
968         assert_return(!event_pid_changed(e), -ECHILD);
969
970         if (!callback)
971                 callback = signal_exit_callback;
972
973         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
974         if (r < 0)
975                 return -errno;
976
977         if (!sigismember(&ss, sig))
978                 return -EBUSY;
979
980         if (!e->signal_sources) {
981                 e->signal_sources = new0(sd_event_source*, _NSIG);
982                 if (!e->signal_sources)
983                         return -ENOMEM;
984         } else if (e->signal_sources[sig])
985                 return -EBUSY;
986
987         s = source_new(e, !ret, SOURCE_SIGNAL);
988         if (!s)
989                 return -ENOMEM;
990
991         s->signal.sig = sig;
992         s->signal.callback = callback;
993         s->userdata = userdata;
994         s->enabled = SD_EVENT_ON;
995
996         e->signal_sources[sig] = s;
997         assert_se(sigaddset(&e->sigset, sig) == 0);
998
999         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
1000                 r = event_update_signal_fd(e);
1001                 if (r < 0) {
1002                         source_free(s);
1003                         return r;
1004                 }
1005         }
1006
1007         if (ret)
1008                 *ret = s;
1009
1010         return 0;
1011 }
1012
1013 _public_ int sd_event_add_child(
1014                 sd_event *e,
1015                 sd_event_source **ret,
1016                 pid_t pid,
1017                 int options,
1018                 sd_event_child_handler_t callback,
1019                 void *userdata) {
1020
1021         sd_event_source *s;
1022         int r;
1023
1024         assert_return(e, -EINVAL);
1025         assert_return(pid > 1, -EINVAL);
1026         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1027         assert_return(options != 0, -EINVAL);
1028         assert_return(callback, -EINVAL);
1029         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1030         assert_return(!event_pid_changed(e), -ECHILD);
1031
1032         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
1033         if (r < 0)
1034                 return r;
1035
1036         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1037                 return -EBUSY;
1038
1039         s = source_new(e, !ret, SOURCE_CHILD);
1040         if (!s)
1041                 return -ENOMEM;
1042
1043         s->child.pid = pid;
1044         s->child.options = options;
1045         s->child.callback = callback;
1046         s->userdata = userdata;
1047         s->enabled = SD_EVENT_ONESHOT;
1048
1049         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1050         if (r < 0) {
1051                 source_free(s);
1052                 return r;
1053         }
1054
1055         e->n_enabled_child_sources ++;
1056
1057         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1058
1059         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
1060                 r = event_update_signal_fd(e);
1061                 if (r < 0) {
1062                         source_free(s);
1063                         return -errno;
1064                 }
1065         }
1066
1067         e->need_process_child = true;
1068
1069         if (ret)
1070                 *ret = s;
1071
1072         return 0;
1073 }
1074
1075 _public_ int sd_event_add_defer(
1076                 sd_event *e,
1077                 sd_event_source **ret,
1078                 sd_event_handler_t callback,
1079                 void *userdata) {
1080
1081         sd_event_source *s;
1082         int r;
1083
1084         assert_return(e, -EINVAL);
1085         assert_return(callback, -EINVAL);
1086         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1087         assert_return(!event_pid_changed(e), -ECHILD);
1088
1089         s = source_new(e, !ret, SOURCE_DEFER);
1090         if (!s)
1091                 return -ENOMEM;
1092
1093         s->defer.callback = callback;
1094         s->userdata = userdata;
1095         s->enabled = SD_EVENT_ONESHOT;
1096
1097         r = source_set_pending(s, true);
1098         if (r < 0) {
1099                 source_free(s);
1100                 return r;
1101         }
1102
1103         if (ret)
1104                 *ret = s;
1105
1106         return 0;
1107 }
1108
1109 _public_ int sd_event_add_post(
1110                 sd_event *e,
1111                 sd_event_source **ret,
1112                 sd_event_handler_t callback,
1113                 void *userdata) {
1114
1115         sd_event_source *s;
1116         int r;
1117
1118         assert_return(e, -EINVAL);
1119         assert_return(callback, -EINVAL);
1120         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1121         assert_return(!event_pid_changed(e), -ECHILD);
1122
1123         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1124         if (r < 0)
1125                 return r;
1126
1127         s = source_new(e, !ret, SOURCE_POST);
1128         if (!s)
1129                 return -ENOMEM;
1130
1131         s->post.callback = callback;
1132         s->userdata = userdata;
1133         s->enabled = SD_EVENT_ON;
1134
1135         r = set_put(e->post_sources, s);
1136         if (r < 0) {
1137                 source_free(s);
1138                 return r;
1139         }
1140
1141         if (ret)
1142                 *ret = s;
1143
1144         return 0;
1145 }
1146
1147 _public_ int sd_event_add_exit(
1148                 sd_event *e,
1149                 sd_event_source **ret,
1150                 sd_event_handler_t callback,
1151                 void *userdata) {
1152
1153         sd_event_source *s;
1154         int r;
1155
1156         assert_return(e, -EINVAL);
1157         assert_return(callback, -EINVAL);
1158         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1159         assert_return(!event_pid_changed(e), -ECHILD);
1160
1161         if (!e->exit) {
1162                 e->exit = prioq_new(exit_prioq_compare);
1163                 if (!e->exit)
1164                         return -ENOMEM;
1165         }
1166
1167         s = source_new(e, !ret, SOURCE_EXIT);
1168         if (!s)
1169                 return -ENOMEM;
1170
1171         s->exit.callback = callback;
1172         s->userdata = userdata;
1173         s->exit.prioq_index = PRIOQ_IDX_NULL;
1174         s->enabled = SD_EVENT_ONESHOT;
1175
1176         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1177         if (r < 0) {
1178                 source_free(s);
1179                 return r;
1180         }
1181
1182         if (ret)
1183                 *ret = s;
1184
1185         return 0;
1186 }
1187
1188 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1189         assert_return(s, NULL);
1190
1191         assert(s->n_ref >= 1);
1192         s->n_ref++;
1193
1194         return s;
1195 }
1196
/* Drop a reference on the event source, freeing it when the counter
 * hits zero. Always returns NULL so callers can write
 * `s = sd_event_source_unref(s);`. NULL input is a no-op. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        /* Only IO sources own an fd registered with
                         * epoll, hence only they need unregistering
                         * before the fd may be closed by the caller. */
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        /* Detach from the loop; the dispatcher frees
                         * the object itself once the callback returns. */
                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1225
1226 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1227         assert_return(s, NULL);
1228
1229         return s->event;
1230 }
1231
1232 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1233         assert_return(s, -EINVAL);
1234         assert_return(s->type != SOURCE_EXIT, -EDOM);
1235         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1236         assert_return(!event_pid_changed(s->event), -ECHILD);
1237
1238         return s->pending;
1239 }
1240
1241 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1242         assert_return(s, -EINVAL);
1243         assert_return(s->type == SOURCE_IO, -EDOM);
1244         assert_return(!event_pid_changed(s->event), -ECHILD);
1245
1246         return s->io.fd;
1247 }
1248
/* Switch an IO source to watch a different file descriptor. If the
 * source is enabled the new fd is registered with epoll before the
 * old one is dropped, and the change is rolled back on failure.
 * Returns 0 on success, negative errno-style code on failure. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled: nothing registered with epoll, just
                 * remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first so we can roll back to
                 * the old, still-registered fd if this fails. */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Drop the old fd from epoll; failure is ignored as
                 * the fd may already be gone. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1284
1285 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1286         assert_return(s, -EINVAL);
1287         assert_return(events, -EINVAL);
1288         assert_return(s->type == SOURCE_IO, -EDOM);
1289         assert_return(!event_pid_changed(s->event), -ECHILD);
1290
1291         *events = s->io.events;
1292         return 0;
1293 }
1294
1295 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1296         int r;
1297
1298         assert_return(s, -EINVAL);
1299         assert_return(s->type == SOURCE_IO, -EDOM);
1300         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1301         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1302         assert_return(!event_pid_changed(s->event), -ECHILD);
1303
1304         /* edge-triggered updates are never skipped, so we can reset edges */
1305         if (s->io.events == events && !(events & EPOLLET))
1306                 return 0;
1307
1308         if (s->enabled != SD_EVENT_OFF) {
1309                 r = source_io_register(s, s->enabled, events);
1310                 if (r < 0)
1311                         return r;
1312         }
1313
1314         s->io.events = events;
1315         source_set_pending(s, false);
1316
1317         return 0;
1318 }
1319
1320 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1321         assert_return(s, -EINVAL);
1322         assert_return(revents, -EINVAL);
1323         assert_return(s->type == SOURCE_IO, -EDOM);
1324         assert_return(s->pending, -ENODATA);
1325         assert_return(!event_pid_changed(s->event), -ECHILD);
1326
1327         *revents = s->io.revents;
1328         return 0;
1329 }
1330
1331 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1332         assert_return(s, -EINVAL);
1333         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1334         assert_return(!event_pid_changed(s->event), -ECHILD);
1335
1336         return s->signal.sig;
1337 }
1338
1339 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1340         assert_return(s, -EINVAL);
1341         assert_return(!event_pid_changed(s->event), -ECHILD);
1342
1343         return s->priority;
1344 }
1345
1346 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1347         assert_return(s, -EINVAL);
1348         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1349         assert_return(!event_pid_changed(s->event), -ECHILD);
1350
1351         if (s->priority == priority)
1352                 return 0;
1353
1354         s->priority = priority;
1355
1356         if (s->pending)
1357                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1358
1359         if (s->prepare)
1360                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1361
1362         if (s->type == SOURCE_EXIT)
1363                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1364
1365         return 0;
1366 }
1367
1368 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1369         assert_return(s, -EINVAL);
1370         assert_return(m, -EINVAL);
1371         assert_return(!event_pid_changed(s->event), -ECHILD);
1372
1373         *m = s->enabled;
1374         return 0;
1375 }
1376
/* Switch a source between SD_EVENT_OFF, SD_EVENT_ON and
 * SD_EVENT_ONESHOT. Each source type needs different bookkeeping:
 * IO sources (de)register with epoll, time sources reshuffle their
 * clock's priority queues, signal/child sources update the sigset
 * and the signalfd, exit sources reshuffle the exit queue. Returns
 * 0 on success, negative errno-style code on failure. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Detach the fd from epoll before flipping
                         * the flag, so we can fail cleanly. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Disabled time sources sort to the end of
                         * their clock's queues; reshuffle and mark
                         * the timerfd for rearming. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the sigset if child
                         * sources still need it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Drop SIGCHLD from the signalfd only when no
                         * explicit SIGCHLD signal source needs it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register with epoll first so a failure
                         * leaves the source cleanly disabled. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* SIGCHLD may already be subscribed on behalf
                         * of enabled child sources. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only bump the counter when actually going
                         * from OFF to enabled (ON->ONESHOT and back
                         * must not double-count). */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* The enable state is part of the pending/prepare queue sort
         * order, hence reshuffle. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1530
1531 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1532         assert_return(s, -EINVAL);
1533         assert_return(usec, -EINVAL);
1534         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1535         assert_return(!event_pid_changed(s->event), -ECHILD);
1536
1537         *usec = s->time.next;
1538         return 0;
1539 }
1540
1541 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1542         struct clock_data *d;
1543
1544         assert_return(s, -EINVAL);
1545         assert_return(usec != (uint64_t) -1, -EINVAL);
1546         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1547         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1548         assert_return(!event_pid_changed(s->event), -ECHILD);
1549
1550         s->time.next = usec;
1551
1552         source_set_pending(s, false);
1553
1554         d = event_get_clock_data(s->event, s->type);
1555         assert(d);
1556
1557         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1558         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1559         d->needs_rearm = true;
1560
1561         return 0;
1562 }
1563
1564 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1565         assert_return(s, -EINVAL);
1566         assert_return(usec, -EINVAL);
1567         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1568         assert_return(!event_pid_changed(s->event), -ECHILD);
1569
1570         *usec = s->time.accuracy;
1571         return 0;
1572 }
1573
1574 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1575         struct clock_data *d;
1576
1577         assert_return(s, -EINVAL);
1578         assert_return(usec != (uint64_t) -1, -EINVAL);
1579         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1580         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1581         assert_return(!event_pid_changed(s->event), -ECHILD);
1582
1583         if (usec == 0)
1584                 usec = DEFAULT_ACCURACY_USEC;
1585
1586         s->time.accuracy = usec;
1587
1588         source_set_pending(s, false);
1589
1590         d = event_get_clock_data(s->event, s->type);
1591         assert(d);
1592
1593         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1594         d->needs_rearm = true;
1595
1596         return 0;
1597 }
1598
1599 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1600         assert_return(s, -EINVAL);
1601         assert_return(clock, -EINVAL);
1602         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1603         assert_return(!event_pid_changed(s->event), -ECHILD);
1604
1605         *clock = event_source_type_to_clock(s->type);
1606         return 0;
1607 }
1608
1609 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1610         assert_return(s, -EINVAL);
1611         assert_return(pid, -EINVAL);
1612         assert_return(s->type == SOURCE_CHILD, -EDOM);
1613         assert_return(!event_pid_changed(s->event), -ECHILD);
1614
1615         *pid = s->child.pid;
1616         return 0;
1617 }
1618
1619 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1620         int r;
1621
1622         assert_return(s, -EINVAL);
1623         assert_return(s->type != SOURCE_EXIT, -EDOM);
1624         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1625         assert_return(!event_pid_changed(s->event), -ECHILD);
1626
1627         if (s->prepare == callback)
1628                 return 0;
1629
1630         if (callback && s->prepare) {
1631                 s->prepare = callback;
1632                 return 0;
1633         }
1634
1635         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1636         if (r < 0)
1637                 return r;
1638
1639         s->prepare = callback;
1640
1641         if (callback) {
1642                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1643                 if (r < 0)
1644                         return r;
1645         } else
1646                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1647
1648         return 0;
1649 }
1650
1651 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1652         assert_return(s, NULL);
1653
1654         return s->userdata;
1655 }
1656
1657 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1658         void *ret;
1659
1660         assert_return(s, NULL);
1661
1662         ret = s->userdata;
1663         s->userdata = userdata;
1664
1665         return ret;
1666 }
1667
/* Pick a wakeup time in [a, b]. See the block comment below for the
 * strategy. NOTE(review): the minute-granularity step uses
 * e->perturb unreduced while the finer steps reduce it with a
 * modulo — this presumably relies on initialize_perturb() keeping
 * e->perturb below USEC_PER_MINUTE; confirm against its definition. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, so this is effectively an a == 0 check */
        if (a <= 0)
                return 0;

        /* Window too narrow to be worth perturbing */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the per-minute slot: snap b down to the minute and add
         * the boot-ID perturbation, stepping back one minute if that
         * overshoots b (guarding against unsigned underflow). */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Same idea at 10s granularity */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* ... 1s granularity ... */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* ... 250ms granularity ... */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronised slot fits; wake at the latest allowed time */
        return b;
}
1745
1746 static int event_arm_timer(
1747                 sd_event *e,
1748                 struct clock_data *d) {
1749
1750         struct itimerspec its = {};
1751         sd_event_source *a, *b;
1752         usec_t t;
1753         int r;
1754
1755         assert(e);
1756         assert(d);
1757
1758         if (_likely_(!d->needs_rearm))
1759                 return 0;
1760         else
1761                 d->needs_rearm = false;
1762
1763         a = prioq_peek(d->earliest);
1764         if (!a || a->enabled == SD_EVENT_OFF) {
1765
1766                 if (d->fd < 0)
1767                         return 0;
1768
1769                 if (d->next == USEC_INFINITY)
1770                         return 0;
1771
1772                 /* disarm */
1773                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1774                 if (r < 0)
1775                         return r;
1776
1777                 d->next = USEC_INFINITY;
1778                 return 0;
1779         }
1780
1781         b = prioq_peek(d->latest);
1782         assert_se(b && b->enabled != SD_EVENT_OFF);
1783
1784         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1785         if (d->next == t)
1786                 return 0;
1787
1788         assert_se(d->fd >= 0);
1789
1790         if (t == 0) {
1791                 /* We don' want to disarm here, just mean some time looooong ago. */
1792                 its.it_value.tv_sec = 0;
1793                 its.it_value.tv_nsec = 1;
1794         } else
1795                 timespec_store(&its.it_value, t);
1796
1797         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1798         if (r < 0)
1799                 return -errno;
1800
1801         d->next = t;
1802         return 0;
1803 }
1804
1805 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1806         assert(e);
1807         assert(s);
1808         assert(s->type == SOURCE_IO);
1809
1810         /* If the event source was already pending, we just OR in the
1811          * new revents, otherwise we reset the value. The ORing is
1812          * necessary to handle EPOLLONESHOT events properly where
1813          * readability might happen independently of writability, and
1814          * we need to keep track of both */
1815
1816         if (s->pending)
1817                 s->io.revents |= revents;
1818         else
1819                 s->io.revents = revents;
1820
1821         return source_set_pending(s, true);
1822 }
1823
1824 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1825         uint64_t x;
1826         ssize_t ss;
1827
1828         assert(e);
1829         assert(fd >= 0);
1830
1831         assert_return(events == EPOLLIN, -EIO);
1832
1833         ss = read(fd, &x, sizeof(x));
1834         if (ss < 0) {
1835                 if (errno == EAGAIN || errno == EINTR)
1836                         return 0;
1837
1838                 return -errno;
1839         }
1840
1841         if (_unlikely_(ss != sizeof(x)))
1842                 return -EIO;
1843
1844         if (next)
1845                 *next = USEC_INFINITY;
1846
1847         return 0;
1848 }
1849
1850 static int process_timer(
1851                 sd_event *e,
1852                 usec_t n,
1853                 struct clock_data *d) {
1854
1855         sd_event_source *s;
1856         int r;
1857
1858         assert(e);
1859         assert(d);
1860
1861         for (;;) {
1862                 s = prioq_peek(d->earliest);
1863                 if (!s ||
1864                     s->time.next > n ||
1865                     s->enabled == SD_EVENT_OFF ||
1866                     s->pending)
1867                         break;
1868
1869                 r = source_set_pending(s, true);
1870                 if (r < 0)
1871                         return r;
1872
1873                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1874                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1875         }
1876
1877         return 0;
1878 }
1879
/* Poll each registered child source with waitid(WNOHANG) and mark
 * sources with a state change as pending. Returns 0 on success,
 * negative errno-style code on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued: the earlier siginfo must not be
                 * overwritten before the callback saw it. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT (only valid with WEXITED) keeps the child
                 * unreaped so the dispatch callback still sees it. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid != 0 indicates an actual state change */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1946
/* Drain the signalfd and mark matching signal sources as pending.
 * SIGCHLD additionally triggers child-source processing. Returns
 * > 0 if at least one signal was read, 0 if the fd was empty,
 * negative errno-style code on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        /* The signalfd only ever signals readability */
        assert_return(events == EPOLLIN, -EIO);

        /* Loop until the (non-blocking) signalfd is drained */
        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s = NULL;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* EAGAIN means the queue is empty: report
                         * whether anything was consumed. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                /* Look up an explicit source for this signal, if any */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1993
/* Invokes the user callback of a single event source. Clears the
 * pending flag first (except for defer/exit sources, which stay
 * "pending" by design), marks all post sources pending, honours
 * ONESHOT semantics, and handles the case where the callback drops the
 * last reference to the source or returns an error. Returns 1 on
 * success, or a negative errno-style error. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources are dispatched repeatedly while
         * enabled, so their pending state must not be reset here. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources fire once, then are disabled — before the
         * callback runs, so the callback may re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While dispatching, sd_event_source_unref() defers freeing to
         * us (see the n_ref == 0 check below). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine before the callback whether the child died,
                 * since the callback may clobber the siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                /* These pseudo-types never have sources allocated for them. */
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* The callback may have dropped the last reference; in that
         * case the free was deferred until now. Otherwise a failing
         * callback disables the source so it doesn't busy-loop. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2095
2096 static int event_prepare(sd_event *e) {
2097         int r;
2098
2099         assert(e);
2100
2101         for (;;) {
2102                 sd_event_source *s;
2103
2104                 s = prioq_peek(e->prepare);
2105                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2106                         break;
2107
2108                 s->prepare_iteration = e->iteration;
2109                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2110                 if (r < 0)
2111                         return r;
2112
2113                 assert(s->prepare);
2114
2115                 s->dispatching = true;
2116                 r = s->prepare(s, s->userdata);
2117                 s->dispatching = false;
2118
2119                 if (r < 0)
2120                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2121
2122                 if (s->n_ref == 0)
2123                         source_free(s);
2124                 else if (r < 0)
2125                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2126         }
2127
2128         return 0;
2129 }
2130
2131 static int dispatch_exit(sd_event *e) {
2132         sd_event_source *p;
2133         int r;
2134
2135         assert(e);
2136
2137         p = prioq_peek(e->exit);
2138         if (!p || p->enabled == SD_EVENT_OFF) {
2139                 e->state = SD_EVENT_FINISHED;
2140                 return 0;
2141         }
2142
2143         sd_event_ref(e);
2144         e->iteration++;
2145         e->state = SD_EVENT_EXITING;
2146
2147         r = source_dispatch(p);
2148
2149         e->state = SD_EVENT_PASSIVE;
2150         sd_event_unref(e);
2151
2152         return r;
2153 }
2154
2155 static sd_event_source* event_next_pending(sd_event *e) {
2156         sd_event_source *p;
2157
2158         assert(e);
2159
2160         p = prioq_peek(e->pending);
2161         if (!p)
2162                 return NULL;
2163
2164         if (p->enabled == SD_EVENT_OFF)
2165                 return NULL;
2166
2167         return p;
2168 }
2169
2170 static int arm_watchdog(sd_event *e) {
2171         struct itimerspec its = {};
2172         usec_t t;
2173         int r;
2174
2175         assert(e);
2176         assert(e->watchdog_fd >= 0);
2177
2178         t = sleep_between(e,
2179                           e->watchdog_last + (e->watchdog_period / 2),
2180                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2181
2182         timespec_store(&its.it_value, t);
2183
2184         /* Make sure we never set the watchdog to 0, which tells the
2185          * kernel to disable it. */
2186         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2187                 its.it_value.tv_nsec = 1;
2188
2189         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2190         if (r < 0)
2191                 return -errno;
2192
2193         return 0;
2194 }
2195
2196 static int process_watchdog(sd_event *e) {
2197         assert(e);
2198
2199         if (!e->watchdog)
2200                 return 0;
2201
2202         /* Don't notify watchdog too often */
2203         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2204                 return 0;
2205
2206         sd_notify(false, "WATCHDOG=1");
2207         e->watchdog_last = e->timestamp.monotonic;
2208
2209         return arm_watchdog(e);
2210 }
2211
/* Runs one iteration of the event loop: prepare callbacks, timer
 * arming, epoll_wait() with the given timeout (in µs, (uint64_t) -1
 * for infinite), fd/timer/signal processing, and dispatch of a single
 * pending event source. Returns > 0 if an event was dispatched, 0 if
 * the wait timed out with nothing to do, and a negative errno-style
 * error on failure. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;
        bool timedout;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested we only run exit handlers. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the loop so callbacks can't free it under us. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Program each clock's timerfd with its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* If work is already queued, just poll without blocking. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* Bounded, stack-allocated event buffer; one epoll_wait() call
         * can return at most ev_queue_max events per iteration. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Convert the µs timeout to ms, rounding up so we never wake
         * early. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        timedout = m == 0;

        /* Take the iteration's timestamps once, so all timer
         * comparisons below use a consistent notion of "now". */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Route each epoll event by the sentinel stored in data.ptr:
         * small SOURCE_* integers mark internal fds, anything else is
         * an IO source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark elapsed timer sources pending, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch at most one source per iteration, so callers can
         * interleave their own work between events. */
        p = event_next_pending(e);
        if (!p) {
                r = !timedout;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2340
2341 _public_ int sd_event_loop(sd_event *e) {
2342         int r;
2343
2344         assert_return(e, -EINVAL);
2345         assert_return(!event_pid_changed(e), -ECHILD);
2346         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2347
2348         sd_event_ref(e);
2349
2350         while (e->state != SD_EVENT_FINISHED) {
2351                 r = sd_event_run(e, (uint64_t) -1);
2352                 if (r < 0)
2353                         goto finish;
2354         }
2355
2356         r = e->exit_code;
2357
2358 finish:
2359         sd_event_unref(e);
2360         return r;
2361 }
2362
2363 _public_ int sd_event_get_state(sd_event *e) {
2364         assert_return(e, -EINVAL);
2365         assert_return(!event_pid_changed(e), -ECHILD);
2366
2367         return e->state;
2368 }
2369
2370 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2371         assert_return(e, -EINVAL);
2372         assert_return(code, -EINVAL);
2373         assert_return(!event_pid_changed(e), -ECHILD);
2374
2375         if (!e->exit_requested)
2376                 return -ENODATA;
2377
2378         *code = e->exit_code;
2379         return 0;
2380 }
2381
2382 _public_ int sd_event_exit(sd_event *e, int code) {
2383         assert_return(e, -EINVAL);
2384         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2385         assert_return(!event_pid_changed(e), -ECHILD);
2386
2387         e->exit_requested = true;
2388         e->exit_code = code;
2389
2390         return 0;
2391 }
2392
2393 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2394         assert_return(e, -EINVAL);
2395         assert_return(usec, -EINVAL);
2396         assert_return(!event_pid_changed(e), -ECHILD);
2397
2398         /* If we haven't run yet, just get the actual time */
2399         if (!dual_timestamp_is_set(&e->timestamp))
2400                 return -ENODATA;
2401
2402         switch (clock) {
2403
2404         case CLOCK_REALTIME:
2405         case CLOCK_REALTIME_ALARM:
2406                 *usec = e->timestamp.realtime;
2407                 break;
2408
2409         case CLOCK_MONOTONIC:
2410                 *usec = e->timestamp.monotonic;
2411                 break;
2412
2413         case CLOCK_BOOTTIME:
2414         case CLOCK_BOOTTIME_ALARM:
2415                 *usec = e->timestamp_boottime;
2416                 break;
2417         }
2418
2419         return 0;
2420 }
2421
2422 _public_ int sd_event_default(sd_event **ret) {
2423
2424         static thread_local sd_event *default_event = NULL;
2425         sd_event *e = NULL;
2426         int r;
2427
2428         if (!ret)
2429                 return !!default_event;
2430
2431         if (default_event) {
2432                 *ret = sd_event_ref(default_event);
2433                 return 0;
2434         }
2435
2436         r = sd_event_new(&e);
2437         if (r < 0)
2438                 return r;
2439
2440         e->default_event_ptr = &default_event;
2441         e->tid = gettid();
2442         default_event = e;
2443
2444         *ret = e;
2445         return 1;
2446 }
2447
2448 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2449         assert_return(e, -EINVAL);
2450         assert_return(tid, -EINVAL);
2451         assert_return(!event_pid_changed(e), -ECHILD);
2452
2453         if (e->tid != 0) {
2454                 *tid = e->tid;
2455                 return 0;
2456         }
2457
2458         return -ENXIO;
2459 }
2460
/* Enables or disables service-manager watchdog support for this loop.
 * When enabling, reads the watchdog period from the environment
 * (WATCHDOG_USEC via sd_watchdog_enabled()), sends an immediate ping,
 * and hooks a timerfd into the epoll set for periodic pings. Returns
 * the resulting boolean watchdog state, 0 if the environment requests
 * no watchdog, or a negative errno-style error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state: nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* Returns 0 (and we bail out) if the environment
                 * doesn't request watchdog support for this service. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* The sentinel in data.ptr lets sd_event_run() route
                 * wakeups of this fd to the watchdog handler. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: unhook and close the timerfd if present. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Roll back the partially set up timerfd. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2512
2513 _public_ int sd_event_get_watchdog(sd_event *e) {
2514         assert_return(e, -EINVAL);
2515         assert_return(!event_pid_changed(e), -ECHILD);
2516
2517         return e->watchdog;
2518 }