chiark / gitweb /
sd-event: check the value of received signal
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* Internal discriminator for the union in sd_event_source. The five time
 * types mirror the clocks timerfd supports (see event_source_type_to_clock()). */
typedef enum EventSourceType {
        SOURCE_IO,                   /* an fd watched via epoll */
        SOURCE_TIME_REALTIME,        /* CLOCK_REALTIME timer */
        SOURCE_TIME_BOOTTIME,        /* CLOCK_BOOTTIME timer */
        SOURCE_TIME_MONOTONIC,       /* CLOCK_MONOTONIC timer */
        SOURCE_TIME_REALTIME_ALARM,  /* CLOCK_REALTIME_ALARM timer */
        SOURCE_TIME_BOOTTIME_ALARM,  /* CLOCK_BOOTTIME_ALARM timer */
        SOURCE_SIGNAL,               /* a signal received via signalfd */
        SOURCE_CHILD,                /* child state change, driven by SIGCHLD */
        SOURCE_DEFER,                /* always-pending source */
        SOURCE_POST,                 /* kept in e->post_sources set */
        SOURCE_EXIT,                 /* dispatched from e->exit prioq on loop exit */
        SOURCE_WATCHDOG,             /* internal, backs e->watchdog_fd */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
59
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* One registered event source. Which member of the trailing union is valid
 * is determined by 'type'. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;      /* loop we are attached to; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;  /* optional callback run before polling */

        char *name;

        /* Bitfields: 'type' must fit all EventSourceType values, 'enabled'
         * holds SD_EVENT_OFF/ON/ONESHOT (signed, 3 bits). */
        EventSourceType type:5;
        int enabled:3;
        bool pending:1;      /* queued in e->pending prioq */
        bool dispatching:1;  /* currently inside its callback */
        bool floating:1;     /* source holds a ref on the event, not vice versa */

        int64_t priority;
        unsigned pending_index;      /* index within e->pending prioq */
        unsigned prepare_index;      /* index within e->prepare prioq */
        unsigned pending_iteration;  /* loop iteration when queued pending */
        unsigned prepare_iteration;  /* loop iteration when last prepared */

        LIST_FIELDS(sd_event_source, sources);

        /* Per-type payload, discriminated by 'type' above. */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;   /* epoll events we asked for */
                        uint32_t revents;  /* epoll events we received */
                        bool registered:1; /* fd currently added to epoll */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* wakeup window [next, next+accuracy] */
                        unsigned earliest_index; /* index in clock_data->earliest */
                        unsigned latest_index;   /* index in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options; /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index in e->exit prioq */
                } exit;
        };
};
122
/* Per-clock bookkeeping: one timerfd plus the two scheduling prioqs. */
struct clock_data {
        int fd;  /* timerfd for this clock, or -1 if not set up yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;  /* time the timerfd is currently armed for */

        bool needs_rearm:1;  /* prioqs changed, timerfd must be re-armed */
};
139
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int signal_fd;    /* shared signalfd for all signal/child sources */
        int watchdog_fd;  /* timerfd driving sd_notify() watchdog pings */

        Prioq *pending;  /* sources with an event waiting to be dispatched */
        Prioq *prepare;  /* sources with a prepare callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;  /* boot-id derived wakeup offset, see initialize_perturb() */

        sigset_t sigset;                  /* signals routed through signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized array, indexed by signo */

        Hashmap *child_sources;  /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;  /* pid at creation, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;     /* realtime+monotonic of current iteration */
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;  /* SIGCHLD seen, waitid() sweep needed */
        bool watchdog:1;

        int exit_code;

        pid_t tid;                    /* thread owning this as default loop */
        sd_event **default_event_ptr; /* per-thread default slot to clear on free */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
/* Destroy an event loop object. Called when the last reference is dropped
 * (and from the sd_event_new() failure path, where members may still be in
 * their initial -1/NULL state). Ordering matters: sources must be
 * disconnected while the prioqs/hashmaps they are registered in are still
 * alive, so the prioq_free()/hashmap_free() calls come after the loop. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources may still be attached here; a non-floating
         * source holds a reference on 'e', so its existence would have kept
         * us from being freed in the first place. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If we were installed as the thread's default loop, clear that slot */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
402 _public_ int sd_event_new(sd_event** ret) {
403         sd_event *e;
404         int r;
405
406         assert_return(ret, -EINVAL);
407
408         e = new0(sd_event, 1);
409         if (!e)
410                 return -ENOMEM;
411
412         e->n_ref = 1;
413         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
414         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
415         e->original_pid = getpid();
416         e->perturb = USEC_INFINITY;
417
418         assert_se(sigemptyset(&e->sigset) == 0);
419
420         e->pending = prioq_new(pending_prioq_compare);
421         if (!e->pending) {
422                 r = -ENOMEM;
423                 goto fail;
424         }
425
426         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427         if (e->epoll_fd < 0) {
428                 r = -errno;
429                 goto fail;
430         }
431
432         *ret = e;
433         return 0;
434
435 fail:
436         event_free(e);
437         return r;
438 }
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating am event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
/* Detach a source from its event loop: undo all per-type registrations
 * (epoll, prioqs, sigset, hashmaps), remove it from the pending/prepare
 * queues and the global source list, and drop the loop reference held by
 * non-floating sources. Safe to call on an already-disconnected source.
 * The source object itself stays allocated (see source_free()). */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                /* The top of the prioqs may have changed, re-arm the timerfd */
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        /* Keep SIGCHLD in the sigset if child sources still need it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        /* Keep SIGCHLD in the sigset if an explicit signal
                         * source for it is still registered */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        /* Stash the loop pointer: we clear s->event before dropping the
         * reference, as the unref below may be the loop's last. */
        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
685
/* Fully destroy a source: disconnect it from its loop (if any), then free
 * the name string and the object itself. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->name);
        free(s);
}
693
/* Move a source in or out of the pending queue. For time sources this also
 * reshuffles both clock prioqs, since pending sources sort to the back of
 * them, and flags the clock for timerfd re-arming. Returns 0 or negative
 * errno; on allocation failure the pending flag is rolled back. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* EXIT sources are dispatched via their own prioq, never "pending" */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when this became pending, for FIFO ordering among equals */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                /* assert_se: the removal must happen even with NDEBUG */
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
729
730 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
731         sd_event_source *s;
732
733         assert(e);
734
735         s = new0(sd_event_source, 1);
736         if (!s)
737                 return NULL;
738
739         s->n_ref = 1;
740         s->event = e;
741         s->floating = floating;
742         s->type = type;
743         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
744
745         if (!floating)
746                 sd_event_ref(e);
747
748         LIST_PREPEND(sources, e->sources, s);
749         e->n_sources ++;
750
751         return s;
752 }
753
754 _public_ int sd_event_add_io(
755                 sd_event *e,
756                 sd_event_source **ret,
757                 int fd,
758                 uint32_t events,
759                 sd_event_io_handler_t callback,
760                 void *userdata) {
761
762         sd_event_source *s;
763         int r;
764
765         assert_return(e, -EINVAL);
766         assert_return(fd >= 0, -EINVAL);
767         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
768         assert_return(callback, -EINVAL);
769         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
770         assert_return(!event_pid_changed(e), -ECHILD);
771
772         s = source_new(e, !ret, SOURCE_IO);
773         if (!s)
774                 return -ENOMEM;
775
776         s->io.fd = fd;
777         s->io.events = events;
778         s->io.callback = callback;
779         s->userdata = userdata;
780         s->enabled = SD_EVENT_ON;
781
782         r = source_io_register(s, s->enabled, events);
783         if (r < 0) {
784                 source_free(s);
785                 return r;
786         }
787
788         if (ret)
789                 *ret = s;
790
791         return 0;
792 }
793
794 static void initialize_perturb(sd_event *e) {
795         sd_id128_t bootid = {};
796
797         /* When we sleep for longer, we try to realign the wakeup to
798            the same time wihtin each minute/second/250ms, so that
799            events all across the system can be coalesced into a single
800            CPU wakeup. However, let's take some system-specific
801            randomness for this value, so that in a network of systems
802            with synced clocks timer events are distributed a
803            bit. Here, we calculate a perturbation usec offset from the
804            boot ID. */
805
806         if (_likely_(e->perturb != USEC_INFINITY))
807                 return;
808
809         if (sd_id128_get_boot(&bootid) >= 0)
810                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
811 }
812
813 static int event_setup_timer_fd(
814                 sd_event *e,
815                 struct clock_data *d,
816                 clockid_t clock) {
817
818         struct epoll_event ev = {};
819         int r, fd;
820
821         assert(e);
822         assert(d);
823
824         if (_likely_(d->fd >= 0))
825                 return 0;
826
827         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
828         if (fd < 0)
829                 return -errno;
830
831         ev.events = EPOLLIN;
832         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
833
834         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
835         if (r < 0) {
836                 safe_close(fd);
837                 return -errno;
838         }
839
840         d->fd = fd;
841         return 0;
842 }
843
/* Register a timer firing at absolute time 'usec' on 'clock', with the
 * given dispatch accuracy window (0 selects DEFAULT_ACCURACY_USEC). The
 * per-clock prioqs and timerfd are created lazily on first use. The new
 * source starts as SD_EVENT_ONESHOT. Returns 0 or negative errno
 * (-ENOTSUP for clocks timerfd cannot handle). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the two scheduling prioqs for this clock */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* Lazily create the timerfd and hook it into epoll */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The timerfd must be re-armed now that a new deadline exists */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        /* source_free() disconnects s, removing any prioq entry already made */
        source_free(s);
        return r;
}
919
/* (Re)create the loop's signalfd to match the current e->sigset. Passing
 * the existing fd to signalfd() updates its mask in place, so the fd only
 * needs to be added to epoll the first time around; the epoll data pointer
 * carries SOURCE_SIGNAL so the dispatcher can identify signal wakeups.
 * Returns 0 or negative errno. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
949
/* Default handler installed by sd_event_add_signal() when the caller passes
 * no callback: request loop exit, with userdata carrying the exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
955
956 _public_ int sd_event_add_signal(
957                 sd_event *e,
958                 sd_event_source **ret,
959                 int sig,
960                 sd_event_signal_handler_t callback,
961                 void *userdata) {
962
963         sd_event_source *s;
964         sigset_t ss;
965         int r;
966
967         assert_return(e, -EINVAL);
968         assert_return(sig > 0, -EINVAL);
969         assert_return(sig < _NSIG, -EINVAL);
970         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
971         assert_return(!event_pid_changed(e), -ECHILD);
972
973         if (!callback)
974                 callback = signal_exit_callback;
975
976         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
977         if (r < 0)
978                 return -errno;
979
980         if (!sigismember(&ss, sig))
981                 return -EBUSY;
982
983         if (!e->signal_sources) {
984                 e->signal_sources = new0(sd_event_source*, _NSIG);
985                 if (!e->signal_sources)
986                         return -ENOMEM;
987         } else if (e->signal_sources[sig])
988                 return -EBUSY;
989
990         s = source_new(e, !ret, SOURCE_SIGNAL);
991         if (!s)
992                 return -ENOMEM;
993
994         s->signal.sig = sig;
995         s->signal.callback = callback;
996         s->userdata = userdata;
997         s->enabled = SD_EVENT_ON;
998
999         e->signal_sources[sig] = s;
1000         assert_se(sigaddset(&e->sigset, sig) == 0);
1001
1002         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
1003                 r = event_update_signal_fd(e);
1004                 if (r < 0) {
1005                         source_free(s);
1006                         return r;
1007                 }
1008         }
1009
1010         if (ret)
1011                 *ret = s;
1012
1013         return 0;
1014 }
1015
/* Watch a child process via SIGCHLD + waitid(). 'options' is a non-empty
 * subset of WEXITED|WSTOPPED|WCONTINUED; only one source per pid may exist
 * (-EBUSY otherwise). The new source starts as SD_EVENT_ONESHOT. SIGCHLD
 * is added to the loop's signalfd mask unless an explicit SIGCHLD signal
 * source already routes it. Returns 0 or negative errno. Note: unlike
 * sd_event_add_signal() this does not verify SIGCHLD is blocked in the
 * caller's thread. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one watcher per pid */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Counted before the fallible steps below: source_free() ->
         * source_disconnect() decrements it again on the error path. */
        e->n_enabled_child_sources ++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        /* If an explicit SIGCHLD signal source exists, the signalfd already
         * carries SIGCHLD and needs no update. */
        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Trigger a waitid() sweep on the next iteration in case the child
         * already changed state. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1077
1078 _public_ int sd_event_add_defer(
1079                 sd_event *e,
1080                 sd_event_source **ret,
1081                 sd_event_handler_t callback,
1082                 void *userdata) {
1083
1084         sd_event_source *s;
1085         int r;
1086
1087         assert_return(e, -EINVAL);
1088         assert_return(callback, -EINVAL);
1089         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1090         assert_return(!event_pid_changed(e), -ECHILD);
1091
1092         s = source_new(e, !ret, SOURCE_DEFER);
1093         if (!s)
1094                 return -ENOMEM;
1095
1096         s->defer.callback = callback;
1097         s->userdata = userdata;
1098         s->enabled = SD_EVENT_ONESHOT;
1099
1100         r = source_set_pending(s, true);
1101         if (r < 0) {
1102                 source_free(s);
1103                 return r;
1104         }
1105
1106         if (ret)
1107                 *ret = s;
1108
1109         return 0;
1110 }
1111
1112 _public_ int sd_event_add_post(
1113                 sd_event *e,
1114                 sd_event_source **ret,
1115                 sd_event_handler_t callback,
1116                 void *userdata) {
1117
1118         sd_event_source *s;
1119         int r;
1120
1121         assert_return(e, -EINVAL);
1122         assert_return(callback, -EINVAL);
1123         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1124         assert_return(!event_pid_changed(e), -ECHILD);
1125
1126         r = set_ensure_allocated(&e->post_sources, NULL);
1127         if (r < 0)
1128                 return r;
1129
1130         s = source_new(e, !ret, SOURCE_POST);
1131         if (!s)
1132                 return -ENOMEM;
1133
1134         s->post.callback = callback;
1135         s->userdata = userdata;
1136         s->enabled = SD_EVENT_ON;
1137
1138         r = set_put(e->post_sources, s);
1139         if (r < 0) {
1140                 source_free(s);
1141                 return r;
1142         }
1143
1144         if (ret)
1145                 *ret = s;
1146
1147         return 0;
1148 }
1149
1150 _public_ int sd_event_add_exit(
1151                 sd_event *e,
1152                 sd_event_source **ret,
1153                 sd_event_handler_t callback,
1154                 void *userdata) {
1155
1156         sd_event_source *s;
1157         int r;
1158
1159         assert_return(e, -EINVAL);
1160         assert_return(callback, -EINVAL);
1161         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1162         assert_return(!event_pid_changed(e), -ECHILD);
1163
1164         if (!e->exit) {
1165                 e->exit = prioq_new(exit_prioq_compare);
1166                 if (!e->exit)
1167                         return -ENOMEM;
1168         }
1169
1170         s = source_new(e, !ret, SOURCE_EXIT);
1171         if (!s)
1172                 return -ENOMEM;
1173
1174         s->exit.callback = callback;
1175         s->userdata = userdata;
1176         s->exit.prioq_index = PRIOQ_IDX_NULL;
1177         s->enabled = SD_EVENT_ONESHOT;
1178
1179         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1180         if (r < 0) {
1181                 source_free(s);
1182                 return r;
1183         }
1184
1185         if (ret)
1186                 *ret = s;
1187
1188         return 0;
1189 }
1190
/* Increases the reference count of the event source by one. Returns
 * the source itself, for convenient chaining; NULL if s is NULL. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        /* The counter must never have reached zero while the object is
         * still handed around. */
        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1199
/* Drops one reference. When the last reference is gone the source is
 * freed — unless its handler is currently being dispatched, in which
 * case destruction is deferred (see comment below). Always returns
 * NULL, so callers can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1228
/* Sets a (purely informational) name on the event source, replacing
 * any previous one; a NULL name clears it. The string is copied. */
_public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
        assert_return(s, -EINVAL);

        return free_and_strdup(&s->name, name);
}
1234
/* Returns the name previously set with sd_event_source_set_name(), or
 * NULL if none was set. The returned string stays owned by the source. */
_public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
        assert_return(s, -EINVAL);
        assert_return(name, -EINVAL);

        *name = s->name;

        return 0;
}
1243
/* Returns the event loop object this source is attached to. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1249
/* Returns whether the source currently has an event queued for
 * dispatching. Not defined for exit sources (-EDOM). */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1258
/* Returns the file descriptor an IO source watches; -EDOM for other
 * source types. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1266
/* Replaces the file descriptor an IO source watches. If the source is
 * enabled, the new fd is registered with epoll *before* the old one is
 * removed, so that a registration failure leaves the previous state
 * fully intact. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered with epoll anyway; just remember the fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back to the old fd, which is still registered. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best-effort removal of the old fd; errors are ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1302
/* Returns the epoll event mask (EPOLLIN/EPOLLOUT/…) the IO source is
 * configured for. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1312
/* Changes the epoll event mask of an IO source, re-registering the fd
 * with the kernel if the source is enabled. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        /* Any previously collected revents refer to the old mask; drop
         * the pending state. */
        source_set_pending(s, false);

        return 0;
}
1337
/* Returns the events that actually triggered on the IO source. Only
 * valid while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1348
/* Returns the signal number a signal source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1356
1357 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1358         assert_return(s, -EINVAL);
1359         assert_return(!event_pid_changed(s->event), -ECHILD);
1360
1361         return s->priority;
1362 }
1363
/* Changes the dispatch priority (smaller values dispatch earlier) and
 * reshuffles every priority queue that orders by it. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        /* Exit sources are ordered by priority in their own queue, too. */
        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1385
/* Returns the enablement state (SD_EVENT_OFF/ON/ONESHOT) via *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1394
/* Enables or disables an event source. 'm' is SD_EVENT_OFF, SD_EVENT_ON
 * or SD_EVENT_ONESHOT. Besides flipping the flag this (un)registers the
 * source with whatever kernel facility backs it: epoll for IO sources,
 * the signal mask/fd for signal and child sources, and the per-clock
 * priority queues for timer sources. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Detach from epoll first; only flip the flag if
                         * that worked. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        /* Disabled timers sort behind enabled ones, so the
                         * queues need reshuffling and the timerfd rearming. */
                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the mask if child sources still
                         * depend on it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Keep SIGCHLD in the mask if an explicit SIGCHLD
                         * signal source still needs it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register with epoll first; only flip the flag if
                         * that worked. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* If child sources already keep SIGCHLD in the mask
                         * there is nothing to update. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only count the transition OFF -> enabled once. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* The enablement state participates in the ordering of these
         * queues, hence reshuffle. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1548
/* Returns the configured expiry time (in µs, on the source's clock) of
 * a timer source. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1558
/* Changes the expiry time of a timer source and requeues it in the
 * per-clock priority queues so the timerfd gets rearmed. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* Any pending elapse refers to the old deadline; drop it. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1581
/* Returns the accuracy (maximum permitted delay, in µs) of a timer
 * source. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1591
/* Changes the accuracy (maximum permitted delay) of a timer source; 0
 * selects the default accuracy. Only the 'latest' queue depends on the
 * accuracy, so only that one is reshuffled. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1616
/* Returns the clock id (CLOCK_MONOTONIC, CLOCK_REALTIME, …) a timer
 * source is based on. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1626
/* Returns the PID a child source watches. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1636
/* Installs (or, with NULL, removes) a prepare callback that runs right
 * before the event loop goes to sleep. Sources with a prepare callback
 * are tracked in a dedicated priority queue. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one callback for another: the source is already in
         * the prepare queue, just update the pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        /* Setting a callback adds the source to the queue, clearing it
         * removes it. */
        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1668
/* Returns the opaque userdata pointer associated with the source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1674
1675 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1676         void *ret;
1677
1678         assert_return(s, NULL);
1679
1680         ret = s->userdata;
1681         s->userdata = userdata;
1682
1683         return ret;
1684 }
1685
1686 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1687         usec_t c;
1688         assert(e);
1689         assert(a <= b);
1690
1691         if (a <= 0)
1692                 return 0;
1693
1694         if (b <= a + 1)
1695                 return a;
1696
1697         initialize_perturb(e);
1698
1699         /*
1700           Find a good time to wake up again between times a and b. We
1701           have two goals here:
1702
1703           a) We want to wake up as seldom as possible, hence prefer
1704              later times over earlier times.
1705
1706           b) But if we have to wake up, then let's make sure to
1707              dispatch as much as possible on the entire system.
1708
1709           We implement this by waking up everywhere at the same time
1710           within any given minute if we can, synchronised via the
1711           perturbation value determined from the boot ID. If we can't,
1712           then we try to find the same spot in every 10s, then 1s and
1713           then 250ms step. Otherwise, we pick the last possible time
1714           to wake up.
1715         */
1716
1717         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1718         if (c >= b) {
1719                 if (_unlikely_(c < USEC_PER_MINUTE))
1720                         return b;
1721
1722                 c -= USEC_PER_MINUTE;
1723         }
1724
1725         if (c >= a)
1726                 return c;
1727
1728         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1729         if (c >= b) {
1730                 if (_unlikely_(c < USEC_PER_SEC*10))
1731                         return b;
1732
1733                 c -= USEC_PER_SEC*10;
1734         }
1735
1736         if (c >= a)
1737                 return c;
1738
1739         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1740         if (c >= b) {
1741                 if (_unlikely_(c < USEC_PER_SEC))
1742                         return b;
1743
1744                 c -= USEC_PER_SEC;
1745         }
1746
1747         if (c >= a)
1748                 return c;
1749
1750         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1751         if (c >= b) {
1752                 if (_unlikely_(c < USEC_PER_MSEC*250))
1753                         return b;
1754
1755                 c -= USEC_PER_MSEC*250;
1756         }
1757
1758         if (c >= a)
1759                 return c;
1760
1761         return b;
1762 }
1763
1764 static int event_arm_timer(
1765                 sd_event *e,
1766                 struct clock_data *d) {
1767
1768         struct itimerspec its = {};
1769         sd_event_source *a, *b;
1770         usec_t t;
1771         int r;
1772
1773         assert(e);
1774         assert(d);
1775
1776         if (!d->needs_rearm)
1777                 return 0;
1778         else
1779                 d->needs_rearm = false;
1780
1781         a = prioq_peek(d->earliest);
1782         if (!a || a->enabled == SD_EVENT_OFF) {
1783
1784                 if (d->fd < 0)
1785                         return 0;
1786
1787                 if (d->next == USEC_INFINITY)
1788                         return 0;
1789
1790                 /* disarm */
1791                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1792                 if (r < 0)
1793                         return r;
1794
1795                 d->next = USEC_INFINITY;
1796                 return 0;
1797         }
1798
1799         b = prioq_peek(d->latest);
1800         assert_se(b && b->enabled != SD_EVENT_OFF);
1801
1802         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1803         if (d->next == t)
1804                 return 0;
1805
1806         assert_se(d->fd >= 0);
1807
1808         if (t == 0) {
1809                 /* We don' want to disarm here, just mean some time looooong ago. */
1810                 its.it_value.tv_sec = 0;
1811                 its.it_value.tv_nsec = 1;
1812         } else
1813                 timespec_store(&its.it_value, t);
1814
1815         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1816         if (r < 0)
1817                 return -errno;
1818
1819         d->next = t;
1820         return 0;
1821 }
1822
1823 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1824         assert(e);
1825         assert(s);
1826         assert(s->type == SOURCE_IO);
1827
1828         /* If the event source was already pending, we just OR in the
1829          * new revents, otherwise we reset the value. The ORing is
1830          * necessary to handle EPOLLONESHOT events properly where
1831          * readability might happen independently of writability, and
1832          * we need to keep track of both */
1833
1834         if (s->pending)
1835                 s->io.revents |= revents;
1836         else
1837                 s->io.revents = revents;
1838
1839         return source_set_pending(s, true);
1840 }
1841
1842 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1843         uint64_t x;
1844         ssize_t ss;
1845
1846         assert(e);
1847         assert(fd >= 0);
1848
1849         assert_return(events == EPOLLIN, -EIO);
1850
1851         ss = read(fd, &x, sizeof(x));
1852         if (ss < 0) {
1853                 if (errno == EAGAIN || errno == EINTR)
1854                         return 0;
1855
1856                 return -errno;
1857         }
1858
1859         if (_unlikely_(ss != sizeof(x)))
1860                 return -EIO;
1861
1862         if (next)
1863                 *next = USEC_INFINITY;
1864
1865         return 0;
1866 }
1867
/* Marks every enabled timer source of clock 'd' whose deadline is at
 * or before 'n' as pending, and requeues it so the timerfd can be
 * rearmed for the remaining sources. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                /* The 'earliest' queue yields the next elapsing source;
                 * stop as soon as the head is not due (or not usable). */
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                /* Pending sources sort differently; requeue and rearm. */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
1898
/* Checks each watched child with waitid(WNOHANG) and marks the sources
 * whose child changed state as pending. Children are not reaped here
 * (WNOWAIT); that happens after the callback was dispatched. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued — don't query again until dispatched. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only applies to exit events; stop/continue
                 * notifications are consumed right away. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid == 0 means no state change was available. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1965
/* Drain the signalfd and mark the matching signal event sources as
 * pending. Returns > 0 if at least one siginfo was consumed, 0 if the
 * fd was already empty, negative errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        /* The signalfd is only ever watched for readability */
        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: fully drained; EINTR: try again on
                         * the next wakeup. Either way report whether
                         * anything was consumed. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* The kernel hands out whole structures only; a short
                 * read would mean something is badly off */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                /* Validate the signal number before it is used as an
                 * index into signal_sources[] below */
                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        /* NOTE(review): r > 0 apparently means the
                         * SIGCHLD was fully absorbed by child sources;
                         * process_child()'s positive returns are not
                         * visible here — confirm. */
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Nobody subscribed to this signal */
                if (!s)
                        continue;

                /* Stash the siginfo for the callback and queue the source */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2014
/* Invoke the callback of a single event source. Returns 1 on success or
 * a negative error from internal bookkeeping; an error from the user
 * callback itself is only logged and disables the source, it is not
 * propagated to the caller. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources are not un-queued here, so they keep
         * firing on every iteration until explicitly disabled */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Oneshot sources are disabled before the callback runs, which
         * leaves the callback free to re-enable them */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Did the child actually die? (It was waited for with
                 * WNOWAIT earlier, so it is still a zombie at this
                 * point and the callback can inspect it.) */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->name)
                        log_debug("Event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                else
                        log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
        }

        /* The callback may have dropped the last user reference
         * (n_ref == 0 while dispatching defers the free); only now is
         * it safe to release the source */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2120
/* Run the prepare() callbacks of all sources that registered one, at
 * most once per event loop iteration. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* NOTE(review): breaking (rather than continuing) when
                 * the top source was already prepared this iteration or
                 * is disabled relies on the prepare prioq comparator
                 * sorting such sources last — the comparator is not
                 * visible in this file section, confirm. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as handled and re-sort before running the
                 * callback, so it cannot be picked up twice */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->name)
                                log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                        else
                                log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
                }

                /* The callback may have dropped the last reference */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2159
2160 static int dispatch_exit(sd_event *e) {
2161         sd_event_source *p;
2162         int r;
2163
2164         assert(e);
2165
2166         p = prioq_peek(e->exit);
2167         if (!p || p->enabled == SD_EVENT_OFF) {
2168                 e->state = SD_EVENT_FINISHED;
2169                 return 0;
2170         }
2171
2172         sd_event_ref(e);
2173         e->iteration++;
2174         e->state = SD_EVENT_EXITING;
2175
2176         r = source_dispatch(p);
2177
2178         e->state = SD_EVENT_PASSIVE;
2179         sd_event_unref(e);
2180
2181         return r;
2182 }
2183
2184 static sd_event_source* event_next_pending(sd_event *e) {
2185         sd_event_source *p;
2186
2187         assert(e);
2188
2189         p = prioq_peek(e->pending);
2190         if (!p)
2191                 return NULL;
2192
2193         if (p->enabled == SD_EVENT_OFF)
2194                 return NULL;
2195
2196         return p;
2197 }
2198
2199 static int arm_watchdog(sd_event *e) {
2200         struct itimerspec its = {};
2201         usec_t t;
2202         int r;
2203
2204         assert(e);
2205         assert(e->watchdog_fd >= 0);
2206
2207         t = sleep_between(e,
2208                           e->watchdog_last + (e->watchdog_period / 2),
2209                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2210
2211         timespec_store(&its.it_value, t);
2212
2213         /* Make sure we never set the watchdog to 0, which tells the
2214          * kernel to disable it. */
2215         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2216                 its.it_value.tv_nsec = 1;
2217
2218         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2219         if (r < 0)
2220                 return -errno;
2221
2222         return 0;
2223 }
2224
2225 static int process_watchdog(sd_event *e) {
2226         assert(e);
2227
2228         if (!e->watchdog)
2229                 return 0;
2230
2231         /* Don't notify watchdog too often */
2232         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2233                 return 0;
2234
2235         sd_notify(false, "WATCHDOG=1");
2236         e->watchdog_last = e->timestamp.monotonic;
2237
2238         return arm_watchdog(e);
2239 }
2240
/* Public API: advance the loop from PASSIVE to PREPARED. Returns > 0
 * (and moves to PENDING via the zero-timeout wait below) when there is
 * already something to dispatch, 0 when the caller should poll. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        /* Run all prepare() callbacks first; they may queue new work */
        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Re-arm each clock's timerfd for its earliest deadline */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is dispatchable already: do a zero-timeout wait so
         * sd_event_wait() can collect it and switch us to PENDING. If
         * it found nothing after all (r == 0) it reset the state to
         * PASSIVE, so restore PREPARED. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2293
/* Public API: poll for events with the given timeout (usec; (uint64_t)-1
 * means infinity). Returns 1 and moves to PENDING when something is
 * dispatchable, 0 and moves back to PASSIVE when nothing happened,
 * negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        /* An exit request is always immediately dispatchable */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Size the event buffer to the number of sources, bounded to
         * keep the stack allocation reasonable */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Convert usec to ms, rounding up so we never wake early */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat EINTR like a wakeup and let the caller come
                 * back through dispatch */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache "now" on all clocks once per wakeup; the timer
         * processing below uses these snapshots */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are registered with tagged pointers
                 * (INT_TO_PTR of the source type); anything else is a
                 * real IO source pointer */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue all timer sources whose deadline has elapsed, per clock */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2394
2395 _public_ int sd_event_dispatch(sd_event *e) {
2396         sd_event_source *p;
2397         int r;
2398
2399         assert_return(e, -EINVAL);
2400         assert_return(!event_pid_changed(e), -ECHILD);
2401         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2402         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2403
2404         if (e->exit_requested)
2405                 return dispatch_exit(e);
2406
2407         p = event_next_pending(e);
2408         if (p) {
2409                 sd_event_ref(e);
2410
2411                 e->state = SD_EVENT_RUNNING;
2412                 r = source_dispatch(p);
2413                 e->state = SD_EVENT_PASSIVE;
2414
2415                 sd_event_unref(e);
2416
2417                 return r;
2418         }
2419
2420         e->state = SD_EVENT_PASSIVE;
2421
2422         return 1;
2423 }
2424
2425 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2426         int r;
2427
2428         assert_return(e, -EINVAL);
2429         assert_return(!event_pid_changed(e), -ECHILD);
2430         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2431         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2432
2433         r = sd_event_prepare(e);
2434         if (r > 0)
2435                 return sd_event_dispatch(e);
2436         else if (r < 0)
2437                 return r;
2438
2439         r = sd_event_wait(e, timeout);
2440         if (r > 0)
2441                 return sd_event_dispatch(e);
2442         else
2443                 return r;
2444 }
2445
2446 _public_ int sd_event_loop(sd_event *e) {
2447         int r;
2448
2449         assert_return(e, -EINVAL);
2450         assert_return(!event_pid_changed(e), -ECHILD);
2451         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2452
2453         sd_event_ref(e);
2454
2455         while (e->state != SD_EVENT_FINISHED) {
2456                 r = sd_event_run(e, (uint64_t) -1);
2457                 if (r < 0)
2458                         goto finish;
2459         }
2460
2461         r = e->exit_code;
2462
2463 finish:
2464         sd_event_unref(e);
2465         return r;
2466 }
2467
2468 _public_ int sd_event_get_fd(sd_event *e) {
2469
2470         assert_return(e, -EINVAL);
2471         assert_return(!event_pid_changed(e), -ECHILD);
2472
2473         return e->epoll_fd;
2474 }
2475
2476 _public_ int sd_event_get_state(sd_event *e) {
2477         assert_return(e, -EINVAL);
2478         assert_return(!event_pid_changed(e), -ECHILD);
2479
2480         return e->state;
2481 }
2482
2483 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2484         assert_return(e, -EINVAL);
2485         assert_return(code, -EINVAL);
2486         assert_return(!event_pid_changed(e), -ECHILD);
2487
2488         if (!e->exit_requested)
2489                 return -ENODATA;
2490
2491         *code = e->exit_code;
2492         return 0;
2493 }
2494
2495 _public_ int sd_event_exit(sd_event *e, int code) {
2496         assert_return(e, -EINVAL);
2497         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2498         assert_return(!event_pid_changed(e), -ECHILD);
2499
2500         e->exit_requested = true;
2501         e->exit_code = code;
2502
2503         return 0;
2504 }
2505
2506 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2507         assert_return(e, -EINVAL);
2508         assert_return(usec, -EINVAL);
2509         assert_return(!event_pid_changed(e), -ECHILD);
2510
2511         /* If we haven't run yet, just get the actual time */
2512         if (!dual_timestamp_is_set(&e->timestamp))
2513                 return -ENODATA;
2514
2515         switch (clock) {
2516
2517         case CLOCK_REALTIME:
2518         case CLOCK_REALTIME_ALARM:
2519                 *usec = e->timestamp.realtime;
2520                 break;
2521
2522         case CLOCK_MONOTONIC:
2523                 *usec = e->timestamp.monotonic;
2524                 break;
2525
2526         case CLOCK_BOOTTIME:
2527         case CLOCK_BOOTTIME_ALARM:
2528                 *usec = e->timestamp_boottime;
2529                 break;
2530         }
2531
2532         return 0;
2533 }
2534
/* Public API: return (creating on first use) the calling thread's
 * default event loop. With ret == NULL this only reports whether one
 * already exists (1/0). Returns 1 when a loop was created, 0 when an
 * existing one was reused, negative errno on failure. */
_public_ int sd_event_default(sd_event **ret) {

        /* One cached default loop per thread */
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember the cache slot so it can be cleared when the loop
         * is destroyed (presumably handled by the free path, which is
         * not visible here — confirm) */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2560
2561 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2562         assert_return(e, -EINVAL);
2563         assert_return(tid, -EINVAL);
2564         assert_return(!event_pid_changed(e), -ECHILD);
2565
2566         if (e->tid != 0) {
2567                 *tid = e->tid;
2568                 return 0;
2569         }
2570
2571         return -ENXIO;
2572 }
2573
/* Public API: enable/disable automatic watchdog keep-alive pings
 * ("WATCHDOG=1" notifications) driven by this event loop. Returns the
 * resulting watchdog state (0/1), 0 if the environment requests no
 * watchdog, or a negative errno. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* Per the sd_watchdog_enabled() contract, 0 means the
                 * environment did not configure a watchdog; pass that
                 * through so the caller knows nothing was armed */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Register the timerfd under its tagged-pointer marker */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        /* Deregister explicitly before closing (closing
                         * the fd would also drop it from the epoll set) */
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Tear down the half-initialized timerfd; e->watchdog stays off */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2625
2626 _public_ int sd_event_get_watchdog(sd_event *e) {
2627         assert_return(e, -EINVAL);
2628         assert_return(!event_pid_changed(e), -ECHILD);
2629
2630         return e->watchdog;
2631 }