/* Retrieved via gitweb: elogind, src/libsystemd/sd-event/sd-event.c,
 * commit c5f062b3e00409bd4f2d20dc67cfc4ae2e87a4b7 */
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* Discriminator for the per-source union in sd_event_source; also used
 * as epoll userdata tag for the loop's own fds (timerfds, signalfd). */
typedef enum EventSourceType {
        SOURCE_IO,                      /* fd watched via epoll */
        SOURCE_TIME_REALTIME,           /* timer on CLOCK_REALTIME */
        SOURCE_TIME_BOOTTIME,           /* timer on CLOCK_BOOTTIME */
        SOURCE_TIME_MONOTONIC,          /* timer on CLOCK_MONOTONIC */
        SOURCE_TIME_REALTIME_ALARM,     /* timer on CLOCK_REALTIME_ALARM */
        SOURCE_TIME_BOOTTIME_ALARM,     /* timer on CLOCK_BOOTTIME_ALARM */
        SOURCE_SIGNAL,                  /* signal delivered via the signalfd */
        SOURCE_CHILD,                   /* child state change, driven by SIGCHLD */
        SOURCE_DEFER,                   /* plain callback source */
        SOURCE_POST,                    /* kept in the loop's post_sources set */
        SOURCE_EXIT,                    /* dispatched via the exit prioq */
        SOURCE_WATCHDOG,                /* internal watchdog tag (no real source) */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the five timer source types above */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* One registered event source: a callback plus the bookkeeping needed
 * to queue and dispatch it. The trailing anonymous union carries the
 * per-type payload, discriminated by 'type'. */
struct sd_event_source {
        unsigned n_ref;                 /* reference count */

        sd_event *event;                /* owning loop; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;     /* optional pre-poll callback */

        char *name;                     /* freed together with the source */

        EventSourceType type:5;
        int enabled:3;                  /* SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;                 /* queued in event->pending */
        bool dispatching:1;             /* set while its callback runs — see dispatcher (not in this chunk) */
        bool floating:1;                /* owned by the loop itself, holds no loop ref */

        int64_t priority;               /* lower values dispatch first */
        unsigned pending_index;         /* slot in event->pending prioq */
        unsigned prepare_index;         /* slot in event->prepare prioq */
        unsigned pending_iteration;     /* loop iteration when marked pending */
        unsigned prepare_iteration;     /* loop iteration when last prepared */

        LIST_FIELDS(sd_event_source, sources);  /* linkage in event->sources */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* EPOLL* mask requested */
                        uint32_t revents;       /* EPOLL* mask last reported */
                        bool registered:1;      /* fd currently in the epoll set */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest trigger time + allowed slack */
                        unsigned earliest_index;        /* slot in clock's 'earliest' prioq */
                        unsigned latest_index;          /* slot in clock's 'latest' prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;                /* signal number watched */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* slot in event->exit prioq */
                } exit;
        };
};
122
/* Per-clock scheduling state, one instance per supported clock. */
struct clock_data {
        int fd;         /* timerfd for this clock; -1 until the first timer source */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;    /* currently programmed wakeup; USEC_INFINITY if none */

        bool needs_rearm:1;     /* prioqs changed, timerfd must be reprogrammed */
};
139
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;           /* the single fd we poll on */
        int signal_fd;          /* signalfd for all watched signals; -1 if none yet */
        int watchdog_fd;        /* timerfd for the watchdog logic; -1 if off */

        Prioq *pending;         /* sources with an undispatched event */
        Prioq *prepare;         /* sources with a prepare callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;         /* boot-id-derived wakeup offset, see initialize_perturb() */

        sigset_t sigset;        /* signals currently routed to signal_fd */
        sd_event_source **signal_sources;       /* indexed by signal number, _NSIG entries */

        Hashmap *child_sources;                 /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;       /* how many of those are enabled */

        Set *post_sources;      /* all SOURCE_POST sources */

        Prioq *exit;            /* SOURCE_EXIT sources, ordered by priority */

        pid_t original_pid;     /* detects use across fork(), see event_pid_changed() */

        unsigned iteration;     /* monotonically increasing loop iteration counter */
        dual_timestamp timestamp;       /* timestamps of the last wakeup (set outside this chunk) */
        usec_t timestamp_boottime;
        int state;              /* SD_EVENT_* state machine value */

        bool exit_requested:1;
        bool need_process_child:1;      /* SIGCHLD seen; a waitid() sweep is needed */
        bool watchdog:1;

        int exit_code;          /* value handed to sd_event_exit() */

        pid_t tid;              /* thread this loop is the default for (set outside this chunk) */
        sd_event **default_event_ptr;   /* slot to clear on destruction, if default loop */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;     /* number of connected sources */

        LIST_HEAD(sd_event_source, sources);
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
/* Destroy the loop and everything it owns. Called once the last
 * reference is gone; at that point only floating sources (owned by
 * the loop itself) may still be connected. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Disconnect and drop the loop's own reference on each
         * remaining (necessarily floating) source */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If we are installed as somebody's default loop, clear that slot */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
/* Allocate a new event loop. Only the epoll instance and the pending
 * prioq are created eagerly; all other fds and queues are set up
 * lazily when first needed. Returns 0 on success, negative errno-style
 * error otherwise. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* All lazily-created fds start out invalid */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();     /* to detect use across fork() later */
        e->perturb = USEC_INFINITY;     /* computed on demand, see initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
/* Detach a source from its event loop: undo all type-specific
 * registration, drop it from the pending/prepare queues and the
 * source list, and release the loop reference it holds (unless
 * floating). Safe to call on an already-disconnected source; the
 * source's memory itself is not freed here. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                /* The timerfd may now be armed for a source that is gone */
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                /* NOTE(review): assumes the enabled-source
                                 * counter was bumped when this source was
                                 * created/enabled — verify all callers
                                 * keep that invariant before free paths. */
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        /* Invalidate the source and unhook it from the loop */
        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* NOTE(review): sigdelset above only updates the in-memory
         * sigset; the signalfd itself is presumably refreshed by a
         * later event_update_signal_fd() call — confirm. */

        if (!s->floating)
                sd_event_unref(event);
}
697
698 static void source_free(sd_event_source *s) {
699         assert(s);
700
701         source_disconnect(s);
702         free(s->name);
703         free(s);
704 }
705
/* Mark a source as having (or no longer having) an undispatched
 * event, keeping the pending prioq — and, for timer sources, the
 * per-clock prioqs whose ordering depends on the pending bit — in
 * sync. Returns 0 on success or if nothing changed, negative on OOM. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back so state and queue stay consistent */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* The pending bit participates in the time prioq
                 * comparators, so reposition the source and request a
                 * timerfd reprogram */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
741
742 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
743         sd_event_source *s;
744
745         assert(e);
746
747         s = new0(sd_event_source, 1);
748         if (!s)
749                 return NULL;
750
751         s->n_ref = 1;
752         s->event = e;
753         s->floating = floating;
754         s->type = type;
755         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
756
757         if (!floating)
758                 sd_event_ref(e);
759
760         LIST_PREPEND(sources, e->sources, s);
761         e->n_sources ++;
762
763         return s;
764 }
765
/* Add an I/O source watching 'fd' for the given EPOLL* event mask.
 * The fd is registered with epoll immediately and the source starts
 * enabled (SD_EVENT_ON). With a NULL 'ret' the source is created
 * floating, i.e. owned by the event loop. Returns 0 or negative
 * errno-style error. */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
805
/* Lazily compute the loop's wakeup perturbation offset from the boot
 * ID. Left at USEC_INFINITY if the boot ID cannot be read. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
824
825 static int event_setup_timer_fd(
826                 sd_event *e,
827                 struct clock_data *d,
828                 clockid_t clock) {
829
830         struct epoll_event ev = {};
831         int r, fd;
832
833         assert(e);
834         assert(d);
835
836         if (_likely_(d->fd >= 0))
837                 return 0;
838
839         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
840         if (fd < 0)
841                 return -errno;
842
843         ev.events = EPOLLIN;
844         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
845
846         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
847         if (r < 0) {
848                 safe_close(fd);
849                 return -errno;
850         }
851
852         d->fd = fd;
853         return 0;
854 }
855
/* Add a timer source on the given clock. "usec" is the earliest time
 * the callback may be dispatched, "accuracy" how much later dispatch
 * may be delayed to coalesce wakeups (0 selects the 250ms default).
 * The per-clock prioqs and the timerfd are created lazily here. The
 * source starts out as SD_EVENT_ONESHOT. Returns 0 or a negative
 * errno-style error (-ENOTSUP for unsupported clocks). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily create the two scheduling prioqs for this clock... */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ...and the timerfd backing them */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The timerfd must be reprogrammed to cover the new entry */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
931
/* (Re)configure the signalfd to match e->sigset; on first creation,
 * also add it to the epoll set, tagged SOURCE_SIGNAL so wakeups can be
 * attributed. Returns 0 or negative errno; a freshly created fd is
 * closed again if the epoll registration fails. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        /* Passing the existing fd updates its signal mask in place */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
961
/* Default handler installed by sd_event_add_signal() when the caller
 * passes no callback: request loop exit, using the integer smuggled
 * through userdata as exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
967
968 _public_ int sd_event_add_signal(
969                 sd_event *e,
970                 sd_event_source **ret,
971                 int sig,
972                 sd_event_signal_handler_t callback,
973                 void *userdata) {
974
975         sd_event_source *s;
976         sigset_t ss;
977         int r;
978         bool previous;
979
980         assert_return(e, -EINVAL);
981         assert_return(sig > 0, -EINVAL);
982         assert_return(sig < _NSIG, -EINVAL);
983         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
984         assert_return(!event_pid_changed(e), -ECHILD);
985
986         if (!callback)
987                 callback = signal_exit_callback;
988
989         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
990         if (r < 0)
991                 return -errno;
992
993         if (!sigismember(&ss, sig))
994                 return -EBUSY;
995
996         if (!e->signal_sources) {
997                 e->signal_sources = new0(sd_event_source*, _NSIG);
998                 if (!e->signal_sources)
999                         return -ENOMEM;
1000         } else if (e->signal_sources[sig])
1001                 return -EBUSY;
1002
1003         previous = need_signal(e, sig);
1004
1005         s = source_new(e, !ret, SOURCE_SIGNAL);
1006         if (!s)
1007                 return -ENOMEM;
1008
1009         s->signal.sig = sig;
1010         s->signal.callback = callback;
1011         s->userdata = userdata;
1012         s->enabled = SD_EVENT_ON;
1013
1014         e->signal_sources[sig] = s;
1015
1016         if (!previous) {
1017                 assert_se(sigaddset(&e->sigset, sig) == 0);
1018
1019                 r = event_update_signal_fd(e);
1020                 if (r < 0) {
1021                         source_free(s);
1022                         return r;
1023                 }
1024         }
1025
1026         if (ret)
1027                 *ret = s;
1028
1029         return 0;
1030 }
1031
/* Adds an event source watching the child process 'pid' for the state
 * changes selected in 'options' (some combination of WEXITED, WSTOPPED,
 * WCONTINUED; at least one required). Child events are driven by SIGCHLD
 * through the loop's signalfd. At most one source per PID is allowed.
 * Returns 0 on success, -EBUSY if the PID is already watched, or another
 * negative errno-style code on failure. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one source may watch any given PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already required before this source
         * existed, so the signalfd is only updated when the mask changes. */
        previous = need_signal(e, SIGCHLD);

        /* If the caller keeps no reference, create the source "floating"
         * (owned by the event loop). */
        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        /* NOTE(review): assumes source_free() reverts the
                         * hashmap entry and the enabled-children counter —
                         * confirm in source_disconnect(). */
                        source_free(s);
                        return r;
                }
        }

        /* The child might already have changed state before we armed the
         * signalfd; force an explicit waitid() sweep on the next iteration. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1096
1097 _public_ int sd_event_add_defer(
1098                 sd_event *e,
1099                 sd_event_source **ret,
1100                 sd_event_handler_t callback,
1101                 void *userdata) {
1102
1103         sd_event_source *s;
1104         int r;
1105
1106         assert_return(e, -EINVAL);
1107         assert_return(callback, -EINVAL);
1108         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1109         assert_return(!event_pid_changed(e), -ECHILD);
1110
1111         s = source_new(e, !ret, SOURCE_DEFER);
1112         if (!s)
1113                 return -ENOMEM;
1114
1115         s->defer.callback = callback;
1116         s->userdata = userdata;
1117         s->enabled = SD_EVENT_ONESHOT;
1118
1119         r = source_set_pending(s, true);
1120         if (r < 0) {
1121                 source_free(s);
1122                 return r;
1123         }
1124
1125         if (ret)
1126                 *ret = s;
1127
1128         return 0;
1129 }
1130
1131 _public_ int sd_event_add_post(
1132                 sd_event *e,
1133                 sd_event_source **ret,
1134                 sd_event_handler_t callback,
1135                 void *userdata) {
1136
1137         sd_event_source *s;
1138         int r;
1139
1140         assert_return(e, -EINVAL);
1141         assert_return(callback, -EINVAL);
1142         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1143         assert_return(!event_pid_changed(e), -ECHILD);
1144
1145         r = set_ensure_allocated(&e->post_sources, NULL);
1146         if (r < 0)
1147                 return r;
1148
1149         s = source_new(e, !ret, SOURCE_POST);
1150         if (!s)
1151                 return -ENOMEM;
1152
1153         s->post.callback = callback;
1154         s->userdata = userdata;
1155         s->enabled = SD_EVENT_ON;
1156
1157         r = set_put(e->post_sources, s);
1158         if (r < 0) {
1159                 source_free(s);
1160                 return r;
1161         }
1162
1163         if (ret)
1164                 *ret = s;
1165
1166         return 0;
1167 }
1168
1169 _public_ int sd_event_add_exit(
1170                 sd_event *e,
1171                 sd_event_source **ret,
1172                 sd_event_handler_t callback,
1173                 void *userdata) {
1174
1175         sd_event_source *s;
1176         int r;
1177
1178         assert_return(e, -EINVAL);
1179         assert_return(callback, -EINVAL);
1180         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1181         assert_return(!event_pid_changed(e), -ECHILD);
1182
1183         if (!e->exit) {
1184                 e->exit = prioq_new(exit_prioq_compare);
1185                 if (!e->exit)
1186                         return -ENOMEM;
1187         }
1188
1189         s = source_new(e, !ret, SOURCE_EXIT);
1190         if (!s)
1191                 return -ENOMEM;
1192
1193         s->exit.callback = callback;
1194         s->userdata = userdata;
1195         s->exit.prioq_index = PRIOQ_IDX_NULL;
1196         s->enabled = SD_EVENT_ONESHOT;
1197
1198         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1199         if (r < 0) {
1200                 source_free(s);
1201                 return r;
1202         }
1203
1204         if (ret)
1205                 *ret = s;
1206
1207         return 0;
1208 }
1209
1210 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1211         assert_return(s, NULL);
1212
1213         assert(s->n_ref >= 1);
1214         s->n_ref++;
1215
1216         return s;
1217 }
1218
1219 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1220
1221         if (!s)
1222                 return NULL;
1223
1224         assert(s->n_ref >= 1);
1225         s->n_ref--;
1226
1227         if (s->n_ref <= 0) {
1228                 /* Here's a special hack: when we are called from a
1229                  * dispatch handler we won't free the event source
1230                  * immediately, but we will detach the fd from the
1231                  * epoll. This way it is safe for the caller to unref
1232                  * the event source and immediately close the fd, but
1233                  * we still retain a valid event source object after
1234                  * the callback. */
1235
1236                 if (s->dispatching) {
1237                         if (s->type == SOURCE_IO)
1238                                 source_io_unregister(s);
1239
1240                         source_disconnect(s);
1241                 } else
1242                         source_free(s);
1243         }
1244
1245         return NULL;
1246 }
1247
/* Sets a human-readable name on the source (for debugging); replaces
 * any previous name via free_and_strdup(). */
_public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
        assert_return(s, -EINVAL);

        return free_and_strdup(&s->name, name);
}
1253
/* Stores the source's name (may be NULL) in *name; the string stays
 * owned by the event source. */
_public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
        assert_return(s, -EINVAL);
        assert_return(name, -EINVAL);

        *name = s->name;

        return 0;
}
1262
/* Returns the event loop the source is attached to (no new reference). */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1268
/* Returns whether the source is currently queued for dispatch (1/0).
 * Exit sources have no pending state, hence -EDOM for them. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1277
/* Returns the file descriptor an I/O source watches. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1285
/* Replaces the fd an I/O source watches. If the source is enabled, the
 * new fd is registered with epoll first and the old registration is
 * dropped only afterwards, so a failure leaves the old fd armed. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled: just record the fd; registration happens on enable. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back to the previous, still-registered fd. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd from epoll; the return
                 * value is deliberately ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1321
/* Stores the configured epoll event mask of an I/O source in *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1331
1332 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1333         int r;
1334
1335         assert_return(s, -EINVAL);
1336         assert_return(s->type == SOURCE_IO, -EDOM);
1337         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1338         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1339         assert_return(!event_pid_changed(s->event), -ECHILD);
1340
1341         /* edge-triggered updates are never skipped, so we can reset edges */
1342         if (s->io.events == events && !(events & EPOLLET))
1343                 return 0;
1344
1345         if (s->enabled != SD_EVENT_OFF) {
1346                 r = source_io_register(s, s->enabled, events);
1347                 if (r < 0)
1348                         return r;
1349         }
1350
1351         s->io.events = events;
1352         source_set_pending(s, false);
1353
1354         return 0;
1355 }
1356
/* Stores the epoll events that actually triggered in *revents; only
 * meaningful while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1367
/* Returns the signal number a signal source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1375
1376 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1377         assert_return(s, -EINVAL);
1378         assert_return(!event_pid_changed(s->event), -ECHILD);
1379
1380         return s->priority;
1381 }
1382
1383 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1384         assert_return(s, -EINVAL);
1385         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1386         assert_return(!event_pid_changed(s->event), -ECHILD);
1387
1388         if (s->priority == priority)
1389                 return 0;
1390
1391         s->priority = priority;
1392
1393         if (s->pending)
1394                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1395
1396         if (s->prepare)
1397                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1398
1399         if (s->type == SOURCE_EXIT)
1400                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1401
1402         return 0;
1403 }
1404
/* Stores the source's enable state (SD_EVENT_OFF/ON/ONESHOT) in *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1413
1414 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1415         int r;
1416
1417         assert_return(s, -EINVAL);
1418         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1419         assert_return(!event_pid_changed(s->event), -ECHILD);
1420
1421         /* If we are dead anyway, we are fine with turning off
1422          * sources, but everything else needs to fail. */
1423         if (s->event->state == SD_EVENT_FINISHED)
1424                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1425
1426         if (s->enabled == m)
1427                 return 0;
1428
1429         if (m == SD_EVENT_OFF) {
1430
1431                 switch (s->type) {
1432
1433                 case SOURCE_IO:
1434                         r = source_io_unregister(s);
1435                         if (r < 0)
1436                                 return r;
1437
1438                         s->enabled = m;
1439                         break;
1440
1441                 case SOURCE_TIME_REALTIME:
1442                 case SOURCE_TIME_BOOTTIME:
1443                 case SOURCE_TIME_MONOTONIC:
1444                 case SOURCE_TIME_REALTIME_ALARM:
1445                 case SOURCE_TIME_BOOTTIME_ALARM: {
1446                         struct clock_data *d;
1447
1448                         s->enabled = m;
1449                         d = event_get_clock_data(s->event, s->type);
1450                         assert(d);
1451
1452                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1453                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1454                         d->needs_rearm = true;
1455                         break;
1456                 }
1457
1458                 case SOURCE_SIGNAL:
1459                         assert(need_signal(s->event, s->signal.sig));
1460
1461                         s->enabled = m;
1462
1463                         if (!need_signal(s->event, s->signal.sig)) {
1464                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1465
1466                                 (void) event_update_signal_fd(s->event);
1467                                 /* If disabling failed, we might get a spurious event,
1468                                  * but otherwise nothing bad should happen. */
1469                         }
1470
1471                         break;
1472
1473                 case SOURCE_CHILD:
1474                         assert(need_signal(s->event, SIGCHLD));
1475
1476                         s->enabled = m;
1477
1478                         assert(s->event->n_enabled_child_sources > 0);
1479                         s->event->n_enabled_child_sources--;
1480
1481                         if (!need_signal(s->event, SIGCHLD)) {
1482                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1483
1484                                 (void) event_update_signal_fd(s->event);
1485                         }
1486
1487                         break;
1488
1489                 case SOURCE_EXIT:
1490                         s->enabled = m;
1491                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1492                         break;
1493
1494                 case SOURCE_DEFER:
1495                 case SOURCE_POST:
1496                         s->enabled = m;
1497                         break;
1498
1499                 default:
1500                         assert_not_reached("Wut? I shouldn't exist.");
1501                 }
1502
1503         } else {
1504                 switch (s->type) {
1505
1506                 case SOURCE_IO:
1507                         r = source_io_register(s, m, s->io.events);
1508                         if (r < 0)
1509                                 return r;
1510
1511                         s->enabled = m;
1512                         break;
1513
1514                 case SOURCE_TIME_REALTIME:
1515                 case SOURCE_TIME_BOOTTIME:
1516                 case SOURCE_TIME_MONOTONIC:
1517                 case SOURCE_TIME_REALTIME_ALARM:
1518                 case SOURCE_TIME_BOOTTIME_ALARM: {
1519                         struct clock_data *d;
1520
1521                         s->enabled = m;
1522                         d = event_get_clock_data(s->event, s->type);
1523                         assert(d);
1524
1525                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1526                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1527                         d->needs_rearm = true;
1528                         break;
1529                 }
1530
1531                 case SOURCE_SIGNAL:
1532                         /* Check status before enabling. */
1533                         if (!need_signal(s->event, s->signal.sig)) {
1534                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1535
1536                                 r = event_update_signal_fd(s->event);
1537                                 if (r < 0) {
1538                                         s->enabled = SD_EVENT_OFF;
1539                                         return r;
1540                                 }
1541                         }
1542
1543                         s->enabled = m;
1544                         break;
1545
1546                 case SOURCE_CHILD:
1547                         /* Check status before enabling. */
1548                         if (s->enabled == SD_EVENT_OFF) {
1549                                 if (!need_signal(s->event, SIGCHLD)) {
1550                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1551
1552                                         r = event_update_signal_fd(s->event);
1553                                         if (r < 0) {
1554                                                 s->enabled = SD_EVENT_OFF;
1555                                                 return r;
1556                                         }
1557                                 }
1558
1559                                 s->event->n_enabled_child_sources++;
1560                         }
1561
1562                         s->enabled = m;
1563                         break;
1564
1565                 case SOURCE_EXIT:
1566                         s->enabled = m;
1567                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1568                         break;
1569
1570                 case SOURCE_DEFER:
1571                 case SOURCE_POST:
1572                         s->enabled = m;
1573                         break;
1574
1575                 default:
1576                         assert_not_reached("Wut? I shouldn't exist.");
1577                 }
1578         }
1579
1580         if (s->pending)
1581                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1582
1583         if (s->prepare)
1584                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1585
1586         return 0;
1587 }
1588
/* Stores the timer source's next absolute expiry time (usec, on the
 * source's clock) in *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1598
/* Sets the absolute expiry time (usec, on the source's clock) of a
 * timer source and re-sorts it into the clock's queues. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* A pending timer must be re-evaluated against the new deadline. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1621
/* Stores the timer source's dispatch accuracy (usec) in *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1631
/* Sets how much a timer source's dispatch may be delayed past its
 * expiry time; 0 selects the default (DEFAULT_ACCURACY_USEC). */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        /* Accuracy only affects the latest-allowed wakeup, hence only
         * the "latest" queue needs re-sorting. */
        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1656
/* Stores the clockid_t a timer source runs on in *clock. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1666
/* Stores the PID a child source watches in *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1676
/* Installs (or, with NULL, removes) a prepare callback invoked before
 * each poll. Sources with a prepare callback are tracked in the loop's
 * prepare priority queue. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one callback for another: queue membership is
         * unchanged, only the pointer needs updating. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1708
/* Returns the opaque userdata pointer attached to the source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1714
1715 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1716         void *ret;
1717
1718         assert_return(s, NULL);
1719
1720         ret = s->userdata;
1721         s->userdata = userdata;
1722
1723         return ret;
1724 }
1725
1726 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1727         usec_t c;
1728         assert(e);
1729         assert(a <= b);
1730
1731         if (a <= 0)
1732                 return 0;
1733
1734         if (b <= a + 1)
1735                 return a;
1736
1737         initialize_perturb(e);
1738
1739         /*
1740           Find a good time to wake up again between times a and b. We
1741           have two goals here:
1742
1743           a) We want to wake up as seldom as possible, hence prefer
1744              later times over earlier times.
1745
1746           b) But if we have to wake up, then let's make sure to
1747              dispatch as much as possible on the entire system.
1748
1749           We implement this by waking up everywhere at the same time
1750           within any given minute if we can, synchronised via the
1751           perturbation value determined from the boot ID. If we can't,
1752           then we try to find the same spot in every 10s, then 1s and
1753           then 250ms step. Otherwise, we pick the last possible time
1754           to wake up.
1755         */
1756
1757         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1758         if (c >= b) {
1759                 if (_unlikely_(c < USEC_PER_MINUTE))
1760                         return b;
1761
1762                 c -= USEC_PER_MINUTE;
1763         }
1764
1765         if (c >= a)
1766                 return c;
1767
1768         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1769         if (c >= b) {
1770                 if (_unlikely_(c < USEC_PER_SEC*10))
1771                         return b;
1772
1773                 c -= USEC_PER_SEC*10;
1774         }
1775
1776         if (c >= a)
1777                 return c;
1778
1779         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1780         if (c >= b) {
1781                 if (_unlikely_(c < USEC_PER_SEC))
1782                         return b;
1783
1784                 c -= USEC_PER_SEC;
1785         }
1786
1787         if (c >= a)
1788                 return c;
1789
1790         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1791         if (c >= b) {
1792                 if (_unlikely_(c < USEC_PER_MSEC*250))
1793                         return b;
1794
1795                 c -= USEC_PER_MSEC*250;
1796         }
1797
1798         if (c >= a)
1799                 return c;
1800
1801         return b;
1802 }
1803
1804 static int event_arm_timer(
1805                 sd_event *e,
1806                 struct clock_data *d) {
1807
1808         struct itimerspec its = {};
1809         sd_event_source *a, *b;
1810         usec_t t;
1811         int r;
1812
1813         assert(e);
1814         assert(d);
1815
1816         if (!d->needs_rearm)
1817                 return 0;
1818         else
1819                 d->needs_rearm = false;
1820
1821         a = prioq_peek(d->earliest);
1822         if (!a || a->enabled == SD_EVENT_OFF) {
1823
1824                 if (d->fd < 0)
1825                         return 0;
1826
1827                 if (d->next == USEC_INFINITY)
1828                         return 0;
1829
1830                 /* disarm */
1831                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1832                 if (r < 0)
1833                         return r;
1834
1835                 d->next = USEC_INFINITY;
1836                 return 0;
1837         }
1838
1839         b = prioq_peek(d->latest);
1840         assert_se(b && b->enabled != SD_EVENT_OFF);
1841
1842         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1843         if (d->next == t)
1844                 return 0;
1845
1846         assert_se(d->fd >= 0);
1847
1848         if (t == 0) {
1849                 /* We don' want to disarm here, just mean some time looooong ago. */
1850                 its.it_value.tv_sec = 0;
1851                 its.it_value.tv_nsec = 1;
1852         } else
1853                 timespec_store(&its.it_value, t);
1854
1855         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1856         if (r < 0)
1857                 return -errno;
1858
1859         d->next = t;
1860         return 0;
1861 }
1862
1863 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1864         assert(e);
1865         assert(s);
1866         assert(s->type == SOURCE_IO);
1867
1868         /* If the event source was already pending, we just OR in the
1869          * new revents, otherwise we reset the value. The ORing is
1870          * necessary to handle EPOLLONESHOT events properly where
1871          * readability might happen independently of writability, and
1872          * we need to keep track of both */
1873
1874         if (s->pending)
1875                 s->io.revents |= revents;
1876         else
1877                 s->io.revents = revents;
1878
1879         return source_set_pending(s, true);
1880 }
1881
1882 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1883         uint64_t x;
1884         ssize_t ss;
1885
1886         assert(e);
1887         assert(fd >= 0);
1888
1889         assert_return(events == EPOLLIN, -EIO);
1890
1891         ss = read(fd, &x, sizeof(x));
1892         if (ss < 0) {
1893                 if (errno == EAGAIN || errno == EINTR)
1894                         return 0;
1895
1896                 return -errno;
1897         }
1898
1899         if (_unlikely_(ss != sizeof(x)))
1900                 return -EIO;
1901
1902         if (next)
1903                 *next = USEC_INFINITY;
1904
1905         return 0;
1906 }
1907
1908 static int process_timer(
1909                 sd_event *e,
1910                 usec_t n,
1911                 struct clock_data *d) {
1912
1913         sd_event_source *s;
1914         int r;
1915
1916         assert(e);
1917         assert(d);
1918
1919         for (;;) {
1920                 s = prioq_peek(d->earliest);
1921                 if (!s ||
1922                     s->time.next > n ||
1923                     s->enabled == SD_EVENT_OFF ||
1924                     s->pending)
1925                         break;
1926
1927                 r = source_set_pending(s, true);
1928                 if (r < 0)
1929                         return r;
1930
1931                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1932                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1933                 d->needs_rearm = true;
1934         }
1935
1936         return 0;
1937 }
1938
/* Polls every enabled, not-yet-pending child source with waitid(WNOHANG)
 * and queues those whose child changed state. Returns 0 or -errno. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* WNOWAIT only if we'd otherwise reap (WEXITED): keep the
                 * zombie around until the callback has seen it. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when no state change was available. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                /* Best-effort dequeue; return value deliberately unchecked. */
                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2005
/* Drain the signalfd and mark the matching signal sources pending. A
 * SIGCHLD additionally triggers the per-child waitid() probing in
 * process_child().
 *
 * Returns 1 if at least one siginfo record was read, 0 if the fd was
 * already empty, a negative errno-style error code on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: queue fully drained; EINTR: stop here,
                         * we'll be woken again if more is queued. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* The signalfd hands out whole siginfo records only. */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Nobody subscribed to this signal, drop it. */
                if (!s)
                        continue;

                /* Note: if the same signal arrives again before the source is
                 * dispatched, the stored siginfo is overwritten. */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2054
/* Invoke the callback of a single pending event source (or of an exit
 * source during shutdown) and perform the surrounding bookkeeping.
 *
 * Returns 1 on success or a negative error code if the bookkeeping
 * itself failed. A failing *callback* is not propagated: it is logged
 * at debug level and the source is disabled instead. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources stay pending across dispatches; all
         * other types are un-queued before their callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Disable one-shot sources *before* the callback, so the
         * callback may legitimately re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* NOTE(review): s->dispatching presumably makes unref defer the
         * actual free while the callback runs (see the n_ref == 0 check
         * below) — confirm against sd_event_source_unref(). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Remember whether the child actually died before the
                 * callback possibly overwrites the stored siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->name)
                        log_debug("Event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                else
                        log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
        }

        /* The callback may have dropped the last reference to the
         * source; complete the deferred free now that dispatching
         * is done. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2160
/* Run the prepare callbacks of all sources that registered one, at most
 * once per loop iteration each (tracked via s->prepare_iteration).
 *
 * Returns 0 on success, a negative error code otherwise. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* Stop once the queue head was already prepared this
                 * iteration, or is disabled. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp the source with the current iteration and re-sort it,
                 * so the loop above eventually terminates. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                /* A failing prepare callback is logged and disables the
                 * source, but does not abort the loop iteration. */
                if (r < 0) {
                        if (s->name)
                                log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                        else
                                log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
                }

                /* The callback may have dropped the last reference;
                 * finish the deferred free now. */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2199
2200 static int dispatch_exit(sd_event *e) {
2201         sd_event_source *p;
2202         int r;
2203
2204         assert(e);
2205
2206         p = prioq_peek(e->exit);
2207         if (!p || p->enabled == SD_EVENT_OFF) {
2208                 e->state = SD_EVENT_FINISHED;
2209                 return 0;
2210         }
2211
2212         sd_event_ref(e);
2213         e->iteration++;
2214         e->state = SD_EVENT_EXITING;
2215
2216         r = source_dispatch(p);
2217
2218         e->state = SD_EVENT_PASSIVE;
2219         sd_event_unref(e);
2220
2221         return r;
2222 }
2223
2224 static sd_event_source* event_next_pending(sd_event *e) {
2225         sd_event_source *p;
2226
2227         assert(e);
2228
2229         p = prioq_peek(e->pending);
2230         if (!p)
2231                 return NULL;
2232
2233         if (p->enabled == SD_EVENT_OFF)
2234                 return NULL;
2235
2236         return p;
2237 }
2238
2239 static int arm_watchdog(sd_event *e) {
2240         struct itimerspec its = {};
2241         usec_t t;
2242         int r;
2243
2244         assert(e);
2245         assert(e->watchdog_fd >= 0);
2246
2247         t = sleep_between(e,
2248                           e->watchdog_last + (e->watchdog_period / 2),
2249                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2250
2251         timespec_store(&its.it_value, t);
2252
2253         /* Make sure we never set the watchdog to 0, which tells the
2254          * kernel to disable it. */
2255         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2256                 its.it_value.tv_nsec = 1;
2257
2258         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2259         if (r < 0)
2260                 return -errno;
2261
2262         return 0;
2263 }
2264
2265 static int process_watchdog(sd_event *e) {
2266         assert(e);
2267
2268         if (!e->watchdog)
2269                 return 0;
2270
2271         /* Don't notify watchdog too often */
2272         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2273                 return 0;
2274
2275         sd_notify(false, "WATCHDOG=1");
2276         e->watchdog_last = e->timestamp.monotonic;
2277
2278         return arm_watchdog(e);
2279 }
2280
/* First stage of an event loop iteration: run prepare callbacks and
 * re-arm the per-clock timerfds. Returns > 0 (state PENDING) if there
 * is already something to dispatch, 0 (state PREPARED) if the caller
 * should go on to sd_event_wait(), or a negative error code. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Re-arm one timerfd per clock so the subsequent poll wakes up
         * in time for the earliest timer source on each of them. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is known to be dispatchable already: do a
         * zero-timeout wait to collect further events without
         * blocking. sd_event_wait() requires state PREPARED and
         * resets it to PASSIVE when it returns 0, so restore
         * PREPARED in that case to keep a later blocking wait
         * permitted. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2333
/* Second stage of an iteration: poll for events for at most the given
 * timeout in usec ((uint64_t) -1 means forever), then mark everything
 * that became dispatchable as pending.
 *
 * Returns 1 (state PENDING) if there is something to dispatch,
 * 0 (state PASSIVE) if nothing happened, or a negative error code. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocate the epoll output buffer, capped so the frame stays bounded. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* usec timeout rounded up to the next full millisecond for epoll. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Report as dispatchable; sd_event_dispatch()
                         * copes with an empty pending queue. */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Sample the clocks once, so all timer processing below uses a consistent "now". */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds carry their source-type constant as the epoll
                 * data pointer; everything else is a user IO source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue every timer source that has elapsed, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2434
2435 _public_ int sd_event_dispatch(sd_event *e) {
2436         sd_event_source *p;
2437         int r;
2438
2439         assert_return(e, -EINVAL);
2440         assert_return(!event_pid_changed(e), -ECHILD);
2441         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2442         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2443
2444         if (e->exit_requested)
2445                 return dispatch_exit(e);
2446
2447         p = event_next_pending(e);
2448         if (p) {
2449                 sd_event_ref(e);
2450
2451                 e->state = SD_EVENT_RUNNING;
2452                 r = source_dispatch(p);
2453                 e->state = SD_EVENT_PASSIVE;
2454
2455                 sd_event_unref(e);
2456
2457                 return r;
2458         }
2459
2460         e->state = SD_EVENT_PASSIVE;
2461
2462         return 1;
2463 }
2464
2465 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2466         int r;
2467
2468         assert_return(e, -EINVAL);
2469         assert_return(!event_pid_changed(e), -ECHILD);
2470         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2471         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2472
2473         r = sd_event_prepare(e);
2474         if (r > 0)
2475                 return sd_event_dispatch(e);
2476         else if (r < 0)
2477                 return r;
2478
2479         r = sd_event_wait(e, timeout);
2480         if (r > 0)
2481                 return sd_event_dispatch(e);
2482         else
2483                 return r;
2484 }
2485
2486 _public_ int sd_event_loop(sd_event *e) {
2487         int r;
2488
2489         assert_return(e, -EINVAL);
2490         assert_return(!event_pid_changed(e), -ECHILD);
2491         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2492
2493         sd_event_ref(e);
2494
2495         while (e->state != SD_EVENT_FINISHED) {
2496                 r = sd_event_run(e, (uint64_t) -1);
2497                 if (r < 0)
2498                         goto finish;
2499         }
2500
2501         r = e->exit_code;
2502
2503 finish:
2504         sd_event_unref(e);
2505         return r;
2506 }
2507
2508 _public_ int sd_event_get_fd(sd_event *e) {
2509
2510         assert_return(e, -EINVAL);
2511         assert_return(!event_pid_changed(e), -ECHILD);
2512
2513         return e->epoll_fd;
2514 }
2515
2516 _public_ int sd_event_get_state(sd_event *e) {
2517         assert_return(e, -EINVAL);
2518         assert_return(!event_pid_changed(e), -ECHILD);
2519
2520         return e->state;
2521 }
2522
2523 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2524         assert_return(e, -EINVAL);
2525         assert_return(code, -EINVAL);
2526         assert_return(!event_pid_changed(e), -ECHILD);
2527
2528         if (!e->exit_requested)
2529                 return -ENODATA;
2530
2531         *code = e->exit_code;
2532         return 0;
2533 }
2534
2535 _public_ int sd_event_exit(sd_event *e, int code) {
2536         assert_return(e, -EINVAL);
2537         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2538         assert_return(!event_pid_changed(e), -ECHILD);
2539
2540         e->exit_requested = true;
2541         e->exit_code = code;
2542
2543         return 0;
2544 }
2545
2546 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2547         assert_return(e, -EINVAL);
2548         assert_return(usec, -EINVAL);
2549         assert_return(!event_pid_changed(e), -ECHILD);
2550
2551         /* If we haven't run yet, just get the actual time */
2552         if (!dual_timestamp_is_set(&e->timestamp))
2553                 return -ENODATA;
2554
2555         switch (clock) {
2556
2557         case CLOCK_REALTIME:
2558         case CLOCK_REALTIME_ALARM:
2559                 *usec = e->timestamp.realtime;
2560                 break;
2561
2562         case CLOCK_MONOTONIC:
2563                 *usec = e->timestamp.monotonic;
2564                 break;
2565
2566         case CLOCK_BOOTTIME:
2567         case CLOCK_BOOTTIME_ALARM:
2568                 *usec = e->timestamp_boottime;
2569                 break;
2570         }
2571
2572         return 0;
2573 }
2574
2575 _public_ int sd_event_default(sd_event **ret) {
2576
2577         static thread_local sd_event *default_event = NULL;
2578         sd_event *e = NULL;
2579         int r;
2580
2581         if (!ret)
2582                 return !!default_event;
2583
2584         if (default_event) {
2585                 *ret = sd_event_ref(default_event);
2586                 return 0;
2587         }
2588
2589         r = sd_event_new(&e);
2590         if (r < 0)
2591                 return r;
2592
2593         e->default_event_ptr = &default_event;
2594         e->tid = gettid();
2595         default_event = e;
2596
2597         *ret = e;
2598         return 1;
2599 }
2600
2601 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2602         assert_return(e, -EINVAL);
2603         assert_return(tid, -EINVAL);
2604         assert_return(!event_pid_changed(e), -ECHILD);
2605
2606         if (e->tid != 0) {
2607                 *tid = e->tid;
2608                 return 0;
2609         }
2610
2611         return -ENXIO;
2612 }
2613
/* Enable (b != 0) or disable integration with the service manager's
 * watchdog: while enabled, the loop pings via sd_notify("WATCHDOG=1")
 * on a timerfd schedule derived from the configured period.
 *
 * Returns the new state (0 or 1) on success, a negative error code on
 * failure. Returns 0 without enabling anything when sd_watchdog_enabled()
 * reports that no watchdog was requested for this service. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state, nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* "false": leave the watchdog environment variables in
                 * place, other code may want to consult them too. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd with the watchdog marker so sd_event_wait()
                 * routes its wakeups to flush_timer(). */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: drop the timerfd from epoll and close it. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2665
2666 _public_ int sd_event_get_watchdog(sd_event *e) {
2667         assert_return(e, -EINVAL);
2668         assert_return(!event_pid_changed(e), -ECHILD);
2669
2670         return e->watchdog;
2671 }