chiark / gitweb /
618d1b915ce7343955b15fcf05f6a6d390d763b1
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36
37 #include "sd-event.h"
38
39 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
40
/* Discriminator for the per-type union inside sd_event_source. The five
 * SOURCE_TIME_* values correspond one-to-one to the clocks supported by
 * timerfd (see event_source_type_to_clock() below). */
typedef enum EventSourceType {
        SOURCE_IO,                      /* fd-based, driven via epoll */
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,                  /* driven via a shared signalfd */
        SOURCE_CHILD,                   /* SIGCHLD-based child watching */
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
/* One registered event source. Which member of the trailing union is
 * valid is determined by 'type'. */
struct sd_event_source {
        unsigned n_ref;                 /* reference count */

        sd_event *event;                /* loop this source is attached to; NULL once disconnected */
        void *userdata;                 /* opaque pointer handed back to the callback */
        sd_event_handler_t prepare;     /* optional callback invoked before polling */

        char *description;              /* free-form name, owned (freed in source_free()) */

        EventSourceType type:5;
        int enabled:3;                  /* SD_EVENT_OFF / ON / ONESHOT */
        bool pending:1;                 /* queued in the loop's 'pending' prioq */
        bool dispatching:1;             /* currently inside its callback */
        bool floating:1;                /* owned by the loop; holds no ref on it (see source_new()) */

        int64_t priority;               /* lower values dispatch first */
        unsigned pending_index;         /* slot in e->pending prioq (PRIOQ_IDX_NULL when absent) */
        unsigned prepare_index;         /* slot in e->prepare prioq */
        unsigned pending_iteration;     /* loop iteration in which the source became pending */
        unsigned prepare_iteration;     /* loop iteration in which it was last prepared */

        LIST_FIELDS(sd_event_source, sources);  /* membership in e->sources */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* requested EPOLL* mask */
                        uint32_t revents;       /* events seen on the fd — filled in by the dispatcher (outside this chunk) */
                        bool registered:1;      /* fd is currently in the epoll set */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest trigger time, and allowed coalescing slack */
                        unsigned earliest_index;        /* slot in clock_data->earliest */
                        unsigned latest_index;          /* slot in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;        /* presumably filled from the signalfd read — done outside this chunk */
                        int sig;                                /* signal number watched */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;      /* child status — filled in outside this chunk */
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* slot in e->exit prioq */
                } exit;
        };
};
120
/* Per-clock bookkeeping: one timerfd plus two scheduling queues. */
struct clock_data {
        int fd;         /* timerfd for this clock; -1 until the first timer source is added */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;    /* USEC_INFINITY when nothing is scheduled (set in sd_event_new()) */

        bool needs_rearm:1;     /* queues changed; the timerfd must be reprogrammed before polling */
};
137
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;         /* reference count; non-floating sources hold one ref each */

        int epoll_fd;           /* central epoll instance all fds are registered with */
        int signal_fd;          /* shared signalfd for all signal/child sources; -1 until needed */
        int watchdog_fd;

        Prioq *pending;         /* sources waiting to be dispatched */
        Prioq *prepare;         /* sources with a prepare callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;         /* per-machine wakeup offset; see initialize_perturb() */

        sigset_t sigset;        /* signals currently routed through signal_fd */
        sd_event_source **signal_sources;       /* _NSIG-sized array, indexed by signal number */

        Hashmap *child_sources;                 /* pid (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;       /* how many child sources are not OFF */

        Set *post_sources;

        Prioq *exit;            /* SOURCE_EXIT sources, dispatched at loop shutdown */

        pid_t original_pid;     /* PID at creation time, to detect use across fork() */

        unsigned iteration;     /* monotonically increasing loop iteration counter */
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;   /* points at the per-thread default-loop cache, if any */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;     /* number of attached sources */

        LIST_HEAD(sd_event_source, sources);    /* every attached source */
};
191
192 static void source_disconnect(sd_event_source *s);
193
194 static int pending_prioq_compare(const void *a, const void *b) {
195         const sd_event_source *x = a, *y = b;
196
197         assert(x->pending);
198         assert(y->pending);
199
200         /* Enabled ones first */
201         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
202                 return -1;
203         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
204                 return 1;
205
206         /* Lower priority values first */
207         if (x->priority < y->priority)
208                 return -1;
209         if (x->priority > y->priority)
210                 return 1;
211
212         /* Older entries first */
213         if (x->pending_iteration < y->pending_iteration)
214                 return -1;
215         if (x->pending_iteration > y->pending_iteration)
216                 return 1;
217
218         /* Stability for the rest */
219         if (x < y)
220                 return -1;
221         if (x > y)
222                 return 1;
223
224         return 0;
225 }
226
227 static int prepare_prioq_compare(const void *a, const void *b) {
228         const sd_event_source *x = a, *y = b;
229
230         assert(x->prepare);
231         assert(y->prepare);
232
233         /* Move most recently prepared ones last, so that we can stop
234          * preparing as soon as we hit one that has already been
235          * prepared in the current iteration */
236         if (x->prepare_iteration < y->prepare_iteration)
237                 return -1;
238         if (x->prepare_iteration > y->prepare_iteration)
239                 return 1;
240
241         /* Enabled ones first */
242         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
243                 return -1;
244         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
245                 return 1;
246
247         /* Lower priority values first */
248         if (x->priority < y->priority)
249                 return -1;
250         if (x->priority > y->priority)
251                 return 1;
252
253         /* Stability for the rest */
254         if (x < y)
255                 return -1;
256         if (x > y)
257                 return 1;
258
259         return 0;
260 }
261
262 static int earliest_time_prioq_compare(const void *a, const void *b) {
263         const sd_event_source *x = a, *y = b;
264
265         assert(EVENT_SOURCE_IS_TIME(x->type));
266         assert(x->type == y->type);
267
268         /* Enabled ones first */
269         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
270                 return -1;
271         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
272                 return 1;
273
274         /* Move the pending ones to the end */
275         if (!x->pending && y->pending)
276                 return -1;
277         if (x->pending && !y->pending)
278                 return 1;
279
280         /* Order by time */
281         if (x->time.next < y->time.next)
282                 return -1;
283         if (x->time.next > y->time.next)
284                 return 1;
285
286         /* Stability for the rest */
287         if (x < y)
288                 return -1;
289         if (x > y)
290                 return 1;
291
292         return 0;
293 }
294
295 static int latest_time_prioq_compare(const void *a, const void *b) {
296         const sd_event_source *x = a, *y = b;
297
298         assert(EVENT_SOURCE_IS_TIME(x->type));
299         assert(x->type == y->type);
300
301         /* Enabled ones first */
302         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
303                 return -1;
304         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
305                 return 1;
306
307         /* Move the pending ones to the end */
308         if (!x->pending && y->pending)
309                 return -1;
310         if (x->pending && !y->pending)
311                 return 1;
312
313         /* Order by time */
314         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
315                 return -1;
316         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
317                 return 1;
318
319         /* Stability for the rest */
320         if (x < y)
321                 return -1;
322         if (x > y)
323                 return 1;
324
325         return 0;
326 }
327
328 static int exit_prioq_compare(const void *a, const void *b) {
329         const sd_event_source *x = a, *y = b;
330
331         assert(x->type == SOURCE_EXIT);
332         assert(y->type == SOURCE_EXIT);
333
334         /* Enabled ones first */
335         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
336                 return -1;
337         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338                 return 1;
339
340         /* Lower priority values first */
341         if (x->priority < y->priority)
342                 return -1;
343         if (x->priority > y->priority)
344                 return 1;
345
346         /* Stability for the rest */
347         if (x < y)
348                 return -1;
349         if (x > y)
350                 return 1;
351
352         return 0;
353 }
354
355 static void free_clock_data(struct clock_data *d) {
356         assert(d);
357
358         safe_close(d->fd);
359         prioq_free(d->earliest);
360         prioq_free(d->latest);
361 }
362
/* Tear down an event loop: detach any remaining sources, close all fds
 * and free all containers. Called when the last reference is dropped
 * (and from the sd_event_new() failure path). */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be attached here: a
         * non-floating source holds a reference on the loop, so the
         * refcount could not have reached zero. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If this was a cached default loop, invalidate the cache. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
399
/* Allocate a new event loop and store it in *ret. All fds start out as
 * -1 and all per-clock 'next' fields as USEC_INFINITY; only the pending
 * prioq and the epoll fd are created eagerly, everything else is set up
 * lazily when the first source of the respective kind is added.
 * Returns 0 on success, -ENOMEM or -errno on failure. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        /* USEC_INFINITY marks the perturbation value as not yet computed,
         * see initialize_perturb(). */
        e->perturb = USEC_INFINITY;

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        /* event_free() copes with the partially initialized object. */
        event_free(e);
        return r;
}
437
438 _public_ sd_event* sd_event_ref(sd_event *e) {
439         assert_return(e, NULL);
440
441         assert(e->n_ref >= 1);
442         e->n_ref++;
443
444         return e;
445 }
446
447 _public_ sd_event* sd_event_unref(sd_event *e) {
448
449         if (!e)
450                 return NULL;
451
452         assert(e->n_ref >= 1);
453         e->n_ref--;
454
455         if (e->n_ref <= 0)
456                 event_free(e);
457
458         return NULL;
459 }
460
461 static bool event_pid_changed(sd_event *e) {
462         assert(e);
463
464         /* We don't support people creating an event loop and keeping
465          * it around over a fork(). Let's complain. */
466
467         return e->original_pid != getpid();
468 }
469
470 static int source_io_unregister(sd_event_source *s) {
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475
476         if (!s->io.registered)
477                 return 0;
478
479         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
480         if (r < 0)
481                 return -errno;
482
483         s->io.registered = false;
484         return 0;
485 }
486
487 static int source_io_register(
488                 sd_event_source *s,
489                 int enabled,
490                 uint32_t events) {
491
492         struct epoll_event ev = {};
493         int r;
494
495         assert(s);
496         assert(s->type == SOURCE_IO);
497         assert(enabled != SD_EVENT_OFF);
498
499         ev.events = events;
500         ev.data.ptr = s;
501
502         if (enabled == SD_EVENT_ONESHOT)
503                 ev.events |= EPOLLONESHOT;
504
505         if (s->io.registered)
506                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
507         else
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
509
510         if (r < 0)
511                 return -errno;
512
513         s->io.registered = true;
514
515         return 0;
516 }
517
518 static clockid_t event_source_type_to_clock(EventSourceType t) {
519
520         switch (t) {
521
522         case SOURCE_TIME_REALTIME:
523                 return CLOCK_REALTIME;
524
525         case SOURCE_TIME_BOOTTIME:
526                 return CLOCK_BOOTTIME;
527
528         case SOURCE_TIME_MONOTONIC:
529                 return CLOCK_MONOTONIC;
530
531         case SOURCE_TIME_REALTIME_ALARM:
532                 return CLOCK_REALTIME_ALARM;
533
534         case SOURCE_TIME_BOOTTIME_ALARM:
535                 return CLOCK_BOOTTIME_ALARM;
536
537         default:
538                 return (clockid_t) -1;
539         }
540 }
541
542 static EventSourceType clock_to_event_source_type(clockid_t clock) {
543
544         switch (clock) {
545
546         case CLOCK_REALTIME:
547                 return SOURCE_TIME_REALTIME;
548
549         case CLOCK_BOOTTIME:
550                 return SOURCE_TIME_BOOTTIME;
551
552         case CLOCK_MONOTONIC:
553                 return SOURCE_TIME_MONOTONIC;
554
555         case CLOCK_REALTIME_ALARM:
556                 return SOURCE_TIME_REALTIME_ALARM;
557
558         case CLOCK_BOOTTIME_ALARM:
559                 return SOURCE_TIME_BOOTTIME_ALARM;
560
561         default:
562                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
563         }
564 }
565
566 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
567         assert(e);
568
569         switch (t) {
570
571         case SOURCE_TIME_REALTIME:
572                 return &e->realtime;
573
574         case SOURCE_TIME_BOOTTIME:
575                 return &e->boottime;
576
577         case SOURCE_TIME_MONOTONIC:
578                 return &e->monotonic;
579
580         case SOURCE_TIME_REALTIME_ALARM:
581                 return &e->realtime_alarm;
582
583         case SOURCE_TIME_BOOTTIME_ALARM:
584                 return &e->boottime_alarm;
585
586         default:
587                 return NULL;
588         }
589 }
590
591 static bool need_signal(sd_event *e, int signal) {
592         return (e->signal_sources && e->signal_sources[signal] &&
593                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
594                 ||
595                (signal == SIGCHLD &&
596                 e->n_enabled_child_sources > 0);
597 }
598
/* (Re)program the signalfd from e->sigset. signalfd() with an existing
 * fd only updates its mask; a freshly created fd additionally has to be
 * added to the epoll set. Returns 0 on success, negative errno on
 * failure (closing a just-created fd again on epoll failure). */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        /* Tag the epoll entry with the source *type*, so the dispatcher can
         * distinguish this wakeup from regular per-source entries. */
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                /* NOTE(review): relies on safe_close() preserving errno —
                 * systemd's helper does; confirm for this tree. */
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
628
/* Detach a source from its event loop: remove it from all per-type data
 * structures, then from the pending/prepare queues and the global source
 * list. Afterwards s->event is NULL and s->type is invalidated. Safe to
 * call on an already-disconnected source (returns early). */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                /* The scheduling window changed; the timerfd must be rearmed. */
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pin the loop; release that reference last,
         * as it may free the loop itself. */
        if (!s->floating)
                sd_event_unref(event);
}
733
734 static void source_free(sd_event_source *s) {
735         assert(s);
736
737         source_disconnect(s);
738         free(s->description);
739         free(s);
740 }
741
/* Mark a source as pending for dispatch (or clear that state), keeping
 * the loop's pending prioq in sync. For timer sources the per-clock
 * queues are reshuffled too, since their comparators sort pending
 * sources to the back. Returns 0 on success, negative on OOM. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* Exit sources live in their own prioq and never become pending. */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when it became pending, for FIFO ordering among
                 * sources of equal priority (see pending_prioq_compare()). */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back the flag so state stays consistent. */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
777
778 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
779         sd_event_source *s;
780
781         assert(e);
782
783         s = new0(sd_event_source, 1);
784         if (!s)
785                 return NULL;
786
787         s->n_ref = 1;
788         s->event = e;
789         s->floating = floating;
790         s->type = type;
791         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
792
793         if (!floating)
794                 sd_event_ref(e);
795
796         LIST_PREPEND(sources, e->sources, s);
797         e->n_sources ++;
798
799         return s;
800 }
801
802 _public_ int sd_event_add_io(
803                 sd_event *e,
804                 sd_event_source **ret,
805                 int fd,
806                 uint32_t events,
807                 sd_event_io_handler_t callback,
808                 void *userdata) {
809
810         sd_event_source *s;
811         int r;
812
813         assert_return(e, -EINVAL);
814         assert_return(fd >= 0, -EINVAL);
815         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
816         assert_return(callback, -EINVAL);
817         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
818         assert_return(!event_pid_changed(e), -ECHILD);
819
820         s = source_new(e, !ret, SOURCE_IO);
821         if (!s)
822                 return -ENOMEM;
823
824         s->io.fd = fd;
825         s->io.events = events;
826         s->io.callback = callback;
827         s->userdata = userdata;
828         s->enabled = SD_EVENT_ON;
829
830         r = source_io_register(s, s->enabled, events);
831         if (r < 0) {
832                 source_free(s);
833                 return r;
834         }
835
836         if (ret)
837                 *ret = s;
838
839         return 0;
840 }
841
/* Lazily compute the per-machine wakeup perturbation offset from the
 * boot ID. Stays USEC_INFINITY (= not initialized) if the boot ID
 * cannot be read, so a later call will retry. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
860
/* Lazily create the timerfd for one clock and add it to the epoll set.
 * The epoll data pointer encodes the timer's source *type* (not a
 * source pointer), so the dispatcher can tell which clock fired.
 * Returns 0 on success (or if already set up), negative errno on
 * failure. */
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                /* NOTE(review): assumes safe_close() preserves errno, as
                 * systemd's helper does — confirm for this tree. */
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
891
/* Default timer callback used when sd_event_add_time() is called with a
 * NULL callback: exit the loop with the code stashed in userdata. */
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
897
/* Add a timer source on the given clock. 'usec' is the absolute trigger
 * time, 'accuracy' the slack the loop may use to coalesce wakeups (0
 * selects DEFAULT_ACCURACY_USEC). A NULL callback makes the timer exit
 * the loop, with the exit code taken from userdata. The source starts
 * out ONESHOT. Returns 0 on success, negative errno-style on failure. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        /* Reject clocks timerfd does not support. */
        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily create this clock's two scheduling queues... */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ...and its timerfd. */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        /* source_free() also removes the source from any prioq it made
         * it into, via source_disconnect(). */
        source_free(s);
        return r;
}
975
/* Default signal callback used when sd_event_add_signal() is called
 * with a NULL callback: exit the loop with the code from userdata. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
981
982 _public_ int sd_event_add_signal(
983                 sd_event *e,
984                 sd_event_source **ret,
985                 int sig,
986                 sd_event_signal_handler_t callback,
987                 void *userdata) {
988
989         sd_event_source *s;
990         sigset_t ss;
991         int r;
992         bool previous;
993
994         assert_return(e, -EINVAL);
995         assert_return(sig > 0, -EINVAL);
996         assert_return(sig < _NSIG, -EINVAL);
997         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
998         assert_return(!event_pid_changed(e), -ECHILD);
999
1000         if (!callback)
1001                 callback = signal_exit_callback;
1002
1003         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1004         if (r < 0)
1005                 return -errno;
1006
1007         if (!sigismember(&ss, sig))
1008                 return -EBUSY;
1009
1010         if (!e->signal_sources) {
1011                 e->signal_sources = new0(sd_event_source*, _NSIG);
1012                 if (!e->signal_sources)
1013                         return -ENOMEM;
1014         } else if (e->signal_sources[sig])
1015                 return -EBUSY;
1016
1017         previous = need_signal(e, sig);
1018
1019         s = source_new(e, !ret, SOURCE_SIGNAL);
1020         if (!s)
1021                 return -ENOMEM;
1022
1023         s->signal.sig = sig;
1024         s->signal.callback = callback;
1025         s->userdata = userdata;
1026         s->enabled = SD_EVENT_ON;
1027
1028         e->signal_sources[sig] = s;
1029
1030         if (!previous) {
1031                 assert_se(sigaddset(&e->sigset, sig) == 0);
1032
1033                 r = event_update_signal_fd(e);
1034                 if (r < 0) {
1035                         source_free(s);
1036                         return r;
1037                 }
1038         }
1039
1040         /* Use the signal name as description for the event source by default */
1041         (void) sd_event_source_set_description(s, signal_to_string(sig));
1042
1043         if (ret)
1044                 *ret = s;
1045
1046         return 0;
1047 }
1048
/* Adds an event source watching state changes of one specific child
 * process. Child sources are driven by SIGCHLD plus per-PID waitid()
 * (see process_child() below); options are the waitid() WEXITED /
 * WSTOPPED / WCONTINUED flags. Returns 0 on success, negative
 * errno-style error otherwise. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL); /* refuses PID 1 as well as 0/negative */
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one source per PID */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already watched before this
         * registration, so we only touch the signalfd if needed. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT; /* a process only dies once */

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        /* NOTE(review): presumably source_free() undoes the
                         * hashmap entry and counter bump — confirm against
                         * source_disconnect(). */
                        source_free(s);
                        return r;
                }
        }

        /* Force a waitid() sweep on the next iteration, in case the child
         * already changed state before we registered it. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1113
1114 _public_ int sd_event_add_defer(
1115                 sd_event *e,
1116                 sd_event_source **ret,
1117                 sd_event_handler_t callback,
1118                 void *userdata) {
1119
1120         sd_event_source *s;
1121         int r;
1122
1123         assert_return(e, -EINVAL);
1124         assert_return(callback, -EINVAL);
1125         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1126         assert_return(!event_pid_changed(e), -ECHILD);
1127
1128         s = source_new(e, !ret, SOURCE_DEFER);
1129         if (!s)
1130                 return -ENOMEM;
1131
1132         s->defer.callback = callback;
1133         s->userdata = userdata;
1134         s->enabled = SD_EVENT_ONESHOT;
1135
1136         r = source_set_pending(s, true);
1137         if (r < 0) {
1138                 source_free(s);
1139                 return r;
1140         }
1141
1142         if (ret)
1143                 *ret = s;
1144
1145         return 0;
1146 }
1147
1148 _public_ int sd_event_add_post(
1149                 sd_event *e,
1150                 sd_event_source **ret,
1151                 sd_event_handler_t callback,
1152                 void *userdata) {
1153
1154         sd_event_source *s;
1155         int r;
1156
1157         assert_return(e, -EINVAL);
1158         assert_return(callback, -EINVAL);
1159         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1160         assert_return(!event_pid_changed(e), -ECHILD);
1161
1162         r = set_ensure_allocated(&e->post_sources, NULL);
1163         if (r < 0)
1164                 return r;
1165
1166         s = source_new(e, !ret, SOURCE_POST);
1167         if (!s)
1168                 return -ENOMEM;
1169
1170         s->post.callback = callback;
1171         s->userdata = userdata;
1172         s->enabled = SD_EVENT_ON;
1173
1174         r = set_put(e->post_sources, s);
1175         if (r < 0) {
1176                 source_free(s);
1177                 return r;
1178         }
1179
1180         if (ret)
1181                 *ret = s;
1182
1183         return 0;
1184 }
1185
1186 _public_ int sd_event_add_exit(
1187                 sd_event *e,
1188                 sd_event_source **ret,
1189                 sd_event_handler_t callback,
1190                 void *userdata) {
1191
1192         sd_event_source *s;
1193         int r;
1194
1195         assert_return(e, -EINVAL);
1196         assert_return(callback, -EINVAL);
1197         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1198         assert_return(!event_pid_changed(e), -ECHILD);
1199
1200         if (!e->exit) {
1201                 e->exit = prioq_new(exit_prioq_compare);
1202                 if (!e->exit)
1203                         return -ENOMEM;
1204         }
1205
1206         s = source_new(e, !ret, SOURCE_EXIT);
1207         if (!s)
1208                 return -ENOMEM;
1209
1210         s->exit.callback = callback;
1211         s->userdata = userdata;
1212         s->exit.prioq_index = PRIOQ_IDX_NULL;
1213         s->enabled = SD_EVENT_ONESHOT;
1214
1215         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1216         if (r < 0) {
1217                 source_free(s);
1218                 return r;
1219         }
1220
1221         if (ret)
1222                 *ret = s;
1223
1224         return 0;
1225 }
1226
1227 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1228         assert_return(s, NULL);
1229
1230         assert(s->n_ref >= 1);
1231         s->n_ref++;
1232
1233         return s;
1234 }
1235
/* Drops a reference on the event source, freeing it when the last
 * reference goes away — unless we are currently inside its own
 * dispatch callback, see below. Always returns NULL so callers can
 * write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        /* Detach from the loop but keep the object alive;
                         * the dispatcher frees it after the callback. */
                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1264
1265 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1266         assert_return(s, -EINVAL);
1267         assert_return(!event_pid_changed(s->event), -ECHILD);
1268
1269         return free_and_strdup(&s->description, description);
1270 }
1271
1272 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1273         assert_return(s, -EINVAL);
1274         assert_return(description, -EINVAL);
1275         assert_return(s->description, -ENXIO);
1276         assert_return(!event_pid_changed(s->event), -ECHILD);
1277
1278         *description = s->description;
1279         return 0;
1280 }
1281
1282 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1283         assert_return(s, NULL);
1284
1285         return s->event;
1286 }
1287
1288 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1289         assert_return(s, -EINVAL);
1290         assert_return(s->type != SOURCE_EXIT, -EDOM);
1291         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1292         assert_return(!event_pid_changed(s->event), -ECHILD);
1293
1294         return s->pending;
1295 }
1296
1297 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1298         assert_return(s, -EINVAL);
1299         assert_return(s->type == SOURCE_IO, -EDOM);
1300         assert_return(!event_pid_changed(s->event), -ECHILD);
1301
1302         return s->io.fd;
1303 }
1304
/* Replaces the file descriptor watched by an IO source. If the source
 * is enabled, the new fd is registered with epoll *before* the old one
 * is removed, and the change is rolled back if registration fails. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled: just remember the fd; epoll registration
                 * happens when the source is enabled again. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Registration failed: restore the previous fd,
                         * which is still in the epoll set. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: the caller may already have closed saved_fd. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1340
1341 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1342         assert_return(s, -EINVAL);
1343         assert_return(events, -EINVAL);
1344         assert_return(s->type == SOURCE_IO, -EDOM);
1345         assert_return(!event_pid_changed(s->event), -ECHILD);
1346
1347         *events = s->io.events;
1348         return 0;
1349 }
1350
/* Changes the epoll event mask of an IO source. Only EPOLLIN/EPOLLOUT/
 * EPOLLRDHUP/EPOLLPRI/EPOLLERR/EPOLLHUP/EPOLLET are accepted. The mask
 * is re-registered immediately if the source is enabled. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                /* Push the new mask into epoll right away. */
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        /* Clear pending state: previously collected revents may no longer
         * match the new mask. */
        source_set_pending(s, false);

        return 0;
}
1375
1376 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1377         assert_return(s, -EINVAL);
1378         assert_return(revents, -EINVAL);
1379         assert_return(s->type == SOURCE_IO, -EDOM);
1380         assert_return(s->pending, -ENODATA);
1381         assert_return(!event_pid_changed(s->event), -ECHILD);
1382
1383         *revents = s->io.revents;
1384         return 0;
1385 }
1386
1387 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1388         assert_return(s, -EINVAL);
1389         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1390         assert_return(!event_pid_changed(s->event), -ECHILD);
1391
1392         return s->signal.sig;
1393 }
1394
1395 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1396         assert_return(s, -EINVAL);
1397         assert_return(!event_pid_changed(s->event), -ECHILD);
1398
1399         return s->priority;
1400 }
1401
1402 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1403         assert_return(s, -EINVAL);
1404         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1405         assert_return(!event_pid_changed(s->event), -ECHILD);
1406
1407         if (s->priority == priority)
1408                 return 0;
1409
1410         s->priority = priority;
1411
1412         if (s->pending)
1413                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1414
1415         if (s->prepare)
1416                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1417
1418         if (s->type == SOURCE_EXIT)
1419                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1420
1421         return 0;
1422 }
1423
1424 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1425         assert_return(s, -EINVAL);
1426         assert_return(m, -EINVAL);
1427         assert_return(!event_pid_changed(s->event), -ECHILD);
1428
1429         *m = s->enabled;
1430         return 0;
1431 }
1432
1433 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1434         int r;
1435
1436         assert_return(s, -EINVAL);
1437         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1438         assert_return(!event_pid_changed(s->event), -ECHILD);
1439
1440         /* If we are dead anyway, we are fine with turning off
1441          * sources, but everything else needs to fail. */
1442         if (s->event->state == SD_EVENT_FINISHED)
1443                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1444
1445         if (s->enabled == m)
1446                 return 0;
1447
1448         if (m == SD_EVENT_OFF) {
1449
1450                 switch (s->type) {
1451
1452                 case SOURCE_IO:
1453                         r = source_io_unregister(s);
1454                         if (r < 0)
1455                                 return r;
1456
1457                         s->enabled = m;
1458                         break;
1459
1460                 case SOURCE_TIME_REALTIME:
1461                 case SOURCE_TIME_BOOTTIME:
1462                 case SOURCE_TIME_MONOTONIC:
1463                 case SOURCE_TIME_REALTIME_ALARM:
1464                 case SOURCE_TIME_BOOTTIME_ALARM: {
1465                         struct clock_data *d;
1466
1467                         s->enabled = m;
1468                         d = event_get_clock_data(s->event, s->type);
1469                         assert(d);
1470
1471                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1472                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1473                         d->needs_rearm = true;
1474                         break;
1475                 }
1476
1477                 case SOURCE_SIGNAL:
1478                         assert(need_signal(s->event, s->signal.sig));
1479
1480                         s->enabled = m;
1481
1482                         if (!need_signal(s->event, s->signal.sig)) {
1483                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1484
1485                                 (void) event_update_signal_fd(s->event);
1486                                 /* If disabling failed, we might get a spurious event,
1487                                  * but otherwise nothing bad should happen. */
1488                         }
1489
1490                         break;
1491
1492                 case SOURCE_CHILD:
1493                         assert(need_signal(s->event, SIGCHLD));
1494
1495                         s->enabled = m;
1496
1497                         assert(s->event->n_enabled_child_sources > 0);
1498                         s->event->n_enabled_child_sources--;
1499
1500                         if (!need_signal(s->event, SIGCHLD)) {
1501                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1502
1503                                 (void) event_update_signal_fd(s->event);
1504                         }
1505
1506                         break;
1507
1508                 case SOURCE_EXIT:
1509                         s->enabled = m;
1510                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1511                         break;
1512
1513                 case SOURCE_DEFER:
1514                 case SOURCE_POST:
1515                         s->enabled = m;
1516                         break;
1517
1518                 default:
1519                         assert_not_reached("Wut? I shouldn't exist.");
1520                 }
1521
1522         } else {
1523                 switch (s->type) {
1524
1525                 case SOURCE_IO:
1526                         r = source_io_register(s, m, s->io.events);
1527                         if (r < 0)
1528                                 return r;
1529
1530                         s->enabled = m;
1531                         break;
1532
1533                 case SOURCE_TIME_REALTIME:
1534                 case SOURCE_TIME_BOOTTIME:
1535                 case SOURCE_TIME_MONOTONIC:
1536                 case SOURCE_TIME_REALTIME_ALARM:
1537                 case SOURCE_TIME_BOOTTIME_ALARM: {
1538                         struct clock_data *d;
1539
1540                         s->enabled = m;
1541                         d = event_get_clock_data(s->event, s->type);
1542                         assert(d);
1543
1544                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1545                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1546                         d->needs_rearm = true;
1547                         break;
1548                 }
1549
1550                 case SOURCE_SIGNAL:
1551                         /* Check status before enabling. */
1552                         if (!need_signal(s->event, s->signal.sig)) {
1553                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1554
1555                                 r = event_update_signal_fd(s->event);
1556                                 if (r < 0) {
1557                                         s->enabled = SD_EVENT_OFF;
1558                                         return r;
1559                                 }
1560                         }
1561
1562                         s->enabled = m;
1563                         break;
1564
1565                 case SOURCE_CHILD:
1566                         /* Check status before enabling. */
1567                         if (s->enabled == SD_EVENT_OFF) {
1568                                 if (!need_signal(s->event, SIGCHLD)) {
1569                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1570
1571                                         r = event_update_signal_fd(s->event);
1572                                         if (r < 0) {
1573                                                 s->enabled = SD_EVENT_OFF;
1574                                                 return r;
1575                                         }
1576                                 }
1577
1578                                 s->event->n_enabled_child_sources++;
1579                         }
1580
1581                         s->enabled = m;
1582                         break;
1583
1584                 case SOURCE_EXIT:
1585                         s->enabled = m;
1586                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1587                         break;
1588
1589                 case SOURCE_DEFER:
1590                 case SOURCE_POST:
1591                         s->enabled = m;
1592                         break;
1593
1594                 default:
1595                         assert_not_reached("Wut? I shouldn't exist.");
1596                 }
1597         }
1598
1599         if (s->pending)
1600                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1601
1602         if (s->prepare)
1603                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1604
1605         return 0;
1606 }
1607
1608 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1609         assert_return(s, -EINVAL);
1610         assert_return(usec, -EINVAL);
1611         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1612         assert_return(!event_pid_changed(s->event), -ECHILD);
1613
1614         *usec = s->time.next;
1615         return 0;
1616 }
1617
/* Changes the expiry time of a time source and reorders the per-clock
 * priority queues so the next timerfd arming picks it up. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL); /* reject USEC_INFINITY */
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* A pending elapse based on the old deadline is stale now. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true; /* timerfd must be re-armed with the new deadline */

        return 0;
}
1640
1641 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1642         assert_return(s, -EINVAL);
1643         assert_return(usec, -EINVAL);
1644         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1645         assert_return(!event_pid_changed(s->event), -ECHILD);
1646
1647         *usec = s->time.accuracy;
1648         return 0;
1649 }
1650
/* Changes the dispatch accuracy of a time source; 0 selects the default
 * (DEFAULT_ACCURACY_USEC). Accuracy only affects the latest acceptable
 * wakeup, hence only the "latest" queue needs reshuffling. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL); /* reject USEC_INFINITY */
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        /* Pending elapse state was computed with the old accuracy. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1675
1676 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1677         assert_return(s, -EINVAL);
1678         assert_return(clock, -EINVAL);
1679         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1680         assert_return(!event_pid_changed(s->event), -ECHILD);
1681
1682         *clock = event_source_type_to_clock(s->type);
1683         return 0;
1684 }
1685
1686 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1687         assert_return(s, -EINVAL);
1688         assert_return(pid, -EINVAL);
1689         assert_return(s->type == SOURCE_CHILD, -EDOM);
1690         assert_return(!event_pid_changed(s->event), -ECHILD);
1691
1692         *pid = s->child.pid;
1693         return 0;
1694 }
1695
/* Installs (or, with NULL, removes) a prepare callback that is invoked
 * before each poll. Sources with a prepare callback are tracked in the
 * e->prepare priority queue. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one non-NULL callback for another doesn't change
         * prioq membership, so no queue work is needed. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        /* Enter or leave the prepare queue depending on whether a
         * callback is now set. */
        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1727
1728 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1729         assert_return(s, NULL);
1730
1731         return s->userdata;
1732 }
1733
1734 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1735         void *ret;
1736
1737         assert_return(s, NULL);
1738
1739         ret = s->userdata;
1740         s->userdata = userdata;
1741
1742         return ret;
1743 }
1744
/* Picks a wakeup time within [a, b] (earliest deadline, latest
 * acceptable time including accuracy), preferring system-wide
 * synchronized wakeup points. Returns a value in [a, b]. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, so this really means a == 0: wake now. */
        if (a <= 0)
                return 0;

        /* Window of one tick or less: no room to optimize. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the per-minute synchronization point at or before b;
         * each step below guards against unsigned underflow. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Per-10s synchronization point. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Per-second synchronization point. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Per-250ms synchronization point. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronized point fits the window: sleep as long as possible. */
        return b;
}
1822
1823 static int event_arm_timer(
1824                 sd_event *e,
1825                 struct clock_data *d) {
1826
1827         struct itimerspec its = {};
1828         sd_event_source *a, *b;
1829         usec_t t;
1830         int r;
1831
1832         assert(e);
1833         assert(d);
1834
1835         if (!d->needs_rearm)
1836                 return 0;
1837         else
1838                 d->needs_rearm = false;
1839
1840         a = prioq_peek(d->earliest);
1841         if (!a || a->enabled == SD_EVENT_OFF) {
1842
1843                 if (d->fd < 0)
1844                         return 0;
1845
1846                 if (d->next == USEC_INFINITY)
1847                         return 0;
1848
1849                 /* disarm */
1850                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1851                 if (r < 0)
1852                         return r;
1853
1854                 d->next = USEC_INFINITY;
1855                 return 0;
1856         }
1857
1858         b = prioq_peek(d->latest);
1859         assert_se(b && b->enabled != SD_EVENT_OFF);
1860
1861         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1862         if (d->next == t)
1863                 return 0;
1864
1865         assert_se(d->fd >= 0);
1866
1867         if (t == 0) {
1868                 /* We don' want to disarm here, just mean some time looooong ago. */
1869                 its.it_value.tv_sec = 0;
1870                 its.it_value.tv_nsec = 1;
1871         } else
1872                 timespec_store(&its.it_value, t);
1873
1874         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1875         if (r < 0)
1876                 return -errno;
1877
1878         d->next = t;
1879         return 0;
1880 }
1881
1882 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1883         assert(e);
1884         assert(s);
1885         assert(s->type == SOURCE_IO);
1886
1887         /* If the event source was already pending, we just OR in the
1888          * new revents, otherwise we reset the value. The ORing is
1889          * necessary to handle EPOLLONESHOT events properly where
1890          * readability might happen independently of writability, and
1891          * we need to keep track of both */
1892
1893         if (s->pending)
1894                 s->io.revents |= revents;
1895         else
1896                 s->io.revents = revents;
1897
1898         return source_set_pending(s, true);
1899 }
1900
1901 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1902         uint64_t x;
1903         ssize_t ss;
1904
1905         assert(e);
1906         assert(fd >= 0);
1907
1908         assert_return(events == EPOLLIN, -EIO);
1909
1910         ss = read(fd, &x, sizeof(x));
1911         if (ss < 0) {
1912                 if (errno == EAGAIN || errno == EINTR)
1913                         return 0;
1914
1915                 return -errno;
1916         }
1917
1918         if (_unlikely_(ss != sizeof(x)))
1919                 return -EIO;
1920
1921         if (next)
1922                 *next = USEC_INFINITY;
1923
1924         return 0;
1925 }
1926
/* Marks every enabled time source on this clock whose deadline is at or
 * before n as pending, and requeues it. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                /* The earliest queue is ordered by deadline, so we can
                 * stop at the first source that hasn't elapsed yet (or
                 * is disabled/already pending). */
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                /* Pending sources sort differently; requeue and force a
                 * timerfd rearm for the remaining sources. */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
1957
/* Poll every watched child PID (without reaping it) and mark the
 * corresponding event source pending when there is a state change to
 * report. Returns 0 on success, negative errno if waitid() fails. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        /* Reset the flag that requested this pass (set by the SIGCHLD
         * path and checked again in sd_event_wait()). */
        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued: don't overwrite the stashed siginfo */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only when the caller watches for WEXITED, so
                 * the zombie survives until after the dispatch (above). */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when there was nothing to report */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2024
/* Drain the signalfd and mark the matching signal event sources
 * pending; SIGCHLD additionally triggers a child-state poll. Returns
 * 1 if at least one signal was read, 0 if the fd was empty, negative
 * errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        /* A signalfd only ever signals readability */
        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* Queue drained (or interrupted): report
                         * whether we picked up anything at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        /* NOTE(review): process_child() as visible here
                         * only returns <= 0, so this branch looks
                         * unreachable — confirm before relying on it. */
                        if (r > 0)
                                continue;
                }

                /* Look up the source registered for this signal, if any */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                /* Stash the siginfo for the callback, then queue it */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2073
/* Run the callback of a single event source and do the surrounding
 * bookkeeping: clear the pending flag, queue post sources, turn off
 * oneshot sources, and disable (or free) the source afterwards on
 * error. Returns 1 on success, negative errno only if the
 * bookkeeping itself fails — callback errors are logged and disable
 * the source instead of propagating. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources are not unqueued here; their pending
         * state is managed elsewhere. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are switched off before the callback runs,
         * which leaves the callback free to re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine *before* the callback whether the child is
                 * dead; the callback may overwrite the stashed siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* If the callback dropped the last reference, free the source
         * now; otherwise a failing callback merely gets disabled. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2179
/* Invoke the prepare() callback of each source that registered one,
 * at most once per loop iteration (tracked via prepare_iteration).
 * Sources come off the 'prepare' priority queue in order. Returns 0
 * on success, negative errno on queue failure; a failing callback is
 * logged and disables its source. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                /* Stop at the first source already handled this
                 * iteration or disabled — the queue ordering puts
                 * unhandled ones first. */
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as handled and reshuffle *before* running the
                 * callback, so the loop terminates even if the callback
                 * touches the queue. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                /* Free if the callback dropped the last reference;
                 * otherwise disable the source on error. */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2218
2219 static int dispatch_exit(sd_event *e) {
2220         sd_event_source *p;
2221         int r;
2222
2223         assert(e);
2224
2225         p = prioq_peek(e->exit);
2226         if (!p || p->enabled == SD_EVENT_OFF) {
2227                 e->state = SD_EVENT_FINISHED;
2228                 return 0;
2229         }
2230
2231         sd_event_ref(e);
2232         e->iteration++;
2233         e->state = SD_EVENT_EXITING;
2234
2235         r = source_dispatch(p);
2236
2237         e->state = SD_EVENT_PASSIVE;
2238         sd_event_unref(e);
2239
2240         return r;
2241 }
2242
2243 static sd_event_source* event_next_pending(sd_event *e) {
2244         sd_event_source *p;
2245
2246         assert(e);
2247
2248         p = prioq_peek(e->pending);
2249         if (!p)
2250                 return NULL;
2251
2252         if (p->enabled == SD_EVENT_OFF)
2253                 return NULL;
2254
2255         return p;
2256 }
2257
2258 static int arm_watchdog(sd_event *e) {
2259         struct itimerspec its = {};
2260         usec_t t;
2261         int r;
2262
2263         assert(e);
2264         assert(e->watchdog_fd >= 0);
2265
2266         t = sleep_between(e,
2267                           e->watchdog_last + (e->watchdog_period / 2),
2268                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2269
2270         timespec_store(&its.it_value, t);
2271
2272         /* Make sure we never set the watchdog to 0, which tells the
2273          * kernel to disable it. */
2274         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2275                 its.it_value.tv_nsec = 1;
2276
2277         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2278         if (r < 0)
2279                 return -errno;
2280
2281         return 0;
2282 }
2283
2284 static int process_watchdog(sd_event *e) {
2285         assert(e);
2286
2287         if (!e->watchdog)
2288                 return 0;
2289
2290         /* Don't notify watchdog too often */
2291         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2292                 return 0;
2293
2294         sd_notify(false, "WATCHDOG=1");
2295         e->watchdog_last = e->timestamp.monotonic;
2296
2297         return arm_watchdog(e);
2298 }
2299
2300 _public_ int sd_event_prepare(sd_event *e) {
2301         int r;
2302
2303         assert_return(e, -EINVAL);
2304         assert_return(!event_pid_changed(e), -ECHILD);
2305         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2306         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2307
2308         if (e->exit_requested)
2309                 goto pending;
2310
2311         e->iteration++;
2312
2313         r = event_prepare(e);
2314         if (r < 0)
2315                 return r;
2316
2317         r = event_arm_timer(e, &e->realtime);
2318         if (r < 0)
2319                 return r;
2320
2321         r = event_arm_timer(e, &e->boottime);
2322         if (r < 0)
2323                 return r;
2324
2325         r = event_arm_timer(e, &e->monotonic);
2326         if (r < 0)
2327                 return r;
2328
2329         r = event_arm_timer(e, &e->realtime_alarm);
2330         if (r < 0)
2331                 return r;
2332
2333         r = event_arm_timer(e, &e->boottime_alarm);
2334         if (r < 0)
2335                 return r;
2336
2337         if (event_next_pending(e) || e->need_process_child)
2338                 goto pending;
2339
2340         e->state = SD_EVENT_PREPARED;
2341
2342         return 0;
2343
2344 pending:
2345         e->state = SD_EVENT_PREPARED;
2346         r = sd_event_wait(e, 0);
2347         if (r == 0)
2348                 e->state = SD_EVENT_PREPARED;
2349
2350         return r;
2351 }
2352
/* Second phase of an iteration: poll the epoll fd for up to 'timeout'
 * microseconds ((uint64_t) -1 = infinite), flush the fds that fired
 * and mark the corresponding sources pending. Returns 1 if something
 * is now dispatchable (state SD_EVENT_PENDING), 0 on timeout (state
 * back to SD_EVENT_PASSIVE), negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* One epoll slot per source suffices; never less than one.
         * newa() allocates on the stack. */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* timeout is in µs; round up to ms for epoll_wait() */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Interrupted: report as "possibly pending" */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wakeup timestamps; the timer processing below and
         * sd_event_now() read them. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* First pass: flush every fd that fired. Internal fds carry a
         * small-integer tag in data.ptr; anything else is an I/O
         * source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Second pass: mark elapsed timer sources pending, using the
         * cached timestamps from above. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2453
2454 _public_ int sd_event_dispatch(sd_event *e) {
2455         sd_event_source *p;
2456         int r;
2457
2458         assert_return(e, -EINVAL);
2459         assert_return(!event_pid_changed(e), -ECHILD);
2460         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2461         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2462
2463         if (e->exit_requested)
2464                 return dispatch_exit(e);
2465
2466         p = event_next_pending(e);
2467         if (p) {
2468                 sd_event_ref(e);
2469
2470                 e->state = SD_EVENT_RUNNING;
2471                 r = source_dispatch(p);
2472                 e->state = SD_EVENT_PASSIVE;
2473
2474                 sd_event_unref(e);
2475
2476                 return r;
2477         }
2478
2479         e->state = SD_EVENT_PASSIVE;
2480
2481         return 1;
2482 }
2483
2484 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2485         int r;
2486
2487         assert_return(e, -EINVAL);
2488         assert_return(!event_pid_changed(e), -ECHILD);
2489         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2490         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2491
2492         r = sd_event_prepare(e);
2493         if (r > 0)
2494                 return sd_event_dispatch(e);
2495         else if (r < 0)
2496                 return r;
2497
2498         r = sd_event_wait(e, timeout);
2499         if (r > 0)
2500                 return sd_event_dispatch(e);
2501         else
2502                 return r;
2503 }
2504
2505 _public_ int sd_event_loop(sd_event *e) {
2506         int r;
2507
2508         assert_return(e, -EINVAL);
2509         assert_return(!event_pid_changed(e), -ECHILD);
2510         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2511
2512         sd_event_ref(e);
2513
2514         while (e->state != SD_EVENT_FINISHED) {
2515                 r = sd_event_run(e, (uint64_t) -1);
2516                 if (r < 0)
2517                         goto finish;
2518         }
2519
2520         r = e->exit_code;
2521
2522 finish:
2523         sd_event_unref(e);
2524         return r;
2525 }
2526
2527 _public_ int sd_event_get_fd(sd_event *e) {
2528
2529         assert_return(e, -EINVAL);
2530         assert_return(!event_pid_changed(e), -ECHILD);
2531
2532         return e->epoll_fd;
2533 }
2534
2535 _public_ int sd_event_get_state(sd_event *e) {
2536         assert_return(e, -EINVAL);
2537         assert_return(!event_pid_changed(e), -ECHILD);
2538
2539         return e->state;
2540 }
2541
2542 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2543         assert_return(e, -EINVAL);
2544         assert_return(code, -EINVAL);
2545         assert_return(!event_pid_changed(e), -ECHILD);
2546
2547         if (!e->exit_requested)
2548                 return -ENODATA;
2549
2550         *code = e->exit_code;
2551         return 0;
2552 }
2553
2554 _public_ int sd_event_exit(sd_event *e, int code) {
2555         assert_return(e, -EINVAL);
2556         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2557         assert_return(!event_pid_changed(e), -ECHILD);
2558
2559         e->exit_requested = true;
2560         e->exit_code = code;
2561
2562         return 0;
2563 }
2564
2565 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2566         assert_return(e, -EINVAL);
2567         assert_return(usec, -EINVAL);
2568         assert_return(!event_pid_changed(e), -ECHILD);
2569
2570         /* If we haven't run yet, just get the actual time */
2571         if (!dual_timestamp_is_set(&e->timestamp))
2572                 return -ENODATA;
2573
2574         switch (clock) {
2575
2576         case CLOCK_REALTIME:
2577         case CLOCK_REALTIME_ALARM:
2578                 *usec = e->timestamp.realtime;
2579                 break;
2580
2581         case CLOCK_MONOTONIC:
2582                 *usec = e->timestamp.monotonic;
2583                 break;
2584
2585         case CLOCK_BOOTTIME:
2586         case CLOCK_BOOTTIME_ALARM:
2587                 *usec = e->timestamp_boottime;
2588                 break;
2589         }
2590
2591         return 0;
2592 }
2593
2594 _public_ int sd_event_default(sd_event **ret) {
2595
2596         static thread_local sd_event *default_event = NULL;
2597         sd_event *e = NULL;
2598         int r;
2599
2600         if (!ret)
2601                 return !!default_event;
2602
2603         if (default_event) {
2604                 *ret = sd_event_ref(default_event);
2605                 return 0;
2606         }
2607
2608         r = sd_event_new(&e);
2609         if (r < 0)
2610                 return r;
2611
2612         e->default_event_ptr = &default_event;
2613         e->tid = gettid();
2614         default_event = e;
2615
2616         *ret = e;
2617         return 1;
2618 }
2619
2620 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2621         assert_return(e, -EINVAL);
2622         assert_return(tid, -EINVAL);
2623         assert_return(!event_pid_changed(e), -ECHILD);
2624
2625         if (e->tid != 0) {
2626                 *tid = e->tid;
2627                 return 0;
2628         }
2629
2630         return -ENXIO;
2631 }
2632
/* Enable or disable hooking the loop up to the service manager's
 * watchdog (WATCHDOG_USEC): when enabled, a timerfd periodically
 * wakes the loop to send "WATCHDOG=1". Returns the new state
 * (0/1), 0 also when the watchdog is not configured for this
 * service, negative errno on failure. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0: no watchdog configured by the manager —
                 * report that without treating it as an error. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd so sd_event_wait() recognizes it */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Tear down: unregister and close the timerfd */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2684
2685 _public_ int sd_event_get_watchdog(sd_event *e) {
2686         assert_return(e, -EINVAL);
2687         assert_return(!event_pid_changed(e), -ECHILD);
2688
2689         return e->watchdog;
2690 }