sd-bus,sd-event: unify error handling of object descriptions
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
43 typedef enum EventSourceType {
44         SOURCE_IO,
45         SOURCE_TIME_REALTIME,
46         SOURCE_TIME_BOOTTIME,
47         SOURCE_TIME_MONOTONIC,
48         SOURCE_TIME_REALTIME_ALARM,
49         SOURCE_TIME_BOOTTIME_ALARM,
50         SOURCE_SIGNAL,
51         SOURCE_CHILD,
52         SOURCE_DEFER,
53         SOURCE_POST,
54         SOURCE_EXIT,
55         SOURCE_WATCHDOG,
56         _SOURCE_EVENT_SOURCE_TYPE_MAX,
57         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
58 } EventSourceType;
59
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
62 struct sd_event_source {
63         unsigned n_ref;
64
65         sd_event *event;
66         void *userdata;
67         sd_event_handler_t prepare;
68
69         char *description;
70
71         EventSourceType type:5;
72         int enabled:3;
73         bool pending:1;
74         bool dispatching:1;
75         bool floating:1;
76
77         int64_t priority;
78         unsigned pending_index;
79         unsigned prepare_index;
80         unsigned pending_iteration;
81         unsigned prepare_iteration;
82
83         LIST_FIELDS(sd_event_source, sources);
84
85         union {
86                 struct {
87                         sd_event_io_handler_t callback;
88                         int fd;
89                         uint32_t events;
90                         uint32_t revents;
91                         bool registered:1;
92                 } io;
93                 struct {
94                         sd_event_time_handler_t callback;
95                         usec_t next, accuracy;
96                         unsigned earliest_index;
97                         unsigned latest_index;
98                 } time;
99                 struct {
100                         sd_event_signal_handler_t callback;
101                         struct signalfd_siginfo siginfo;
102                         int sig;
103                 } signal;
104                 struct {
105                         sd_event_child_handler_t callback;
106                         siginfo_t siginfo;
107                         pid_t pid;
108                         int options;
109                 } child;
110                 struct {
111                         sd_event_handler_t callback;
112                 } defer;
113                 struct {
114                         sd_event_handler_t callback;
115                 } post;
116                 struct {
117                         sd_event_handler_t callback;
118                         unsigned prioq_index;
119                 } exit;
120         };
121 };
122
123 struct clock_data {
124         int fd;
125
126         /* For all clocks we maintain two priority queues each, one
127          * ordered by the earliest times the events may be
128          * dispatched, and one ordered by the latest times they must
129          * have been dispatched. The range between the top entries in
130          * the two prioqs is the time window within which we can freely
131          * schedule wakeups (see the example sketched after this struct). */
132
133         Prioq *earliest;
134         Prioq *latest;
135         usec_t next;
136
137         bool needs_rearm:1;
138 };
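
/* Illustrative sketch (values made up, not part of the original source): a
 * source registered via sd_event_add_time(e, &s, CLOCK_MONOTONIC,
 * now + 100 * USEC_PER_MSEC, 250 * USEC_PER_MSEC, callback, NULL) sits in
 * both queues of e->monotonic: "earliest" is keyed on s->time.next
 * (now + 100ms), "latest" on s->time.next + s->time.accuracy (now + 350ms).
 * event_arm_timer() then programs the timerfd for a single wakeup somewhere
 * inside that window, with the exact spot chosen by sleep_between() further
 * down. */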
139
140 struct sd_event {
141         unsigned n_ref;
142
143         int epoll_fd;
144         int signal_fd;
145         int watchdog_fd;
146
147         Prioq *pending;
148         Prioq *prepare;
149
150         /* timerfd_create() only supports these five clocks so far. We
151          * can add support for more clocks when the kernel learns to
152          * deal with them, too. */
153         struct clock_data realtime;
154         struct clock_data boottime;
155         struct clock_data monotonic;
156         struct clock_data realtime_alarm;
157         struct clock_data boottime_alarm;
158
159         usec_t perturb;
160
161         sigset_t sigset;
162         sd_event_source **signal_sources;
163
164         Hashmap *child_sources;
165         unsigned n_enabled_child_sources;
166
167         Set *post_sources;
168
169         Prioq *exit;
170
171         pid_t original_pid;
172
173         unsigned iteration;
174         dual_timestamp timestamp;
175         usec_t timestamp_boottime;
176         int state;
177
178         bool exit_requested:1;
179         bool need_process_child:1;
180         bool watchdog:1;
181
182         int exit_code;
183
184         pid_t tid;
185         sd_event **default_event_ptr;
186
187         usec_t watchdog_last, watchdog_period;
188
189         unsigned n_sources;
190
191         LIST_HEAD(sd_event_source, sources);
192 };
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
365 static void event_free(sd_event *e) {
366         sd_event_source *s;
367
368         assert(e);
369
370         while ((s = e->sources)) {
371                 assert(s->floating);
372                 source_disconnect(s);
373                 sd_event_source_unref(s);
374         }
375
376         assert(e->n_sources == 0);
377
378         if (e->default_event_ptr)
379                 *(e->default_event_ptr) = NULL;
380
381         safe_close(e->epoll_fd);
382         safe_close(e->signal_fd);
383         safe_close(e->watchdog_fd);
384
385         free_clock_data(&e->realtime);
386         free_clock_data(&e->boottime);
387         free_clock_data(&e->monotonic);
388         free_clock_data(&e->realtime_alarm);
389         free_clock_data(&e->boottime_alarm);
390
391         prioq_free(e->pending);
392         prioq_free(e->prepare);
393         prioq_free(e->exit);
394
395         free(e->signal_sources);
396
397         hashmap_free(e->child_sources);
398         set_free(e->post_sources);
399         free(e);
400 }
401
402 _public_ int sd_event_new(sd_event** ret) {
403         sd_event *e;
404         int r;
405
406         assert_return(ret, -EINVAL);
407
408         e = new0(sd_event, 1);
409         if (!e)
410                 return -ENOMEM;
411
412         e->n_ref = 1;
413         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
414         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
415         e->original_pid = getpid();
416         e->perturb = USEC_INFINITY;
417
418         assert_se(sigemptyset(&e->sigset) == 0);
419
420         e->pending = prioq_new(pending_prioq_compare);
421         if (!e->pending) {
422                 r = -ENOMEM;
423                 goto fail;
424         }
425
426         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427         if (e->epoll_fd < 0) {
428                 r = -errno;
429                 goto fail;
430         }
431
432         *ret = e;
433         return 0;
434
435 fail:
436         event_free(e);
437         return r;
438 }
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating am event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
601 static int event_update_signal_fd(sd_event *e) {
602         struct epoll_event ev = {};
603         bool add_to_epoll;
604         int r;
605
606         assert(e);
607
608         add_to_epoll = e->signal_fd < 0;
609
610         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
611         if (r < 0)
612                 return -errno;
613
614         e->signal_fd = r;
615
616         if (!add_to_epoll)
617                 return 0;
618
619         ev.events = EPOLLIN;
620         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
621
622         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
623         if (r < 0) {
624                 e->signal_fd = safe_close(e->signal_fd);
625                 return -errno;
626         }
627
628         return 0;
629 }
630
631 static void source_disconnect(sd_event_source *s) {
632         sd_event *event;
633
634         assert(s);
635
636         if (!s->event)
637                 return;
638
639         assert(s->event->n_sources > 0);
640
641         switch (s->type) {
642
643         case SOURCE_IO:
644                 if (s->io.fd >= 0)
645                         source_io_unregister(s);
646
647                 break;
648
649         case SOURCE_TIME_REALTIME:
650         case SOURCE_TIME_BOOTTIME:
651         case SOURCE_TIME_MONOTONIC:
652         case SOURCE_TIME_REALTIME_ALARM:
653         case SOURCE_TIME_BOOTTIME_ALARM: {
654                 struct clock_data *d;
655
656                 d = event_get_clock_data(s->event, s->type);
657                 assert(d);
658
659                 prioq_remove(d->earliest, s, &s->time.earliest_index);
660                 prioq_remove(d->latest, s, &s->time.latest_index);
661                 d->needs_rearm = true;
662                 break;
663         }
664
665         case SOURCE_SIGNAL:
666                 if (s->signal.sig > 0) {
667                         if (s->event->signal_sources)
668                                 s->event->signal_sources[s->signal.sig] = NULL;
669
670                         /* If the signal was on and now it is off... */
671                         if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
672                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
673
674                                 (void) event_update_signal_fd(s->event);
675                                 /* If disabling failed, we might get a spurious event,
676                                  * but otherwise nothing bad should happen. */
677                         }
678                 }
679
680                 break;
681
682         case SOURCE_CHILD:
683                 if (s->child.pid > 0) {
684                         if (s->enabled != SD_EVENT_OFF) {
685                                 assert(s->event->n_enabled_child_sources > 0);
686                                 s->event->n_enabled_child_sources--;
687
688                                 /* We know the signal was on, if it is off now... */
689                                 if (!need_signal(s->event, SIGCHLD)) {
690                                         assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
691
692                                         (void) event_update_signal_fd(s->event);
693                                         /* If disabling failed, we might get a spurious event,
694                                          * but otherwise nothing bad should happen. */
695                                 }
696                         }
697
698                         hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
699                 }
700
701                 break;
702
703         case SOURCE_DEFER:
704                 /* nothing */
705                 break;
706
707         case SOURCE_POST:
708                 set_remove(s->event->post_sources, s);
709                 break;
710
711         case SOURCE_EXIT:
712                 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
713                 break;
714
715         default:
716                 assert_not_reached("Wut? I shouldn't exist.");
717         }
718
719         if (s->pending)
720                 prioq_remove(s->event->pending, s, &s->pending_index);
721
722         if (s->prepare)
723                 prioq_remove(s->event->prepare, s, &s->prepare_index);
724
725         event = s->event;
726
727         s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
728         s->event = NULL;
729         LIST_REMOVE(sources, event->sources, s);
730         event->n_sources--;
731
732         if (!s->floating)
733                 sd_event_unref(event);
734 }
735
736 static void source_free(sd_event_source *s) {
737         assert(s);
738
739         source_disconnect(s);
740         free(s->description);
741         free(s);
742 }
743
744 static int source_set_pending(sd_event_source *s, bool b) {
745         int r;
746
747         assert(s);
748         assert(s->type != SOURCE_EXIT);
749
750         if (s->pending == b)
751                 return 0;
752
753         s->pending = b;
754
755         if (b) {
756                 s->pending_iteration = s->event->iteration;
757
758                 r = prioq_put(s->event->pending, s, &s->pending_index);
759                 if (r < 0) {
760                         s->pending = false;
761                         return r;
762                 }
763         } else
764                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
765
766         if (EVENT_SOURCE_IS_TIME(s->type)) {
767                 struct clock_data *d;
768
769                 d = event_get_clock_data(s->event, s->type);
770                 assert(d);
771
772                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
773                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
774                 d->needs_rearm = true;
775         }
776
777         return 0;
778 }
779
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
781         sd_event_source *s;
782
783         assert(e);
784
785         s = new0(sd_event_source, 1);
786         if (!s)
787                 return NULL;
788
789         s->n_ref = 1;
790         s->event = e;
791         s->floating = floating;
792         s->type = type;
793         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
794
795         if (!floating)
796                 sd_event_ref(e);
797
798         LIST_PREPEND(sources, e->sources, s);
799         e->n_sources ++;
800
801         return s;
802 }
803
804 _public_ int sd_event_add_io(
805                 sd_event *e,
806                 sd_event_source **ret,
807                 int fd,
808                 uint32_t events,
809                 sd_event_io_handler_t callback,
810                 void *userdata) {
811
812         sd_event_source *s;
813         int r;
814
815         assert_return(e, -EINVAL);
816         assert_return(fd >= 0, -EINVAL);
817         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
818         assert_return(callback, -EINVAL);
819         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
820         assert_return(!event_pid_changed(e), -ECHILD);
821
822         s = source_new(e, !ret, SOURCE_IO);
823         if (!s)
824                 return -ENOMEM;
825
826         s->io.fd = fd;
827         s->io.events = events;
828         s->io.callback = callback;
829         s->userdata = userdata;
830         s->enabled = SD_EVENT_ON;
831
832         r = source_io_register(s, s->enabled, events);
833         if (r < 0) {
834                 source_free(s);
835                 return r;
836         }
837
838         if (ret)
839                 *ret = s;
840
841         return 0;
842 }
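
/* Usage sketch (the handler name, fd and error handling are illustrative
 * only, they are not part of this file):
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     sd_event_source *s = NULL;
 *     r = sd_event_add_io(e, &s, fd, EPOLLIN, on_readable, NULL);
 *     if (r < 0)
 *             return r;
 *
 * The new source starts out enabled (SD_EVENT_ON) and is registered with the
 * epoll instance immediately; passing NULL instead of &s creates a "floating"
 * source owned by the event loop itself. */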
843
844 static void initialize_perturb(sd_event *e) {
845         sd_id128_t bootid = {};
846
847         /* When we sleep for longer, we try to realign the wakeup to
848            the same time within each minute/second/250ms, so that
849            events all across the system can be coalesced into a single
850            CPU wakeup. However, let's take some system-specific
851            randomness for this value, so that in a network of systems
852            with synced clocks timer events are distributed a
853            bit. Here, we calculate a perturbation usec offset from the
854            boot ID. */
855
856         if (_likely_(e->perturb != USEC_INFINITY))
857                 return;
858
859         if (sd_id128_get_boot(&bootid) >= 0)
860                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
861 }
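
/* Rough illustration (numbers made up): if the boot ID hashes to a perturb
 * value of, say, 17 * USEC_PER_SEC, then wakeups that may happen anywhere
 * within a given minute are pulled towards second 17 of that minute on this
 * machine, while a machine with a different boot ID picks a different
 * offset, so that a fleet of hosts with synchronized clocks does not wake up
 * all at once. sleep_between() below applies this value. */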
862
863 static int event_setup_timer_fd(
864                 sd_event *e,
865                 struct clock_data *d,
866                 clockid_t clock) {
867
868         struct epoll_event ev = {};
869         int r, fd;
870
871         assert(e);
872         assert(d);
873
874         if (_likely_(d->fd >= 0))
875                 return 0;
876
877         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
878         if (fd < 0)
879                 return -errno;
880
881         ev.events = EPOLLIN;
882         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
883
884         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
885         if (r < 0) {
886                 safe_close(fd);
887                 return -errno;
888         }
889
890         d->fd = fd;
891         return 0;
892 }
893
894 _public_ int sd_event_add_time(
895                 sd_event *e,
896                 sd_event_source **ret,
897                 clockid_t clock,
898                 uint64_t usec,
899                 uint64_t accuracy,
900                 sd_event_time_handler_t callback,
901                 void *userdata) {
902
903         EventSourceType type;
904         sd_event_source *s;
905         struct clock_data *d;
906         int r;
907
908         assert_return(e, -EINVAL);
909         assert_return(usec != (uint64_t) -1, -EINVAL);
910         assert_return(accuracy != (uint64_t) -1, -EINVAL);
911         assert_return(callback, -EINVAL);
912         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
913         assert_return(!event_pid_changed(e), -ECHILD);
914
915         type = clock_to_event_source_type(clock);
916         assert_return(type >= 0, -ENOTSUP);
917
918         d = event_get_clock_data(e, type);
919         assert(d);
920
921         if (!d->earliest) {
922                 d->earliest = prioq_new(earliest_time_prioq_compare);
923                 if (!d->earliest)
924                         return -ENOMEM;
925         }
926
927         if (!d->latest) {
928                 d->latest = prioq_new(latest_time_prioq_compare);
929                 if (!d->latest)
930                         return -ENOMEM;
931         }
932
933         if (d->fd < 0) {
934                 r = event_setup_timer_fd(e, d, clock);
935                 if (r < 0)
936                         return r;
937         }
938
939         s = source_new(e, !ret, type);
940         if (!s)
941                 return -ENOMEM;
942
943         s->time.next = usec;
944         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
945         s->time.callback = callback;
946         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
947         s->userdata = userdata;
948         s->enabled = SD_EVENT_ONESHOT;
949
950         d->needs_rearm = true;
951
952         r = prioq_put(d->earliest, s, &s->time.earliest_index);
953         if (r < 0)
954                 goto fail;
955
956         r = prioq_put(d->latest, s, &s->time.latest_index);
957         if (r < 0)
958                 goto fail;
959
960         if (ret)
961                 *ret = s;
962
963         return 0;
964
965 fail:
966         source_free(s);
967         return r;
968 }
969
970 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
971         assert(s);
972
973         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
974 }
975
976 _public_ int sd_event_add_signal(
977                 sd_event *e,
978                 sd_event_source **ret,
979                 int sig,
980                 sd_event_signal_handler_t callback,
981                 void *userdata) {
982
983         sd_event_source *s;
984         sigset_t ss;
985         int r;
986         bool previous;
987
988         assert_return(e, -EINVAL);
989         assert_return(sig > 0, -EINVAL);
990         assert_return(sig < _NSIG, -EINVAL);
991         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992         assert_return(!event_pid_changed(e), -ECHILD);
993
994         if (!callback)
995                 callback = signal_exit_callback;
996
997         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
998         if (r != 0)
999                 return -r; /* pthread_sigmask() returns the error number directly, it does not set errno */
1000
1001         if (!sigismember(&ss, sig))
1002                 return -EBUSY;
1003
1004         if (!e->signal_sources) {
1005                 e->signal_sources = new0(sd_event_source*, _NSIG);
1006                 if (!e->signal_sources)
1007                         return -ENOMEM;
1008         } else if (e->signal_sources[sig])
1009                 return -EBUSY;
1010
1011         previous = need_signal(e, sig);
1012
1013         s = source_new(e, !ret, SOURCE_SIGNAL);
1014         if (!s)
1015                 return -ENOMEM;
1016
1017         s->signal.sig = sig;
1018         s->signal.callback = callback;
1019         s->userdata = userdata;
1020         s->enabled = SD_EVENT_ON;
1021
1022         e->signal_sources[sig] = s;
1023
1024         if (!previous) {
1025                 assert_se(sigaddset(&e->sigset, sig) == 0);
1026
1027                 r = event_update_signal_fd(e);
1028                 if (r < 0) {
1029                         source_free(s);
1030                         return r;
1031                 }
1032         }
1033
1034         /* Use the signal name as description for the event source by default */
1035         (void) sd_event_source_set_description(s, signal_to_string(sig));
1036
1037         if (ret)
1038                 *ret = s;
1039
1040         return 0;
1041 }
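
/* Usage sketch (SIGTERM is just an example): the signal must already be
 * blocked in the calling thread, otherwise the -EBUSY check above fires:
 *
 *     sigset_t mask;
 *     assert_se(sigemptyset(&mask) == 0);
 *     assert_se(sigaddset(&mask, SIGTERM) == 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *
 * Passing a NULL callback installs signal_exit_callback(), so SIGTERM simply
 * ends the event loop via sd_event_exit(). */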
1042
1043 _public_ int sd_event_add_child(
1044                 sd_event *e,
1045                 sd_event_source **ret,
1046                 pid_t pid,
1047                 int options,
1048                 sd_event_child_handler_t callback,
1049                 void *userdata) {
1050
1051         sd_event_source *s;
1052         int r;
1053         bool previous;
1054
1055         assert_return(e, -EINVAL);
1056         assert_return(pid > 1, -EINVAL);
1057         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1058         assert_return(options != 0, -EINVAL);
1059         assert_return(callback, -EINVAL);
1060         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1061         assert_return(!event_pid_changed(e), -ECHILD);
1062
1063         r = hashmap_ensure_allocated(&e->child_sources, NULL);
1064         if (r < 0)
1065                 return r;
1066
1067         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1068                 return -EBUSY;
1069
1070         previous = need_signal(e, SIGCHLD);
1071
1072         s = source_new(e, !ret, SOURCE_CHILD);
1073         if (!s)
1074                 return -ENOMEM;
1075
1076         s->child.pid = pid;
1077         s->child.options = options;
1078         s->child.callback = callback;
1079         s->userdata = userdata;
1080         s->enabled = SD_EVENT_ONESHOT;
1081
1082         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1083         if (r < 0) {
1084                 source_free(s);
1085                 return r;
1086         }
1087
1088         e->n_enabled_child_sources ++;
1089
1090         if (!previous) {
1091                 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1092
1093                 r = event_update_signal_fd(e);
1094                 if (r < 0) {
1095                         source_free(s);
1096                         return r;
1097                 }
1098         }
1099
1100         e->need_process_child = true;
1101
1102         if (ret)
1103                 *ret = s;
1104
1105         return 0;
1106 }
1107
1108 _public_ int sd_event_add_defer(
1109                 sd_event *e,
1110                 sd_event_source **ret,
1111                 sd_event_handler_t callback,
1112                 void *userdata) {
1113
1114         sd_event_source *s;
1115         int r;
1116
1117         assert_return(e, -EINVAL);
1118         assert_return(callback, -EINVAL);
1119         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1120         assert_return(!event_pid_changed(e), -ECHILD);
1121
1122         s = source_new(e, !ret, SOURCE_DEFER);
1123         if (!s)
1124                 return -ENOMEM;
1125
1126         s->defer.callback = callback;
1127         s->userdata = userdata;
1128         s->enabled = SD_EVENT_ONESHOT;
1129
1130         r = source_set_pending(s, true);
1131         if (r < 0) {
1132                 source_free(s);
1133                 return r;
1134         }
1135
1136         if (ret)
1137                 *ret = s;
1138
1139         return 0;
1140 }
1141
1142 _public_ int sd_event_add_post(
1143                 sd_event *e,
1144                 sd_event_source **ret,
1145                 sd_event_handler_t callback,
1146                 void *userdata) {
1147
1148         sd_event_source *s;
1149         int r;
1150
1151         assert_return(e, -EINVAL);
1152         assert_return(callback, -EINVAL);
1153         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1154         assert_return(!event_pid_changed(e), -ECHILD);
1155
1156         r = set_ensure_allocated(&e->post_sources, NULL);
1157         if (r < 0)
1158                 return r;
1159
1160         s = source_new(e, !ret, SOURCE_POST);
1161         if (!s)
1162                 return -ENOMEM;
1163
1164         s->post.callback = callback;
1165         s->userdata = userdata;
1166         s->enabled = SD_EVENT_ON;
1167
1168         r = set_put(e->post_sources, s);
1169         if (r < 0) {
1170                 source_free(s);
1171                 return r;
1172         }
1173
1174         if (ret)
1175                 *ret = s;
1176
1177         return 0;
1178 }
1179
1180 _public_ int sd_event_add_exit(
1181                 sd_event *e,
1182                 sd_event_source **ret,
1183                 sd_event_handler_t callback,
1184                 void *userdata) {
1185
1186         sd_event_source *s;
1187         int r;
1188
1189         assert_return(e, -EINVAL);
1190         assert_return(callback, -EINVAL);
1191         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1192         assert_return(!event_pid_changed(e), -ECHILD);
1193
1194         if (!e->exit) {
1195                 e->exit = prioq_new(exit_prioq_compare);
1196                 if (!e->exit)
1197                         return -ENOMEM;
1198         }
1199
1200         s = source_new(e, !ret, SOURCE_EXIT);
1201         if (!s)
1202                 return -ENOMEM;
1203
1204         s->exit.callback = callback;
1205         s->userdata = userdata;
1206         s->exit.prioq_index = PRIOQ_IDX_NULL;
1207         s->enabled = SD_EVENT_ONESHOT;
1208
1209         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1210         if (r < 0) {
1211                 source_free(s);
1212                 return r;
1213         }
1214
1215         if (ret)
1216                 *ret = s;
1217
1218         return 0;
1219 }
1220
1221 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1222         assert_return(s, NULL);
1223
1224         assert(s->n_ref >= 1);
1225         s->n_ref++;
1226
1227         return s;
1228 }
1229
1230 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1231
1232         if (!s)
1233                 return NULL;
1234
1235         assert(s->n_ref >= 1);
1236         s->n_ref--;
1237
1238         if (s->n_ref <= 0) {
1239                 /* Here's a special hack: when we are called from a
1240                  * dispatch handler we won't free the event source
1241                  * immediately, but we will detach the fd from the
1242                  * epoll. This way it is safe for the caller to unref
1243                  * the event source and immediately close the fd, but
1244                  * we still retain a valid event source object after
1245                  * the callback. */
1246
1247                 if (s->dispatching) {
1248                         if (s->type == SOURCE_IO)
1249                                 source_io_unregister(s);
1250
1251                         source_disconnect(s);
1252                 } else
1253                         source_free(s);
1254         }
1255
1256         return NULL;
1257 }
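
/* Sketch of the caller pattern the hack above enables (handler and fd names
 * are illustrative):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             ...
 *             sd_event_source_unref(s);   <- drops the last reference while dispatching
 *             safe_close(fd);             <- safe, the fd is already detached from epoll
 *             return 0;
 *     }
 *
 * Because s->dispatching is set, the source is merely unregistered and
 * disconnected here; the object itself is freed by the dispatching code
 * further down in this file once the callback has returned. */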
1258
1259 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1260         assert_return(s, -EINVAL);
1261         assert_return(!event_pid_changed(s->event), -ECHILD);
1262
1263         return free_and_strdup(&s->description, description);
1264 }
1265
1266 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1267         assert_return(s, -EINVAL);
1268         assert_return(description, -EINVAL);
1269         assert_return(s->description, -ENXIO);
1270         assert_return(!event_pid_changed(s->event), -ECHILD);
1271
1272         *description = s->description;
1273         return 0;
1274 }
1275
1276 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1277         assert_return(s, NULL);
1278
1279         return s->event;
1280 }
1281
1282 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1283         assert_return(s, -EINVAL);
1284         assert_return(s->type != SOURCE_EXIT, -EDOM);
1285         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1286         assert_return(!event_pid_changed(s->event), -ECHILD);
1287
1288         return s->pending;
1289 }
1290
1291 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1292         assert_return(s, -EINVAL);
1293         assert_return(s->type == SOURCE_IO, -EDOM);
1294         assert_return(!event_pid_changed(s->event), -ECHILD);
1295
1296         return s->io.fd;
1297 }
1298
1299 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1300         int r;
1301
1302         assert_return(s, -EINVAL);
1303         assert_return(fd >= 0, -EINVAL);
1304         assert_return(s->type == SOURCE_IO, -EDOM);
1305         assert_return(!event_pid_changed(s->event), -ECHILD);
1306
1307         if (s->io.fd == fd)
1308                 return 0;
1309
1310         if (s->enabled == SD_EVENT_OFF) {
1311                 s->io.fd = fd;
1312                 s->io.registered = false;
1313         } else {
1314                 int saved_fd;
1315
1316                 saved_fd = s->io.fd;
1317                 assert(s->io.registered);
1318
1319                 s->io.fd = fd;
1320                 s->io.registered = false;
1321
1322                 r = source_io_register(s, s->enabled, s->io.events);
1323                 if (r < 0) {
1324                         s->io.fd = saved_fd;
1325                         s->io.registered = true;
1326                         return r;
1327                 }
1328
1329                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1330         }
1331
1332         return 0;
1333 }
1334
1335 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1336         assert_return(s, -EINVAL);
1337         assert_return(events, -EINVAL);
1338         assert_return(s->type == SOURCE_IO, -EDOM);
1339         assert_return(!event_pid_changed(s->event), -ECHILD);
1340
1341         *events = s->io.events;
1342         return 0;
1343 }
1344
1345 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1346         int r;
1347
1348         assert_return(s, -EINVAL);
1349         assert_return(s->type == SOURCE_IO, -EDOM);
1350         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1351         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1352         assert_return(!event_pid_changed(s->event), -ECHILD);
1353
1354         /* edge-triggered updates are never skipped, so we can reset edges */
1355         if (s->io.events == events && !(events & EPOLLET))
1356                 return 0;
1357
1358         if (s->enabled != SD_EVENT_OFF) {
1359                 r = source_io_register(s, s->enabled, events);
1360                 if (r < 0)
1361                         return r;
1362         }
1363
1364         s->io.events = events;
1365         source_set_pending(s, false);
1366
1367         return 0;
1368 }
1369
1370 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1371         assert_return(s, -EINVAL);
1372         assert_return(revents, -EINVAL);
1373         assert_return(s->type == SOURCE_IO, -EDOM);
1374         assert_return(s->pending, -ENODATA);
1375         assert_return(!event_pid_changed(s->event), -ECHILD);
1376
1377         *revents = s->io.revents;
1378         return 0;
1379 }
1380
1381 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1382         assert_return(s, -EINVAL);
1383         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1384         assert_return(!event_pid_changed(s->event), -ECHILD);
1385
1386         return s->signal.sig;
1387 }
1388
1389 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1390         assert_return(s, -EINVAL);
1391         assert_return(!event_pid_changed(s->event), -ECHILD);
1392
1393         *priority = s->priority;
             return 0;
1394 }
1395
1396 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1397         assert_return(s, -EINVAL);
1398         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1399         assert_return(!event_pid_changed(s->event), -ECHILD);
1400
1401         if (s->priority == priority)
1402                 return 0;
1403
1404         s->priority = priority;
1405
1406         if (s->pending)
1407                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1408
1409         if (s->prepare)
1410                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1411
1412         if (s->type == SOURCE_EXIT)
1413                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1414
1415         return 0;
1416 }
1417
1418 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1419         assert_return(s, -EINVAL);
1420         assert_return(m, -EINVAL);
1421         assert_return(!event_pid_changed(s->event), -ECHILD);
1422
1423         *m = s->enabled;
1424         return 0;
1425 }
1426
1427 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1428         int r;
1429
1430         assert_return(s, -EINVAL);
1431         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1432         assert_return(!event_pid_changed(s->event), -ECHILD);
1433
1434         /* If we are dead anyway, we are fine with turning off
1435          * sources, but everything else needs to fail. */
1436         if (s->event->state == SD_EVENT_FINISHED)
1437                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1438
1439         if (s->enabled == m)
1440                 return 0;
1441
1442         if (m == SD_EVENT_OFF) {
1443
1444                 switch (s->type) {
1445
1446                 case SOURCE_IO:
1447                         r = source_io_unregister(s);
1448                         if (r < 0)
1449                                 return r;
1450
1451                         s->enabled = m;
1452                         break;
1453
1454                 case SOURCE_TIME_REALTIME:
1455                 case SOURCE_TIME_BOOTTIME:
1456                 case SOURCE_TIME_MONOTONIC:
1457                 case SOURCE_TIME_REALTIME_ALARM:
1458                 case SOURCE_TIME_BOOTTIME_ALARM: {
1459                         struct clock_data *d;
1460
1461                         s->enabled = m;
1462                         d = event_get_clock_data(s->event, s->type);
1463                         assert(d);
1464
1465                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1466                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1467                         d->needs_rearm = true;
1468                         break;
1469                 }
1470
1471                 case SOURCE_SIGNAL:
1472                         assert(need_signal(s->event, s->signal.sig));
1473
1474                         s->enabled = m;
1475
1476                         if (!need_signal(s->event, s->signal.sig)) {
1477                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1478
1479                                 (void) event_update_signal_fd(s->event);
1480                                 /* If disabling failed, we might get a spurious event,
1481                                  * but otherwise nothing bad should happen. */
1482                         }
1483
1484                         break;
1485
1486                 case SOURCE_CHILD:
1487                         assert(need_signal(s->event, SIGCHLD));
1488
1489                         s->enabled = m;
1490
1491                         assert(s->event->n_enabled_child_sources > 0);
1492                         s->event->n_enabled_child_sources--;
1493
1494                         if (!need_signal(s->event, SIGCHLD)) {
1495                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1496
1497                                 (void) event_update_signal_fd(s->event);
1498                         }
1499
1500                         break;
1501
1502                 case SOURCE_EXIT:
1503                         s->enabled = m;
1504                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1505                         break;
1506
1507                 case SOURCE_DEFER:
1508                 case SOURCE_POST:
1509                         s->enabled = m;
1510                         break;
1511
1512                 default:
1513                         assert_not_reached("Wut? I shouldn't exist.");
1514                 }
1515
1516         } else {
1517                 switch (s->type) {
1518
1519                 case SOURCE_IO:
1520                         r = source_io_register(s, m, s->io.events);
1521                         if (r < 0)
1522                                 return r;
1523
1524                         s->enabled = m;
1525                         break;
1526
1527                 case SOURCE_TIME_REALTIME:
1528                 case SOURCE_TIME_BOOTTIME:
1529                 case SOURCE_TIME_MONOTONIC:
1530                 case SOURCE_TIME_REALTIME_ALARM:
1531                 case SOURCE_TIME_BOOTTIME_ALARM: {
1532                         struct clock_data *d;
1533
1534                         s->enabled = m;
1535                         d = event_get_clock_data(s->event, s->type);
1536                         assert(d);
1537
1538                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1539                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1540                         d->needs_rearm = true;
1541                         break;
1542                 }
1543
1544                 case SOURCE_SIGNAL:
1545                         /* Check status before enabling. */
1546                         if (!need_signal(s->event, s->signal.sig)) {
1547                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1548
1549                                 r = event_update_signal_fd(s->event);
1550                                 if (r < 0) {
1551                                         s->enabled = SD_EVENT_OFF;
1552                                         return r;
1553                                 }
1554                         }
1555
1556                         s->enabled = m;
1557                         break;
1558
1559                 case SOURCE_CHILD:
1560                         /* Check status before enabling. */
1561                         if (s->enabled == SD_EVENT_OFF) {
1562                                 if (!need_signal(s->event, SIGCHLD)) {
1563                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1564
1565                                         r = event_update_signal_fd(s->event);
1566                                         if (r < 0) {
1567                                                 s->enabled = SD_EVENT_OFF;
1568                                                 return r;
1569                                         }
1570                                 }
1571
1572                                 s->event->n_enabled_child_sources++;
1573                         }
1574
1575                         s->enabled = m;
1576                         break;
1577
1578                 case SOURCE_EXIT:
1579                         s->enabled = m;
1580                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1581                         break;
1582
1583                 case SOURCE_DEFER:
1584                 case SOURCE_POST:
1585                         s->enabled = m;
1586                         break;
1587
1588                 default:
1589                         assert_not_reached("Wut? I shouldn't exist.");
1590                 }
1591         }
1592
1593         if (s->pending)
1594                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1595
1596         if (s->prepare)
1597                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1598
1599         return 0;
1600 }
1601
1602 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1603         assert_return(s, -EINVAL);
1604         assert_return(usec, -EINVAL);
1605         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1606         assert_return(!event_pid_changed(s->event), -ECHILD);
1607
1608         *usec = s->time.next;
1609         return 0;
1610 }
1611
1612 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1613         struct clock_data *d;
1614
1615         assert_return(s, -EINVAL);
1616         assert_return(usec != (uint64_t) -1, -EINVAL);
1617         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1618         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1619         assert_return(!event_pid_changed(s->event), -ECHILD);
1620
1621         s->time.next = usec;
1622
1623         source_set_pending(s, false);
1624
1625         d = event_get_clock_data(s->event, s->type);
1626         assert(d);
1627
1628         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1629         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1630         d->needs_rearm = true;
1631
1632         return 0;
1633 }
1634
1635 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1636         assert_return(s, -EINVAL);
1637         assert_return(usec, -EINVAL);
1638         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1639         assert_return(!event_pid_changed(s->event), -ECHILD);
1640
1641         *usec = s->time.accuracy;
1642         return 0;
1643 }
1644
1645 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1646         struct clock_data *d;
1647
1648         assert_return(s, -EINVAL);
1649         assert_return(usec != (uint64_t) -1, -EINVAL);
1650         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1651         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1652         assert_return(!event_pid_changed(s->event), -ECHILD);
1653
1654         if (usec == 0)
1655                 usec = DEFAULT_ACCURACY_USEC;
1656
1657         s->time.accuracy = usec;
1658
1659         source_set_pending(s, false);
1660
1661         d = event_get_clock_data(s->event, s->type);
1662         assert(d);
1663
1664         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1665         d->needs_rearm = true;
1666
1667         return 0;
1668 }
1669
1670 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1671         assert_return(s, -EINVAL);
1672         assert_return(clock, -EINVAL);
1673         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1674         assert_return(!event_pid_changed(s->event), -ECHILD);
1675
1676         *clock = event_source_type_to_clock(s->type);
1677         return 0;
1678 }
1679
1680 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1681         assert_return(s, -EINVAL);
1682         assert_return(pid, -EINVAL);
1683         assert_return(s->type == SOURCE_CHILD, -EDOM);
1684         assert_return(!event_pid_changed(s->event), -ECHILD);
1685
1686         *pid = s->child.pid;
1687         return 0;
1688 }
1689
1690 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1691         int r;
1692
1693         assert_return(s, -EINVAL);
1694         assert_return(s->type != SOURCE_EXIT, -EDOM);
1695         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1696         assert_return(!event_pid_changed(s->event), -ECHILD);
1697
1698         if (s->prepare == callback)
1699                 return 0;
1700
1701         if (callback && s->prepare) {
1702                 s->prepare = callback;
1703                 return 0;
1704         }
1705
1706         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1707         if (r < 0)
1708                 return r;
1709
1710         s->prepare = callback;
1711
1712         if (callback) {
1713                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1714                 if (r < 0)
1715                         return r;
1716         } else
1717                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1718
1719         return 0;
1720 }
1721
1722 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1723         assert_return(s, NULL);
1724
1725         return s->userdata;
1726 }
1727
1728 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1729         void *ret;
1730
1731         assert_return(s, NULL);
1732
1733         ret = s->userdata;
1734         s->userdata = userdata;
1735
1736         return ret;
1737 }
1738
1739 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1740         usec_t c;
1741         assert(e);
1742         assert(a <= b);
1743
1744         if (a <= 0)
1745                 return 0;
1746
1747         if (b <= a + 1)
1748                 return a;
1749
1750         initialize_perturb(e);
1751
1752         /*
1753           Find a good time to wake up again between times a and b. We
1754           have two goals here:
1755
1756           a) We want to wake up as seldom as possible, hence prefer
1757              later times over earlier times.
1758
1759           b) But if we have to wake up, then let's make sure to
1760              dispatch as much as possible on the entire system.
1761
1762           We implement this by waking up everywhere at the same time
1763           within any given minute if we can, synchronised via the
1764           perturbation value determined from the boot ID. If we can't,
1765           then we try to find the same spot within every 10s window,
1766           then every 1s, then every 250ms. Failing all that, we pick
1767           the last possible time to wake up.
1768         */
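        /* A worked example (illustrative numbers, not from the original
         * comment): with perturb = 7s, the first attempt picks the latest
         * instant of the form "second :07 of some minute" that is still
         * strictly before b; if that instant is not before a, it is
         * returned. Otherwise the same is tried on a 10s grid at
         * (perturb % 10s) past the mark, then on a 1s and a 250ms grid,
         * and if nothing fits we simply return b. */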
1769
1770         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1771         if (c >= b) {
1772                 if (_unlikely_(c < USEC_PER_MINUTE))
1773                         return b;
1774
1775                 c -= USEC_PER_MINUTE;
1776         }
1777
1778         if (c >= a)
1779                 return c;
1780
1781         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1782         if (c >= b) {
1783                 if (_unlikely_(c < USEC_PER_SEC*10))
1784                         return b;
1785
1786                 c -= USEC_PER_SEC*10;
1787         }
1788
1789         if (c >= a)
1790                 return c;
1791
1792         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1793         if (c >= b) {
1794                 if (_unlikely_(c < USEC_PER_SEC))
1795                         return b;
1796
1797                 c -= USEC_PER_SEC;
1798         }
1799
1800         if (c >= a)
1801                 return c;
1802
1803         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1804         if (c >= b) {
1805                 if (_unlikely_(c < USEC_PER_MSEC*250))
1806                         return b;
1807
1808                 c -= USEC_PER_MSEC*250;
1809         }
1810
1811         if (c >= a)
1812                 return c;
1813
1814         return b;
1815 }
1816
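/* Program the timerfd of the given clock so that it fires when the
 * earliest enabled time source is due, delayed at most until the latest
 * acceptable instant (next + accuracy) as chosen by sleep_between(). If
 * no enabled time source is left, the timer is disarmed instead. Does
 * nothing unless the clock was marked as needing a rearm. */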
1817 static int event_arm_timer(
1818                 sd_event *e,
1819                 struct clock_data *d) {
1820
1821         struct itimerspec its = {};
1822         sd_event_source *a, *b;
1823         usec_t t;
1824         int r;
1825
1826         assert(e);
1827         assert(d);
1828
1829         if (!d->needs_rearm)
1830                 return 0;
1831         else
1832                 d->needs_rearm = false;
1833
1834         a = prioq_peek(d->earliest);
1835         if (!a || a->enabled == SD_EVENT_OFF) {
1836
1837                 if (d->fd < 0)
1838                         return 0;
1839
1840                 if (d->next == USEC_INFINITY)
1841                         return 0;
1842
1843                 /* disarm */
1844                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1845                 if (r < 0)
1846                         return -errno;
1847
1848                 d->next = USEC_INFINITY;
1849                 return 0;
1850         }
1851
1852         b = prioq_peek(d->latest);
1853         assert_se(b && b->enabled != SD_EVENT_OFF);
1854
1855         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1856         if (d->next == t)
1857                 return 0;
1858
1859         assert_se(d->fd >= 0);
1860
1861         if (t == 0) {
1862                 /* We don't want to disarm here (an all-zero it_value would do that), but rather indicate a time far in the past. */
1863                 its.it_value.tv_sec = 0;
1864                 its.it_value.tv_nsec = 1;
1865         } else
1866                 timespec_store(&its.it_value, t);
1867
1868         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1869         if (r < 0)
1870                 return -errno;
1871
1872         d->next = t;
1873         return 0;
1874 }
1875
1876 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1877         assert(e);
1878         assert(s);
1879         assert(s->type == SOURCE_IO);
1880
1881         /* If the event source was already pending, we just OR in the
1882          * new revents, otherwise we reset the value. The ORing is
1883          * necessary to handle EPOLLONESHOT events properly where
1884          * readability might happen independently of writability, and
1885          * we need to keep track of both */
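        /* For instance (an illustrative case): if EPOLLIN was recorded in an
         * earlier iteration and the source is still pending when a later
         * wakeup reports EPOLLOUT, ORing yields EPOLLIN|EPOLLOUT, so the
         * callback sees both conditions. */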
1886
1887         if (s->pending)
1888                 s->io.revents |= revents;
1889         else
1890                 s->io.revents = revents;
1891
1892         return source_set_pending(s, true);
1893 }
1894
1895 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1896         uint64_t x;
1897         ssize_t ss;
1898
1899         assert(e);
1900         assert(fd >= 0);
1901
1902         assert_return(events == EPOLLIN, -EIO);
1903
1904         ss = read(fd, &x, sizeof(x));
1905         if (ss < 0) {
1906                 if (errno == EAGAIN || errno == EINTR)
1907                         return 0;
1908
1909                 return -errno;
1910         }
1911
1912         if (_unlikely_(ss != sizeof(x)))
1913                 return -EIO;
1914
1915         if (next)
1916                 *next = USEC_INFINITY;
1917
1918         return 0;
1919 }
1920
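/* Mark elapsed time sources of this clock (time.next <= n) as pending;
 * the loop stops at the first peeked source that has not elapsed yet,
 * is disabled or is already pending. */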
1921 static int process_timer(
1922                 sd_event *e,
1923                 usec_t n,
1924                 struct clock_data *d) {
1925
1926         sd_event_source *s;
1927         int r;
1928
1929         assert(e);
1930         assert(d);
1931
1932         for (;;) {
1933                 s = prioq_peek(d->earliest);
1934                 if (!s ||
1935                     s->time.next > n ||
1936                     s->enabled == SD_EVENT_OFF ||
1937                     s->pending)
1938                         break;
1939
1940                 r = source_set_pending(s, true);
1941                 if (r < 0)
1942                         return r;
1943
1944                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1945                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1946                 d->needs_rearm = true;
1947         }
1948
1949         return 0;
1950 }
1951
1952 static int process_child(sd_event *e) {
1953         sd_event_source *s;
1954         Iterator i;
1955         int r;
1956
1957         assert(e);
1958
1959         e->need_process_child = false;
1960
1961         /*
1962            So, this is ugly. We iteratively invoke waitid() with P_PID
1963            + WNOHANG for each PID we wait for, instead of using
1964            P_ALL. This is because we only want to get child
1965            information for very specific child processes, and not all
1966            of them. We might not have processed the SIGCHLD event of a
1967            previous invocation, and we don't want to maintain an
1968            unbounded *per-child* event queue, hence we really don't
1969            want anything flushed out of the kernel's queue that we
1970            don't care about. Since this is O(n), it means that if you
1971            have a lot of processes you probably want to handle SIGCHLD
1972            yourself.
1973
1974            We do not reap the children here (we pass WNOWAIT); that is
1975            only done after the event source has been dispatched, so
1976            that the callback still sees the process as a zombie.
1977         */
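        /* For example (illustrative, not from the original comment): for a
         * child source registered with WEXITED, the waitid() call below
         * becomes waitid(P_PID, pid, &si, WNOHANG|WNOWAIT|WEXITED), i.e.
         * the exit status is peeked at but the zombie is left in place
         * until the event source has been dispatched. */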
1978
1979         HASHMAP_FOREACH(s, e->child_sources, i) {
1980                 assert(s->type == SOURCE_CHILD);
1981
1982                 if (s->pending)
1983                         continue;
1984
1985                 if (s->enabled == SD_EVENT_OFF)
1986                         continue;
1987
1988                 zero(s->child.siginfo);
1989                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1990                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1991                 if (r < 0)
1992                         return -errno;
1993
1994                 if (s->child.siginfo.si_pid != 0) {
1995                         bool zombie =
1996                                 s->child.siginfo.si_code == CLD_EXITED ||
1997                                 s->child.siginfo.si_code == CLD_KILLED ||
1998                                 s->child.siginfo.si_code == CLD_DUMPED;
1999
2000                         if (!zombie && (s->child.options & WEXITED)) {
2001                                 /* If the child isn't dead then let's
2002                                  * immediately remove the state change
2003                                  * from the queue, since there's no
2004                                  * benefit in leaving it queued */
2005
2006                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2007                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2008                         }
2009
2010                         r = source_set_pending(s, true);
2011                         if (r < 0)
2012                                 return r;
2013                 }
2014         }
2015
2016         return 0;
2017 }
2018
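/* Drain the signalfd and mark the matching signal sources as pending;
 * SIGCHLD additionally triggers a scan of the watched child processes.
 * Returns > 0 if at least one siginfo structure was read. */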
2019 static int process_signal(sd_event *e, uint32_t events) {
2020         bool read_one = false;
2021         int r;
2022
2023         assert(e);
2024
2025         assert_return(events == EPOLLIN, -EIO);
2026
2027         for (;;) {
2028                 struct signalfd_siginfo si;
2029                 ssize_t n;
2030                 sd_event_source *s = NULL;
2031
2032                 n = read(e->signal_fd, &si, sizeof(si));
2033                 if (n < 0) {
2034                         if (errno == EAGAIN || errno == EINTR)
2035                                 return read_one;
2036
2037                         return -errno;
2038                 }
2039
2040                 if (_unlikely_(n != sizeof(si)))
2041                         return -EIO;
2042
2043                 assert(si.ssi_signo < _NSIG);
2044
2045                 read_one = true;
2046
2047                 if (si.ssi_signo == SIGCHLD) {
2048                         r = process_child(e);
2049                         if (r < 0)
2050                                 return r;
2051                         if (r > 0)
2052                                 continue;
2053                 }
2054
2055                 if (e->signal_sources)
2056                         s = e->signal_sources[si.ssi_signo];
2057
2058                 if (!s)
2059                         continue;
2060
2061                 s->signal.siginfo = si;
2062                 r = source_set_pending(s, true);
2063                 if (r < 0)
2064                         return r;
2065         }
2066 }
2067
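/* Run the callback of one pending (or exit) event source: clear its
 * pending flag, mark all post sources as pending, honour
 * SD_EVENT_ONESHOT, invoke the type-specific callback and disable the
 * source if the callback failed. If the last reference was dropped
 * while the callback ran, the source is freed here. */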
2068 static int source_dispatch(sd_event_source *s) {
2069         int r = 0;
2070
2071         assert(s);
2072         assert(s->pending || s->type == SOURCE_EXIT);
2073
2074         if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2075                 r = source_set_pending(s, false);
2076                 if (r < 0)
2077                         return r;
2078         }
2079
2080         if (s->type != SOURCE_POST) {
2081                 sd_event_source *z;
2082                 Iterator i;
2083
2084                 /* If we execute a non-post source, let's mark all
2085                  * post sources as pending */
2086
2087                 SET_FOREACH(z, s->event->post_sources, i) {
2088                         if (z->enabled == SD_EVENT_OFF)
2089                                 continue;
2090
2091                         r = source_set_pending(z, true);
2092                         if (r < 0)
2093                                 return r;
2094                 }
2095         }
2096
2097         if (s->enabled == SD_EVENT_ONESHOT) {
2098                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2099                 if (r < 0)
2100                         return r;
2101         }
2102
2103         s->dispatching = true;
2104
2105         switch (s->type) {
2106
2107         case SOURCE_IO:
2108                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2109                 break;
2110
2111         case SOURCE_TIME_REALTIME:
2112         case SOURCE_TIME_BOOTTIME:
2113         case SOURCE_TIME_MONOTONIC:
2114         case SOURCE_TIME_REALTIME_ALARM:
2115         case SOURCE_TIME_BOOTTIME_ALARM:
2116                 r = s->time.callback(s, s->time.next, s->userdata);
2117                 break;
2118
2119         case SOURCE_SIGNAL:
2120                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2121                 break;
2122
2123         case SOURCE_CHILD: {
2124                 bool zombie;
2125
2126                 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2127                          s->child.siginfo.si_code == CLD_KILLED ||
2128                          s->child.siginfo.si_code == CLD_DUMPED;
2129
2130                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2131
2132                 /* Now, reap the PID for good. */
2133                 if (zombie)
2134                         waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2135
2136                 break;
2137         }
2138
2139         case SOURCE_DEFER:
2140                 r = s->defer.callback(s, s->userdata);
2141                 break;
2142
2143         case SOURCE_POST:
2144                 r = s->post.callback(s, s->userdata);
2145                 break;
2146
2147         case SOURCE_EXIT:
2148                 r = s->exit.callback(s, s->userdata);
2149                 break;
2150
2151         case SOURCE_WATCHDOG:
2152         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2153         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2154                 assert_not_reached("Wut? I shouldn't exist.");
2155         }
2156
2157         s->dispatching = false;
2158
2159         if (r < 0) {
2160                 if (s->description)
2161                         log_debug("Event source '%s' returned error, disabling: %s", s->description, strerror(-r));
2162                 else
2163                         log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
2164         }
2165
2166         if (s->n_ref == 0)
2167                 source_free(s);
2168         else if (r < 0)
2169                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2170
2171         return 1;
2172 }
2173
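/* Run the prepare callbacks of all sources that registered one, at most
 * once per event loop iteration, in the order of the prepare priority
 * queue. Sources whose prepare callback fails are disabled. */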
2174 static int event_prepare(sd_event *e) {
2175         int r;
2176
2177         assert(e);
2178
2179         for (;;) {
2180                 sd_event_source *s;
2181
2182                 s = prioq_peek(e->prepare);
2183                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2184                         break;
2185
2186                 s->prepare_iteration = e->iteration;
2187                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2188                 if (r < 0)
2189                         return r;
2190
2191                 assert(s->prepare);
2192
2193                 s->dispatching = true;
2194                 r = s->prepare(s, s->userdata);
2195                 s->dispatching = false;
2196
2197                 if (r < 0) {
2198                         if (s->description)
2199                                 log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->description, strerror(-r));
2200                         else
2201                                 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2202                 }
2203
2204                 if (s->n_ref == 0)
2205                         source_free(s);
2206                 else if (r < 0)
2207                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2208         }
2209
2210         return 0;
2211 }
2212
2213 static int dispatch_exit(sd_event *e) {
2214         sd_event_source *p;
2215         int r;
2216
2217         assert(e);
2218
2219         p = prioq_peek(e->exit);
2220         if (!p || p->enabled == SD_EVENT_OFF) {
2221                 e->state = SD_EVENT_FINISHED;
2222                 return 0;
2223         }
2224
2225         sd_event_ref(e);
2226         e->iteration++;
2227         e->state = SD_EVENT_EXITING;
2228
2229         r = source_dispatch(p);
2230
2231         e->state = SD_EVENT_PASSIVE;
2232         sd_event_unref(e);
2233
2234         return r;
2235 }
2236
2237 static sd_event_source* event_next_pending(sd_event *e) {
2238         sd_event_source *p;
2239
2240         assert(e);
2241
2242         p = prioq_peek(e->pending);
2243         if (!p)
2244                 return NULL;
2245
2246         if (p->enabled == SD_EVENT_OFF)
2247                 return NULL;
2248
2249         return p;
2250 }
2251
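/* Schedule the next watchdog ping somewhere in the window
 * [last + period/2, last + period*3/4], reusing sleep_between() so that
 * watchdog wakeups can coalesce with other timers. As an illustration
 * (numbers not from the original): with a 20s WATCHDOG_USEC period the
 * next ping lands 10-15s after the previous one, comfortably before the
 * deadline. */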
2252 static int arm_watchdog(sd_event *e) {
2253         struct itimerspec its = {};
2254         usec_t t;
2255         int r;
2256
2257         assert(e);
2258         assert(e->watchdog_fd >= 0);
2259
2260         t = sleep_between(e,
2261                           e->watchdog_last + (e->watchdog_period / 2),
2262                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2263
2264         timespec_store(&its.it_value, t);
2265
2266         /* Make sure we never set the watchdog to 0, which tells the
2267          * kernel to disable it. */
2268         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2269                 its.it_value.tv_nsec = 1;
2270
2271         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2272         if (r < 0)
2273                 return -errno;
2274
2275         return 0;
2276 }
2277
2278 static int process_watchdog(sd_event *e) {
2279         assert(e);
2280
2281         if (!e->watchdog)
2282                 return 0;
2283
2284         /* Don't notify watchdog too often */
2285         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2286                 return 0;
2287
2288         sd_notify(false, "WATCHDOG=1");
2289         e->watchdog_last = e->timestamp.monotonic;
2290
2291         return arm_watchdog(e);
2292 }
2293
2294 _public_ int sd_event_prepare(sd_event *e) {
2295         int r;
2296
2297         assert_return(e, -EINVAL);
2298         assert_return(!event_pid_changed(e), -ECHILD);
2299         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2300         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2301
2302         if (e->exit_requested)
2303                 goto pending;
2304
2305         e->iteration++;
2306
2307         r = event_prepare(e);
2308         if (r < 0)
2309                 return r;
2310
2311         r = event_arm_timer(e, &e->realtime);
2312         if (r < 0)
2313                 return r;
2314
2315         r = event_arm_timer(e, &e->boottime);
2316         if (r < 0)
2317                 return r;
2318
2319         r = event_arm_timer(e, &e->monotonic);
2320         if (r < 0)
2321                 return r;
2322
2323         r = event_arm_timer(e, &e->realtime_alarm);
2324         if (r < 0)
2325                 return r;
2326
2327         r = event_arm_timer(e, &e->boottime_alarm);
2328         if (r < 0)
2329                 return r;
2330
2331         if (event_next_pending(e) || e->need_process_child)
2332                 goto pending;
2333
2334         e->state = SD_EVENT_PREPARED;
2335
2336         return 0;
2337
2338 pending:
2339         e->state = SD_EVENT_PREPARED;
2340         r = sd_event_wait(e, 0);
2341         if (r == 0)
2342                 e->state = SD_EVENT_PREPARED;
2343
2344         return r;
2345 }
2346
2347 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2348         struct epoll_event *ev_queue;
2349         unsigned ev_queue_max;
2350         int r, m, i;
2351
2352         assert_return(e, -EINVAL);
2353         assert_return(!event_pid_changed(e), -ECHILD);
2354         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2355         assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);
2356
2357         if (e->exit_requested) {
2358                 e->state = SD_EVENT_PENDING;
2359                 return 1;
2360         }
2361
2362         ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
2363         ev_queue = newa(struct epoll_event, ev_queue_max);
2364
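        /* Convert the usec timeout into milliseconds, rounding up so that we
         * never wake up before the requested deadline; (uint64_t) -1 asks
         * for an infinite wait. */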
2365         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2366                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2367         if (m < 0) {
2368                 if (errno == EINTR) {
2369                         e->state = SD_EVENT_PENDING;
2370                         return 1;
2371                 }
2372
2373                 r = -errno;
2374
2375                 goto finish;
2376         }
2377
2378         dual_timestamp_get(&e->timestamp);
2379         e->timestamp_boottime = now(CLOCK_BOOTTIME);
2380
2381         for (i = 0; i < m; i++) {
2382
2383                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
2384                         r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
2385                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
2386                         r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
2387                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
2388                         r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
2389                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
2390                         r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
2391                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
2392                         r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
2393                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2394                         r = process_signal(e, ev_queue[i].events);
2395                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2396                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2397                 else
2398                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2399
2400                 if (r < 0)
2401                         goto finish;
2402         }
2403
2404         r = process_watchdog(e);
2405         if (r < 0)
2406                 goto finish;
2407
2408         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2409         if (r < 0)
2410                 goto finish;
2411
2412         r = process_timer(e, e->timestamp_boottime, &e->boottime);
2413         if (r < 0)
2414                 goto finish;
2415
2416         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2417         if (r < 0)
2418                 goto finish;
2419
2420         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2421         if (r < 0)
2422                 goto finish;
2423
2424         r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2425         if (r < 0)
2426                 goto finish;
2427
2428         if (e->need_process_child) {
2429                 r = process_child(e);
2430                 if (r < 0)
2431                         goto finish;
2432         }
2433
2434         if (event_next_pending(e)) {
2435                 e->state = SD_EVENT_PENDING;
2436
2437                 return 1;
2438         }
2439
2440         r = 0;
2441
2442 finish:
2443         e->state = SD_EVENT_PASSIVE;
2444
2445         return r;
2446 }
2447
2448 _public_ int sd_event_dispatch(sd_event *e) {
2449         sd_event_source *p;
2450         int r;
2451
2452         assert_return(e, -EINVAL);
2453         assert_return(!event_pid_changed(e), -ECHILD);
2454         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2455         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2456
2457         if (e->exit_requested)
2458                 return dispatch_exit(e);
2459
2460         p = event_next_pending(e);
2461         if (p) {
2462                 sd_event_ref(e);
2463
2464                 e->state = SD_EVENT_RUNNING;
2465                 r = source_dispatch(p);
2466                 e->state = SD_EVENT_PASSIVE;
2467
2468                 sd_event_unref(e);
2469
2470                 return r;
2471         }
2472
2473         e->state = SD_EVENT_PASSIVE;
2474
2475         return 1;
2476 }
2477
2478 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2479         int r;
2480
2481         assert_return(e, -EINVAL);
2482         assert_return(!event_pid_changed(e), -ECHILD);
2483         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2484         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2485
2486         r = sd_event_prepare(e);
2487         if (r > 0)
2488                 return sd_event_dispatch(e);
2489         else if (r < 0)
2490                 return r;
2491
2492         r = sd_event_wait(e, timeout);
2493         if (r > 0)
2494                 return sd_event_dispatch(e);
2495         else
2496                 return r;
2497 }
2498
2499 _public_ int sd_event_loop(sd_event *e) {
2500         int r;
2501
2502         assert_return(e, -EINVAL);
2503         assert_return(!event_pid_changed(e), -ECHILD);
2504         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2505
2506         sd_event_ref(e);
2507
2508         while (e->state != SD_EVENT_FINISHED) {
2509                 r = sd_event_run(e, (uint64_t) -1);
2510                 if (r < 0)
2511                         goto finish;
2512         }
2513
2514         r = e->exit_code;
2515
2516 finish:
2517         sd_event_unref(e);
2518         return r;
2519 }
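
/*
 * Minimal usage sketch for callers of this API (illustrative only, not part
 * of this file; on_defer() and run_once() are hypothetical names):
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             sd_event *e = userdata;
 *             return sd_event_exit(e, 0);
 *     }
 *
 *     static int run_once(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_add_defer(e, NULL, on_defer, e);
 *             if (r >= 0)
 *                     r = sd_event_loop(e);
 *
 *             sd_event_unref(e);
 *             return r;
 *     }
 *
 * sd_event_run() drives a single iteration instead of the full loop, and
 * sd_event_prepare()/sd_event_wait()/sd_event_dispatch() split one iteration
 * into its three phases for integration with foreign event loops.
 */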
2520
2521 _public_ int sd_event_get_fd(sd_event *e) {
2522
2523         assert_return(e, -EINVAL);
2524         assert_return(!event_pid_changed(e), -ECHILD);
2525
2526         return e->epoll_fd;
2527 }
2528
2529 _public_ int sd_event_get_state(sd_event *e) {
2530         assert_return(e, -EINVAL);
2531         assert_return(!event_pid_changed(e), -ECHILD);
2532
2533         return e->state;
2534 }
2535
2536 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2537         assert_return(e, -EINVAL);
2538         assert_return(code, -EINVAL);
2539         assert_return(!event_pid_changed(e), -ECHILD);
2540
2541         if (!e->exit_requested)
2542                 return -ENODATA;
2543
2544         *code = e->exit_code;
2545         return 0;
2546 }
2547
2548 _public_ int sd_event_exit(sd_event *e, int code) {
2549         assert_return(e, -EINVAL);
2550         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2551         assert_return(!event_pid_changed(e), -ECHILD);
2552
2553         e->exit_requested = true;
2554         e->exit_code = code;
2555
2556         return 0;
2557 }
2558
2559 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2560         assert_return(e, -EINVAL);
2561         assert_return(usec, -EINVAL);
2562         assert_return(!event_pid_changed(e), -ECHILD);
2563
2564         /* If we haven't run yet, the cached timestamps are not set yet, so there is nothing to report */
2565         if (!dual_timestamp_is_set(&e->timestamp))
2566                 return -ENODATA;
2567
2568         switch (clock) {
2569
2570         case CLOCK_REALTIME:
2571         case CLOCK_REALTIME_ALARM:
2572                 *usec = e->timestamp.realtime;
2573                 break;
2574
2575         case CLOCK_MONOTONIC:
2576                 *usec = e->timestamp.monotonic;
2577                 break;
2578
2579         case CLOCK_BOOTTIME:
2580         case CLOCK_BOOTTIME_ALARM:
2581                 *usec = e->timestamp_boottime;
2582                 break;

        default:
                /* Unsupported clock */
                return -EOPNOTSUPP;
2583         }
2584
2585         return 0;
2586 }
2587
2588 _public_ int sd_event_default(sd_event **ret) {
2589
2590         static thread_local sd_event *default_event = NULL;
2591         sd_event *e = NULL;
2592         int r;
2593
2594         if (!ret)
2595                 return !!default_event;
2596
2597         if (default_event) {
2598                 *ret = sd_event_ref(default_event);
2599                 return 0;
2600         }
2601
2602         r = sd_event_new(&e);
2603         if (r < 0)
2604                 return r;
2605
2606         e->default_event_ptr = &default_event;
2607         e->tid = gettid();
2608         default_event = e;
2609
2610         *ret = e;
2611         return 1;
2612 }
2613
2614 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2615         assert_return(e, -EINVAL);
2616         assert_return(tid, -EINVAL);
2617         assert_return(!event_pid_changed(e), -ECHILD);
2618
2619         if (e->tid != 0) {
2620                 *tid = e->tid;
2621                 return 0;
2622         }
2623
2624         return -ENXIO;
2625 }
2626
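/* Enable or disable built-in watchdog support: when enabled and the service
 * manager supplied WATCHDOG_USEC (as reported by sd_watchdog_enabled()), a
 * timerfd is registered in the epoll loop and "WATCHDOG=1" is sent via
 * sd_notify() periodically, well within the advertised period. Returns the
 * resulting watchdog state, or a negative errno on failure. */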
2627 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2628         int r;
2629
2630         assert_return(e, -EINVAL);
2631         assert_return(!event_pid_changed(e), -ECHILD);
2632
2633         if (e->watchdog == !!b)
2634                 return e->watchdog;
2635
2636         if (b) {
2637                 struct epoll_event ev = {};
2638
2639                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2640                 if (r <= 0)
2641                         return r;
2642
2643                 /* Issue first ping immediately */
2644                 sd_notify(false, "WATCHDOG=1");
2645                 e->watchdog_last = now(CLOCK_MONOTONIC);
2646
2647                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2648                 if (e->watchdog_fd < 0)
2649                         return -errno;
2650
2651                 r = arm_watchdog(e);
2652                 if (r < 0)
2653                         goto fail;
2654
2655                 ev.events = EPOLLIN;
2656                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2657
2658                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2659                 if (r < 0) {
2660                         r = -errno;
2661                         goto fail;
2662                 }
2663
2664         } else {
2665                 if (e->watchdog_fd >= 0) {
2666                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2667                         e->watchdog_fd = safe_close(e->watchdog_fd);
2668                 }
2669         }
2670
2671         e->watchdog = !!b;
2672         return e->watchdog;
2673
2674 fail:
2675         e->watchdog_fd = safe_close(e->watchdog_fd);
2676         return r;
2677 }
2678
2679 _public_ int sd_event_get_watchdog(sd_event *e) {
2680         assert_return(e, -EINVAL);
2681         assert_return(!event_pid_changed(e), -ECHILD);
2682
2683         return e->watchdog;
2684 }