chiark / gitweb /
f0356a48e647f90711699daf7062852c00b5a595
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* Discriminator for the union inside sd_event_source, and the key the
 * event loop dispatches on. The five SOURCE_TIME_* entries correspond
 * 1:1 to the kernel clocks supported by timerfd (see
 * event_source_type_to_clock()). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the source types that are driven by a clock/timerfd. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* One registered event source. The "type" field selects which member
 * of the trailing union is valid. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* the loop this source is attached to; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare; /* optional pre-iteration callback; non-NULL means queued in event->prepare */

        char *description;          /* free-form name for debugging, owned by the source */

        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;             /* queued in event->pending, waiting to be dispatched */
        bool dispatching:1;         /* presumably set while the callback runs — not visible in this chunk, confirm */
        bool floating:1;            /* the loop owns the source, instead of the source pinning the loop */

        int64_t priority;           /* lower values are dispatched first (see the prioq comparators) */
        unsigned pending_index;     /* position in event->pending prioq */
        unsigned prepare_index;     /* position in event->prepare prioq */
        unsigned pending_iteration; /* loop iteration in which the source became pending */
        unsigned prepare_iteration; /* loop iteration in which prepare was last run */

        LIST_FIELDS(sd_event_source, sources);  /* linkage in event->sources */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask requested by the caller */
                        uint32_t revents;   /* events reported by the most recent poll */
                        bool registered:1;  /* fd is currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest dispatch time, and how much later is acceptable */
                        unsigned earliest_index; /* positions in the per-clock prioqs */
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;  /* WEXITED/WSTOPPED/WCONTINUED flags, see sd_event_add_child() */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;  /* position in event->exit prioq */
                } exit;
        };
};
122
/* Per-clock timer state; one instance per supported clock is embedded
 * in struct sd_event. */
struct clock_data {
        int fd;  /* timerfd for this clock, or -1 until first use */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;  /* wakeup time currently programmed, USEC_INFINITY if none */

        bool needs_rearm:1;  /* prioqs changed; the timerfd must be reprogrammed */
};
139
/* The event loop object. Reference counted; every non-floating source
 * holds one reference (see source_new()). */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;     /* the epoll instance everything is hooked into */
        int signal_fd;    /* signalfd covering "sigset" below, or -1 */
        int watchdog_fd;  /* timerfd for watchdog wakeups — logic not in this chunk, confirm */

        Prioq *pending;   /* sources ready for dispatch, ordered by pending_prioq_compare() */
        Prioq *prepare;   /* sources with a prepare callback, ordered by prepare_prioq_compare() */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;   /* per-machine wakeup-alignment offset, computed lazily in initialize_perturb() */

        sigset_t sigset;                  /* the signals currently routed to signal_fd */
        sd_event_source **signal_sources; /* indexed by signal number, _NSIG entries, lazily allocated */

        Hashmap *child_sources;           /* pid (as pointer key) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* how many child sources are not SD_EVENT_OFF */

        Set *post_sources;

        Prioq *exit;      /* SOURCE_EXIT sources, ordered by exit_prioq_compare() */

        pid_t original_pid;  /* pid at creation time, to detect (unsupported) use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;  /* where to clear ourselves on free, if installed as default loop */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;  /* number of attached sources; mirrors the list below */

        LIST_HEAD(sd_event_source, sources);
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
/* Release the resources owned by a per-clock state: the timerfd and
 * both scheduling prioqs. The clock_data struct itself is embedded in
 * sd_event and is not freed here. */
static void free_clock_data(struct clock_data *d) {
        assert(d);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
364
/* Destroy an event loop object and everything it owns. Any sources
 * still attached at this point must be floating ones — a non-floating
 * source holds a reference that would have kept the loop alive. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Disconnect and drop the remaining (floating) sources. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If we were installed as the default event loop, clear the
         * back-pointer so it is not left dangling. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
/* Allocate a new event loop with a single reference and a fresh epoll
 * instance. All other file descriptors (signalfd, per-clock timerfds,
 * watchdog) are created lazily on first use.
 * Returns 0 on success, -ENOMEM or another negative errno-style code
 * on failure. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark every fd as not-yet-opened. */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();  /* to detect (unsupported) use across fork() */
        e->perturb = USEC_INFINITY;  /* computed lazily by initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating am event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
601 static int event_update_signal_fd(sd_event *e) {
602         struct epoll_event ev = {};
603         bool add_to_epoll;
604         int r;
605
606         assert(e);
607
608         add_to_epoll = e->signal_fd < 0;
609
610         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
611         if (r < 0)
612                 return -errno;
613
614         e->signal_fd = r;
615
616         if (!add_to_epoll)
617                 return 0;
618
619         ev.events = EPOLLIN;
620         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
621
622         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
623         if (r < 0) {
624                 e->signal_fd = safe_close(e->signal_fd);
625                 return -errno;
626         }
627
628         return 0;
629 }
630
/* Detach a source from its event loop: undo whatever epoll/signalfd/
 * prioq/hashmap registration its type requires, remove it from the
 * loop's source list and, for non-floating sources, drop the loop
 * reference it held. Idempotent: calling it on an already
 * disconnected source is a no-op. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;  /* already disconnected */

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Drop the source from both scheduling prioqs and make
                 * sure the timerfd gets reprogrammed. */
                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        /* Sever the link to the loop last, so the code above could
         * still make use of it. */
        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
735
736 static void source_free(sd_event_source *s) {
737         assert(s);
738
739         source_disconnect(s);
740         free(s->description);
741         free(s);
742 }
743
/* Flip the "pending" (queued-for-dispatch) state of a source, keeping
 * the loop's pending prioq in sync. Since the per-clock prioqs order
 * pending time sources last, time sources are additionally reshuffled
 * there and the clock is flagged for timerfd rearming.
 * Returns 0 on success, a negative code on OOM. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when the source became pending, so older
                 * entries can be dispatched first (fairness). */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
779
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
781         sd_event_source *s;
782
783         assert(e);
784
785         s = new0(sd_event_source, 1);
786         if (!s)
787                 return NULL;
788
789         s->n_ref = 1;
790         s->event = e;
791         s->floating = floating;
792         s->type = type;
793         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
794
795         if (!floating)
796                 sd_event_ref(e);
797
798         LIST_PREPEND(sources, e->sources, s);
799         e->n_sources ++;
800
801         return s;
802 }
803
804 _public_ int sd_event_add_io(
805                 sd_event *e,
806                 sd_event_source **ret,
807                 int fd,
808                 uint32_t events,
809                 sd_event_io_handler_t callback,
810                 void *userdata) {
811
812         sd_event_source *s;
813         int r;
814
815         assert_return(e, -EINVAL);
816         assert_return(fd >= 0, -EINVAL);
817         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
818         assert_return(callback, -EINVAL);
819         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
820         assert_return(!event_pid_changed(e), -ECHILD);
821
822         s = source_new(e, !ret, SOURCE_IO);
823         if (!s)
824                 return -ENOMEM;
825
826         s->io.fd = fd;
827         s->io.events = events;
828         s->io.callback = callback;
829         s->userdata = userdata;
830         s->enabled = SD_EVENT_ON;
831
832         r = source_io_register(s, s->enabled, events);
833         if (r < 0) {
834                 source_free(s);
835                 return r;
836         }
837
838         if (ret)
839                 *ret = s;
840
841         return 0;
842 }
843
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        /* Compute only once; USEC_INFINITY marks "not yet computed". */
        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
862
863 static int event_setup_timer_fd(
864                 sd_event *e,
865                 struct clock_data *d,
866                 clockid_t clock) {
867
868         struct epoll_event ev = {};
869         int r, fd;
870
871         assert(e);
872         assert(d);
873
874         if (_likely_(d->fd >= 0))
875                 return 0;
876
877         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
878         if (fd < 0)
879                 return -errno;
880
881         ev.events = EPOLLIN;
882         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
883
884         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
885         if (r < 0) {
886                 safe_close(fd);
887                 return -errno;
888         }
889
890         d->fd = fd;
891         return 0;
892 }
893
/* Register a timer event source on the given clock, firing at "usec"
 * (on that clock's timeline) with "accuracy" of permissible slack (0
 * selects DEFAULT_ACCURACY_USEC). The per-clock prioqs and the
 * timerfd are created lazily here on first use. New time sources
 * start out as SD_EVENT_ONESHOT. If "ret" is NULL the source is
 * floating, owned by the loop.
 * Returns 0 on success, a negative errno-style code on failure. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily set up the per-clock scheduling prioqs... */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ...and the timerfd for this clock. */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
969
/* Default handler used by sd_event_add_signal() when the caller
 * passes no callback: make the event loop exit, with the userdata
 * pointer (re)interpreted as the exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
975
976 _public_ int sd_event_add_signal(
977                 sd_event *e,
978                 sd_event_source **ret,
979                 int sig,
980                 sd_event_signal_handler_t callback,
981                 void *userdata) {
982
983         sd_event_source *s;
984         sigset_t ss;
985         int r;
986         bool previous;
987
988         assert_return(e, -EINVAL);
989         assert_return(sig > 0, -EINVAL);
990         assert_return(sig < _NSIG, -EINVAL);
991         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992         assert_return(!event_pid_changed(e), -ECHILD);
993
994         if (!callback)
995                 callback = signal_exit_callback;
996
997         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
998         if (r < 0)
999                 return -errno;
1000
1001         if (!sigismember(&ss, sig))
1002                 return -EBUSY;
1003
1004         if (!e->signal_sources) {
1005                 e->signal_sources = new0(sd_event_source*, _NSIG);
1006                 if (!e->signal_sources)
1007                         return -ENOMEM;
1008         } else if (e->signal_sources[sig])
1009                 return -EBUSY;
1010
1011         previous = need_signal(e, sig);
1012
1013         s = source_new(e, !ret, SOURCE_SIGNAL);
1014         if (!s)
1015                 return -ENOMEM;
1016
1017         s->signal.sig = sig;
1018         s->signal.callback = callback;
1019         s->userdata = userdata;
1020         s->enabled = SD_EVENT_ON;
1021
1022         e->signal_sources[sig] = s;
1023
1024         if (!previous) {
1025                 assert_se(sigaddset(&e->sigset, sig) == 0);
1026
1027                 r = event_update_signal_fd(e);
1028                 if (r < 0) {
1029                         source_free(s);
1030                         return r;
1031                 }
1032         }
1033
1034         if (ret)
1035                 *ret = s;
1036
1037         return 0;
1038 }
1039
/* Add an event source watching state changes of child process @pid.
 * @options is a combination of WEXITED/WSTOPPED/WCONTINUED. The source is
 * created as SD_EVENT_ONESHOT; exactly one source per PID is allowed.
 * Relies on SIGCHLD being routed through the event loop's signalfd. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one source per child process. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Sample whether SIGCHLD is already wanted before registering the new
         * source, so we know below whether the signalfd needs updating. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        /* source_free() also removes the hashmap entry and
                         * drops the enabled-child counter again. */
                        source_free(s);
                        return r;
                }
        }

        /* Force a waitid() sweep on the next loop iteration, in case the
         * child already changed state before we registered. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1104
1105 _public_ int sd_event_add_defer(
1106                 sd_event *e,
1107                 sd_event_source **ret,
1108                 sd_event_handler_t callback,
1109                 void *userdata) {
1110
1111         sd_event_source *s;
1112         int r;
1113
1114         assert_return(e, -EINVAL);
1115         assert_return(callback, -EINVAL);
1116         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1117         assert_return(!event_pid_changed(e), -ECHILD);
1118
1119         s = source_new(e, !ret, SOURCE_DEFER);
1120         if (!s)
1121                 return -ENOMEM;
1122
1123         s->defer.callback = callback;
1124         s->userdata = userdata;
1125         s->enabled = SD_EVENT_ONESHOT;
1126
1127         r = source_set_pending(s, true);
1128         if (r < 0) {
1129                 source_free(s);
1130                 return r;
1131         }
1132
1133         if (ret)
1134                 *ret = s;
1135
1136         return 0;
1137 }
1138
1139 _public_ int sd_event_add_post(
1140                 sd_event *e,
1141                 sd_event_source **ret,
1142                 sd_event_handler_t callback,
1143                 void *userdata) {
1144
1145         sd_event_source *s;
1146         int r;
1147
1148         assert_return(e, -EINVAL);
1149         assert_return(callback, -EINVAL);
1150         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1151         assert_return(!event_pid_changed(e), -ECHILD);
1152
1153         r = set_ensure_allocated(&e->post_sources, NULL);
1154         if (r < 0)
1155                 return r;
1156
1157         s = source_new(e, !ret, SOURCE_POST);
1158         if (!s)
1159                 return -ENOMEM;
1160
1161         s->post.callback = callback;
1162         s->userdata = userdata;
1163         s->enabled = SD_EVENT_ON;
1164
1165         r = set_put(e->post_sources, s);
1166         if (r < 0) {
1167                 source_free(s);
1168                 return r;
1169         }
1170
1171         if (ret)
1172                 *ret = s;
1173
1174         return 0;
1175 }
1176
1177 _public_ int sd_event_add_exit(
1178                 sd_event *e,
1179                 sd_event_source **ret,
1180                 sd_event_handler_t callback,
1181                 void *userdata) {
1182
1183         sd_event_source *s;
1184         int r;
1185
1186         assert_return(e, -EINVAL);
1187         assert_return(callback, -EINVAL);
1188         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1189         assert_return(!event_pid_changed(e), -ECHILD);
1190
1191         if (!e->exit) {
1192                 e->exit = prioq_new(exit_prioq_compare);
1193                 if (!e->exit)
1194                         return -ENOMEM;
1195         }
1196
1197         s = source_new(e, !ret, SOURCE_EXIT);
1198         if (!s)
1199                 return -ENOMEM;
1200
1201         s->exit.callback = callback;
1202         s->userdata = userdata;
1203         s->exit.prioq_index = PRIOQ_IDX_NULL;
1204         s->enabled = SD_EVENT_ONESHOT;
1205
1206         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1207         if (r < 0) {
1208                 source_free(s);
1209                 return r;
1210         }
1211
1212         if (ret)
1213                 *ret = s;
1214
1215         return 0;
1216 }
1217
1218 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1219         assert_return(s, NULL);
1220
1221         assert(s->n_ref >= 1);
1222         s->n_ref++;
1223
1224         return s;
1225 }
1226
/* Drop one reference; frees the source when the count hits zero.
 * Always returns NULL so callers can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        /* Detach from the loop now; the dispatcher frees the
                         * object itself once the callback has returned. */
                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1255
/* Set (or clear, with NULL) the free-form description used in logging. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);

        return free_and_strdup(&s->description, description);
}
1261
/* Return the description string; may store NULL if none was ever set.
 * The string remains owned by the event source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);

        *description = s->description;
        return 0;
}
1269
/* Return the event loop this source is attached to (no new reference taken). */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1275
/* Return whether the source is currently queued for dispatching.
 * Exit sources have no pending state, hence -EDOM for them. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1284
/* Return the file descriptor watched by an I/O source (-EDOM for other types). */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1292
/* Replace the file descriptor of an I/O source. If the source is enabled, the
 * new fd is registered with epoll before the old one is removed, so a failure
 * leaves the original registration fully intact. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled: just remember the fd, it gets registered on enable. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first ... */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* ... and roll back to the old one on failure. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* ... then drop the old fd from epoll; best-effort, the fd may
                 * already have been closed by the caller. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1328
/* Return the EPOLL* event mask this I/O source is configured to watch. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1338
/* Change the EPOLL* mask of an I/O source. Re-registers with epoll first, so
 * on failure the previously configured mask stays in effect. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        /* Any previously collected revents refer to the old mask, drop them. */
        source_set_pending(s, false);

        return 0;
}
1363
/* Return the events actually seen by epoll; only meaningful while the source
 * is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1374
/* Return the signal number a signal source watches (-EDOM for other types). */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1382
1383 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1384         assert_return(s, -EINVAL);
1385         assert_return(!event_pid_changed(s->event), -ECHILD);
1386
1387         return s->priority;
1388 }
1389
1390 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1391         assert_return(s, -EINVAL);
1392         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1393         assert_return(!event_pid_changed(s->event), -ECHILD);
1394
1395         if (s->priority == priority)
1396                 return 0;
1397
1398         s->priority = priority;
1399
1400         if (s->pending)
1401                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1402
1403         if (s->prepare)
1404                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1405
1406         if (s->type == SOURCE_EXIT)
1407                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1408
1409         return 0;
1410 }
1411
/* Store the current enablement state (SD_EVENT_OFF/ON/ONESHOT) in *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1420
1421 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1422         int r;
1423
1424         assert_return(s, -EINVAL);
1425         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1426         assert_return(!event_pid_changed(s->event), -ECHILD);
1427
1428         /* If we are dead anyway, we are fine with turning off
1429          * sources, but everything else needs to fail. */
1430         if (s->event->state == SD_EVENT_FINISHED)
1431                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1432
1433         if (s->enabled == m)
1434                 return 0;
1435
1436         if (m == SD_EVENT_OFF) {
1437
1438                 switch (s->type) {
1439
1440                 case SOURCE_IO:
1441                         r = source_io_unregister(s);
1442                         if (r < 0)
1443                                 return r;
1444
1445                         s->enabled = m;
1446                         break;
1447
1448                 case SOURCE_TIME_REALTIME:
1449                 case SOURCE_TIME_BOOTTIME:
1450                 case SOURCE_TIME_MONOTONIC:
1451                 case SOURCE_TIME_REALTIME_ALARM:
1452                 case SOURCE_TIME_BOOTTIME_ALARM: {
1453                         struct clock_data *d;
1454
1455                         s->enabled = m;
1456                         d = event_get_clock_data(s->event, s->type);
1457                         assert(d);
1458
1459                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1460                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1461                         d->needs_rearm = true;
1462                         break;
1463                 }
1464
1465                 case SOURCE_SIGNAL:
1466                         assert(need_signal(s->event, s->signal.sig));
1467
1468                         s->enabled = m;
1469
1470                         if (!need_signal(s->event, s->signal.sig)) {
1471                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1472
1473                                 (void) event_update_signal_fd(s->event);
1474                                 /* If disabling failed, we might get a spurious event,
1475                                  * but otherwise nothing bad should happen. */
1476                         }
1477
1478                         break;
1479
1480                 case SOURCE_CHILD:
1481                         assert(need_signal(s->event, SIGCHLD));
1482
1483                         s->enabled = m;
1484
1485                         assert(s->event->n_enabled_child_sources > 0);
1486                         s->event->n_enabled_child_sources--;
1487
1488                         if (!need_signal(s->event, SIGCHLD)) {
1489                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1490
1491                                 (void) event_update_signal_fd(s->event);
1492                         }
1493
1494                         break;
1495
1496                 case SOURCE_EXIT:
1497                         s->enabled = m;
1498                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1499                         break;
1500
1501                 case SOURCE_DEFER:
1502                 case SOURCE_POST:
1503                         s->enabled = m;
1504                         break;
1505
1506                 default:
1507                         assert_not_reached("Wut? I shouldn't exist.");
1508                 }
1509
1510         } else {
1511                 switch (s->type) {
1512
1513                 case SOURCE_IO:
1514                         r = source_io_register(s, m, s->io.events);
1515                         if (r < 0)
1516                                 return r;
1517
1518                         s->enabled = m;
1519                         break;
1520
1521                 case SOURCE_TIME_REALTIME:
1522                 case SOURCE_TIME_BOOTTIME:
1523                 case SOURCE_TIME_MONOTONIC:
1524                 case SOURCE_TIME_REALTIME_ALARM:
1525                 case SOURCE_TIME_BOOTTIME_ALARM: {
1526                         struct clock_data *d;
1527
1528                         s->enabled = m;
1529                         d = event_get_clock_data(s->event, s->type);
1530                         assert(d);
1531
1532                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1533                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1534                         d->needs_rearm = true;
1535                         break;
1536                 }
1537
1538                 case SOURCE_SIGNAL:
1539                         /* Check status before enabling. */
1540                         if (!need_signal(s->event, s->signal.sig)) {
1541                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1542
1543                                 r = event_update_signal_fd(s->event);
1544                                 if (r < 0) {
1545                                         s->enabled = SD_EVENT_OFF;
1546                                         return r;
1547                                 }
1548                         }
1549
1550                         s->enabled = m;
1551                         break;
1552
1553                 case SOURCE_CHILD:
1554                         /* Check status before enabling. */
1555                         if (s->enabled == SD_EVENT_OFF) {
1556                                 if (!need_signal(s->event, SIGCHLD)) {
1557                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1558
1559                                         r = event_update_signal_fd(s->event);
1560                                         if (r < 0) {
1561                                                 s->enabled = SD_EVENT_OFF;
1562                                                 return r;
1563                                         }
1564                                 }
1565
1566                                 s->event->n_enabled_child_sources++;
1567                         }
1568
1569                         s->enabled = m;
1570                         break;
1571
1572                 case SOURCE_EXIT:
1573                         s->enabled = m;
1574                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1575                         break;
1576
1577                 case SOURCE_DEFER:
1578                 case SOURCE_POST:
1579                         s->enabled = m;
1580                         break;
1581
1582                 default:
1583                         assert_not_reached("Wut? I shouldn't exist.");
1584                 }
1585         }
1586
1587         if (s->pending)
1588                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1589
1590         if (s->prepare)
1591                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1592
1593         return 0;
1594 }
1595
/* Return the absolute trigger time (in µs of the source's clock) in *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1605
/* Move a timer source to a new absolute trigger time and requeue it. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* A possibly already-collected expiry refers to the old deadline. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        /* The trigger time is the sort key of both timer prioqs. */
        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1628
/* Return the dispatch accuracy (allowed wakeup slack, in µs) in *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1638
/* Set the dispatch accuracy of a timer source; 0 selects the default
 * (DEFAULT_ACCURACY_USEC). */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        /* Accuracy only affects the "latest acceptable wakeup" ordering. */
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1663
/* Return the CLOCK_* id corresponding to this timer source's type. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1673
/* Return the PID a child source watches (-EDOM for other source types). */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1683
/* Install, replace or remove (callback == NULL) a prepare callback, invoked
 * just before the loop goes to sleep. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Replacing one callback by another: the prioq entry stays valid. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        /* Installing from scratch queues the source; clearing dequeues it. */
        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1715
/* Return the opaque userdata pointer passed at source creation. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1721
1722 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1723         void *ret;
1724
1725         assert_return(s, NULL);
1726
1727         ret = s->userdata;
1728         s->userdata = userdata;
1729
1730         return ret;
1731 }
1732
/* Pick a wakeup time in [a, b], preferring system-wide synchronized points
 * (derived from the per-boot perturbation value) at successively finer
 * granularities. Falls back to b if no aligned point fits the window. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, so this only matches a == 0: wake immediately. */
        if (a <= 0)
                return 0;

        /* Window of one microsecond or less: no room to optimize. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                /* Underflow guard: can't step back a full minute. */
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronized point fits; wake as late as allowed. */
        return b;
}
1810
1811 static int event_arm_timer(
1812                 sd_event *e,
1813                 struct clock_data *d) {
1814
1815         struct itimerspec its = {};
1816         sd_event_source *a, *b;
1817         usec_t t;
1818         int r;
1819
1820         assert(e);
1821         assert(d);
1822
1823         if (!d->needs_rearm)
1824                 return 0;
1825         else
1826                 d->needs_rearm = false;
1827
1828         a = prioq_peek(d->earliest);
1829         if (!a || a->enabled == SD_EVENT_OFF) {
1830
1831                 if (d->fd < 0)
1832                         return 0;
1833
1834                 if (d->next == USEC_INFINITY)
1835                         return 0;
1836
1837                 /* disarm */
1838                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1839                 if (r < 0)
1840                         return r;
1841
1842                 d->next = USEC_INFINITY;
1843                 return 0;
1844         }
1845
1846         b = prioq_peek(d->latest);
1847         assert_se(b && b->enabled != SD_EVENT_OFF);
1848
1849         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1850         if (d->next == t)
1851                 return 0;
1852
1853         assert_se(d->fd >= 0);
1854
1855         if (t == 0) {
1856                 /* We don' want to disarm here, just mean some time looooong ago. */
1857                 its.it_value.tv_sec = 0;
1858                 its.it_value.tv_nsec = 1;
1859         } else
1860                 timespec_store(&its.it_value, t);
1861
1862         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1863         if (r < 0)
1864                 return -errno;
1865
1866         d->next = t;
1867         return 0;
1868 }
1869
1870 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1871         assert(e);
1872         assert(s);
1873         assert(s->type == SOURCE_IO);
1874
1875         /* If the event source was already pending, we just OR in the
1876          * new revents, otherwise we reset the value. The ORing is
1877          * necessary to handle EPOLLONESHOT events properly where
1878          * readability might happen independently of writability, and
1879          * we need to keep track of both */
1880
1881         if (s->pending)
1882                 s->io.revents |= revents;
1883         else
1884                 s->io.revents = revents;
1885
1886         return source_set_pending(s, true);
1887 }
1888
1889 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1890         uint64_t x;
1891         ssize_t ss;
1892
1893         assert(e);
1894         assert(fd >= 0);
1895
1896         assert_return(events == EPOLLIN, -EIO);
1897
1898         ss = read(fd, &x, sizeof(x));
1899         if (ss < 0) {
1900                 if (errno == EAGAIN || errno == EINTR)
1901                         return 0;
1902
1903                 return -errno;
1904         }
1905
1906         if (_unlikely_(ss != sizeof(x)))
1907                 return -EIO;
1908
1909         if (next)
1910                 *next = USEC_INFINITY;
1911
1912         return 0;
1913 }
1914
1915 static int process_timer(
1916                 sd_event *e,
1917                 usec_t n,
1918                 struct clock_data *d) {
1919
1920         sd_event_source *s;
1921         int r;
1922
1923         assert(e);
1924         assert(d);
1925
1926         for (;;) {
1927                 s = prioq_peek(d->earliest);
1928                 if (!s ||
1929                     s->time.next > n ||
1930                     s->enabled == SD_EVENT_OFF ||
1931                     s->pending)
1932                         break;
1933
1934                 r = source_set_pending(s, true);
1935                 if (r < 0)
1936                         return r;
1937
1938                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1939                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1940                 d->needs_rearm = true;
1941         }
1942
1943         return 0;
1944 }
1945
/* Polls (without reaping) every watched child PID for a state change and
 * marks the matching child sources pending. Returns 0 on success, a
 * negative errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Skip sources that are already queued or disabled */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when there was no state change to report */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2012
/* Drains the signalfd and marks the matching signal sources pending.
 * Returns > 0 if at least one siginfo was read, 0 if the fd was empty,
 * negative errno-style error on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: queue fully drained. EINTR: stop here and
                         * report what we read so far. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* The signalfd always delivers whole siginfo structures */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                /* SIGCHLD gets special treatment: poll the watched children */
                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* No source registered for this signal, drop it */
                if (!s)
                        continue;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2061
/* Invokes the callback of a single event source and performs the
 * surrounding bookkeeping. Returns 1 on success (even if the callback
 * failed — failing sources are merely disabled), < 0 on internal error. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources stay pending across dispatches; all other
         * types are unqueued before their callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Disable ONESHOT sources *before* the callback, so that the
         * callback may re-enable the source if it wants to. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        /* Each source type carries its own callback signature */
        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine this before the callback runs, it may clobber
                 * the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug("Event source '%s' returned error, disabling: %s", s->description, strerror(-r));
                else
                        log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
        }

        /* The callback might have dropped the last reference, in which case
         * freeing was deferred until dispatch finished. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2167
/* Runs the prepare() callbacks of all enabled sources that have not yet
 * been prepared during this loop iteration, in queue order. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Record the iteration before re-sorting, so the source moves
                 * behind the not-yet-prepared ones and we make progress. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->description, strerror(-r));
                        else
                                log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
                }

                /* The callback might have dropped the last reference */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2206
/* Dispatches the topmost enabled exit source, or finishes the loop when
 * no enabled exit source is left. */
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        /* Pin the loop so the callback cannot free it underneath us */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2230
2231 static sd_event_source* event_next_pending(sd_event *e) {
2232         sd_event_source *p;
2233
2234         assert(e);
2235
2236         p = prioq_peek(e->pending);
2237         if (!p)
2238                 return NULL;
2239
2240         if (p->enabled == SD_EVENT_OFF)
2241                 return NULL;
2242
2243         return p;
2244 }
2245
/* Programs the watchdog timerfd for an absolute wakeup between 1/2 and
 * 3/4 of the watchdog period after the last ping (exact point chosen by
 * sleep_between()). */
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
2271
2272 static int process_watchdog(sd_event *e) {
2273         assert(e);
2274
2275         if (!e->watchdog)
2276                 return 0;
2277
2278         /* Don't notify watchdog too often */
2279         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2280                 return 0;
2281
2282         sd_notify(false, "WATCHDOG=1");
2283         e->watchdog_last = e->timestamp.monotonic;
2284
2285         return arm_watchdog(e);
2286 }
2287
/* First stage of a loop iteration: runs prepare callbacks and arms the
 * per-clock timerfds. Returns > 0 if events are already pending (skip
 * sd_event_wait()), 0 if the caller should wait, < 0 on error. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Re-arm the timerfds of all five supported clocks */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is already dispatchable: do a zero-timeout wait so the
         * kernel queues are flushed. If that reports nothing after all
         * (r == 0), sd_event_wait() has reset the state to PASSIVE, so
         * restore PREPARED to keep a subsequent sd_event_wait() legal. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2340
/* Second stage of a loop iteration: polls the epoll fd (timeout in µs,
 * (uint64_t) -1 means wait forever) and converts kernel events into
 * pending sources. Returns 1 if something is ready for dispatching,
 * 0 on timeout with nothing pending, < 0 on error. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocate the epoll output buffer, capped at EPOLL_QUEUE_MAX */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the µs timeout up to whole ms for epoll_wait() */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wakeup timestamps; used for timer dispatching below and
         * exposed via sd_event_now(). */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Route each epoll event: the INT_TO_PTR() markers identify our
         * internal fds, anything else is an IO source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue all timer sources whose deadline was reached, per clock */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2441
/* Third stage of a loop iteration: dispatches the highest-priority
 * pending source (or the exit sources if an exit was requested).
 * Returns > 0 on success, < 0 on error. */
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                /* Pin the loop so the callback cannot free it underneath us */
                sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_PASSIVE;

                sd_event_unref(e);

                return r;
        }

        /* Nothing (enabled) left to dispatch after all */
        e->state = SD_EVENT_PASSIVE;

        return 1;
}
2471
2472 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2473         int r;
2474
2475         assert_return(e, -EINVAL);
2476         assert_return(!event_pid_changed(e), -ECHILD);
2477         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2478         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2479
2480         r = sd_event_prepare(e);
2481         if (r > 0)
2482                 return sd_event_dispatch(e);
2483         else if (r < 0)
2484                 return r;
2485
2486         r = sd_event_wait(e, timeout);
2487         if (r > 0)
2488                 return sd_event_dispatch(e);
2489         else
2490                 return r;
2491 }
2492
2493 _public_ int sd_event_loop(sd_event *e) {
2494         int r;
2495
2496         assert_return(e, -EINVAL);
2497         assert_return(!event_pid_changed(e), -ECHILD);
2498         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2499
2500         sd_event_ref(e);
2501
2502         while (e->state != SD_EVENT_FINISHED) {
2503                 r = sd_event_run(e, (uint64_t) -1);
2504                 if (r < 0)
2505                         goto finish;
2506         }
2507
2508         r = e->exit_code;
2509
2510 finish:
2511         sd_event_unref(e);
2512         return r;
2513 }
2514
2515 _public_ int sd_event_get_fd(sd_event *e) {
2516
2517         assert_return(e, -EINVAL);
2518         assert_return(!event_pid_changed(e), -ECHILD);
2519
2520         return e->epoll_fd;
2521 }
2522
2523 _public_ int sd_event_get_state(sd_event *e) {
2524         assert_return(e, -EINVAL);
2525         assert_return(!event_pid_changed(e), -ECHILD);
2526
2527         return e->state;
2528 }
2529
2530 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2531         assert_return(e, -EINVAL);
2532         assert_return(code, -EINVAL);
2533         assert_return(!event_pid_changed(e), -ECHILD);
2534
2535         if (!e->exit_requested)
2536                 return -ENODATA;
2537
2538         *code = e->exit_code;
2539         return 0;
2540 }
2541
2542 _public_ int sd_event_exit(sd_event *e, int code) {
2543         assert_return(e, -EINVAL);
2544         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2545         assert_return(!event_pid_changed(e), -ECHILD);
2546
2547         e->exit_requested = true;
2548         e->exit_code = code;
2549
2550         return 0;
2551 }
2552
2553 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2554         assert_return(e, -EINVAL);
2555         assert_return(usec, -EINVAL);
2556         assert_return(!event_pid_changed(e), -ECHILD);
2557
2558         /* If we haven't run yet, just get the actual time */
2559         if (!dual_timestamp_is_set(&e->timestamp))
2560                 return -ENODATA;
2561
2562         switch (clock) {
2563
2564         case CLOCK_REALTIME:
2565         case CLOCK_REALTIME_ALARM:
2566                 *usec = e->timestamp.realtime;
2567                 break;
2568
2569         case CLOCK_MONOTONIC:
2570                 *usec = e->timestamp.monotonic;
2571                 break;
2572
2573         case CLOCK_BOOTTIME:
2574         case CLOCK_BOOTTIME_ALARM:
2575                 *usec = e->timestamp_boottime;
2576                 break;
2577         }
2578
2579         return 0;
2580 }
2581
/* Returns (a reference to) the calling thread's default event loop,
 * creating it on first use. With ret == NULL this only reports whether
 * a default loop already exists for this thread. */
_public_ int sd_event_default(sd_event **ret) {

        /* One default loop per thread */
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember where to unset the pointer on destruction, and tie the
         * loop to the creating thread (see sd_event_get_tid()). */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2607
2608 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2609         assert_return(e, -EINVAL);
2610         assert_return(tid, -EINVAL);
2611         assert_return(!event_pid_changed(e), -ECHILD);
2612
2613         if (e->tid != 0) {
2614                 *tid = e->tid;
2615                 return 0;
2616         }
2617
2618         return -ENXIO;
2619 }
2620
/* Enables or disables automatic service-manager watchdog pinging for
 * this loop. When enabled (and the manager requested watchdog support,
 * see sd_watchdog_enabled()), the loop sends "WATCHDOG=1" periodically
 * via process_watchdog(). Returns the new setting, or 0 if the manager
 * did not request watchdog support, or a negative error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means no watchdog was requested by the manager */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Hook the timerfd into the epoll loop, tagged with the
                 * internal SOURCE_WATCHDOG marker. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2672
2673 _public_ int sd_event_get_watchdog(sd_event *e) {
2674         assert_return(e, -EINVAL);
2675         assert_return(!event_pid_changed(e), -ECHILD);
2676
2677         return e->watchdog;
2678 }