chiark / gitweb /
a14ade221b3ae89b76a6eccc5e76e8f23f6586f1
[elogind.git] / src / libelogind / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Discriminator for the per-source union in sd_event_source. The five
 * SOURCE_TIME_* values map 1:1 to the clocks supported by
 * timerfd_create() (see clock_to_event_source_type() below). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for all timer-backed source types. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
60
struct sd_event_source {
        unsigned n_ref;                 /* reference count; freed when it drops to zero */

        sd_event *event;                /* loop this source belongs to; NULL after disconnect */
        void *userdata;                 /* opaque pointer handed to the callback */
        sd_event_handler_t prepare;     /* optional callback run before each poll */

        char *description;              /* free-form name, used in log messages */

        /* Bit-fields to keep the per-source footprint small. */
        EventSourceType type:5;         /* which union member below is valid */
        int enabled:3;                  /* SD_EVENT_OFF / ON / ONESHOT */
        bool pending:1;                 /* queued in event->pending, awaiting dispatch */
        bool dispatching:1;             /* currently inside its callback */
        bool floating:1;                /* owned by the loop itself, not by the caller */

        int64_t priority;               /* lower values are dispatched first */
        unsigned pending_index;         /* position in event->pending prioq */
        unsigned prepare_index;         /* position in event->prepare prioq */
        unsigned pending_iteration;     /* loop iteration when it became pending */
        unsigned prepare_iteration;     /* loop iteration when prepare() last ran */

        LIST_FIELDS(sd_event_source, sources);  /* membership in event->sources */

        /* Type-specific state; the valid member follows 'type' above. */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* EPOLL* mask we asked for */
                        uint32_t revents;       /* EPOLL* mask last returned */
                        bool registered:1;      /* currently added to the epoll fd */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest dispatch time and slack */
                        unsigned earliest_index;        /* position in clock_data->earliest */
                        unsigned latest_index;          /* position in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;        /* filled when the signal fires */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;      /* filled by waitid() when the child changes state */
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* position in event->exit */
                } exit;
        };
};
121
struct clock_data {
        int fd;         /* timerfd for this clock, or -1 if not yet created */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;    /* time the timerfd is currently armed for; USEC_INFINITY if unarmed */

        bool needs_rearm:1;     /* set whenever the prioqs changed and the timerfd must be re-armed */
};
138
struct sd_event {
        unsigned n_ref;         /* reference count; event_free() runs when it drops to zero */

        int epoll_fd;           /* the central poll fd */
        int signal_fd;          /* signalfd matching 'sigset', or -1 */
        int watchdog_fd;        /* timerfd driving sd_watchdog keep-alives, or -1 */

        Prioq *pending;         /* sources with unprocessed events, ordered for dispatch */
        Prioq *prepare;         /* sources with a prepare() callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;         /* boot-id-derived wakeup offset; see initialize_perturb() */

        sigset_t sigset;        /* signals currently routed through signal_fd */
        sd_event_source **signal_sources;       /* _NSIG-sized table, one source per signal, lazily allocated */

        Hashmap *child_sources;         /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;       /* how many of those are not SD_EVENT_OFF */

        Set *post_sources;      /* SOURCE_POST sources, run after other dispatches */

        Prioq *exit;            /* SOURCE_EXIT sources, run when the loop exits */

        pid_t original_pid;     /* pid at creation time, to detect use across fork() */

        unsigned iteration;     /* monotonically increasing loop iteration counter */
        dual_timestamp timestamp;       /* realtime/monotonic time of the current wakeup */
        usec_t timestamp_boottime;      /* boottime of the current wakeup */
        int state;              /* SD_EVENT_INITIAL/PREPARING/.../FINISHED */

        bool exit_requested:1;  /* sd_event_exit() was called */
        bool need_process_child:1;      /* SIGCHLD seen; reap children on next iteration */
        bool watchdog:1;        /* watchdog keep-alive logic is enabled */

        int exit_code;          /* code passed to sd_event_exit() */

        pid_t tid;              /* thread this loop is the default loop of, if any */
        sd_event **default_event_ptr;   /* per-thread default-loop slot to clear on free */

        usec_t watchdog_last, watchdog_period;  /* last ping and ping interval */

        unsigned n_sources;     /* number of connected sources */

        LIST_HEAD(sd_event_source, sources);    /* all connected sources */
};
192
193 static void source_disconnect(sd_event_source *s);
194
195 static int pending_prioq_compare(const void *a, const void *b) {
196         const sd_event_source *x = a, *y = b;
197
198         assert(x->pending);
199         assert(y->pending);
200
201         /* Enabled ones first */
202         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203                 return -1;
204         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205                 return 1;
206
207         /* Lower priority values first */
208         if (x->priority < y->priority)
209                 return -1;
210         if (x->priority > y->priority)
211                 return 1;
212
213         /* Older entries first */
214         if (x->pending_iteration < y->pending_iteration)
215                 return -1;
216         if (x->pending_iteration > y->pending_iteration)
217                 return 1;
218
219         /* Stability for the rest */
220         if (x < y)
221                 return -1;
222         if (x > y)
223                 return 1;
224
225         return 0;
226 }
227
228 static int prepare_prioq_compare(const void *a, const void *b) {
229         const sd_event_source *x = a, *y = b;
230
231         assert(x->prepare);
232         assert(y->prepare);
233
234         /* Move most recently prepared ones last, so that we can stop
235          * preparing as soon as we hit one that has already been
236          * prepared in the current iteration */
237         if (x->prepare_iteration < y->prepare_iteration)
238                 return -1;
239         if (x->prepare_iteration > y->prepare_iteration)
240                 return 1;
241
242         /* Enabled ones first */
243         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
244                 return -1;
245         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
246                 return 1;
247
248         /* Lower priority values first */
249         if (x->priority < y->priority)
250                 return -1;
251         if (x->priority > y->priority)
252                 return 1;
253
254         /* Stability for the rest */
255         if (x < y)
256                 return -1;
257         if (x > y)
258                 return 1;
259
260         return 0;
261 }
262
263 static int earliest_time_prioq_compare(const void *a, const void *b) {
264         const sd_event_source *x = a, *y = b;
265
266         assert(EVENT_SOURCE_IS_TIME(x->type));
267         assert(x->type == y->type);
268
269         /* Enabled ones first */
270         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
271                 return -1;
272         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
273                 return 1;
274
275         /* Move the pending ones to the end */
276         if (!x->pending && y->pending)
277                 return -1;
278         if (x->pending && !y->pending)
279                 return 1;
280
281         /* Order by time */
282         if (x->time.next < y->time.next)
283                 return -1;
284         if (x->time.next > y->time.next)
285                 return 1;
286
287         /* Stability for the rest */
288         if (x < y)
289                 return -1;
290         if (x > y)
291                 return 1;
292
293         return 0;
294 }
295
296 static int latest_time_prioq_compare(const void *a, const void *b) {
297         const sd_event_source *x = a, *y = b;
298
299         assert(EVENT_SOURCE_IS_TIME(x->type));
300         assert(x->type == y->type);
301
302         /* Enabled ones first */
303         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304                 return -1;
305         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
306                 return 1;
307
308         /* Move the pending ones to the end */
309         if (!x->pending && y->pending)
310                 return -1;
311         if (x->pending && !y->pending)
312                 return 1;
313
314         /* Order by time */
315         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
316                 return -1;
317         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
318                 return 1;
319
320         /* Stability for the rest */
321         if (x < y)
322                 return -1;
323         if (x > y)
324                 return 1;
325
326         return 0;
327 }
328
329 static int exit_prioq_compare(const void *a, const void *b) {
330         const sd_event_source *x = a, *y = b;
331
332         assert(x->type == SOURCE_EXIT);
333         assert(y->type == SOURCE_EXIT);
334
335         /* Enabled ones first */
336         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
337                 return -1;
338         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
339                 return 1;
340
341         /* Lower priority values first */
342         if (x->priority < y->priority)
343                 return -1;
344         if (x->priority > y->priority)
345                 return 1;
346
347         /* Stability for the rest */
348         if (x < y)
349                 return -1;
350         if (x > y)
351                 return 1;
352
353         return 0;
354 }
355
356 static void free_clock_data(struct clock_data *d) {
357         assert(d);
358
359         safe_close(d->fd);
360         prioq_free(d->earliest);
361         prioq_free(d->latest);
362 }
363
/* Tear down an event loop whose reference count reached zero: detach
 * all remaining sources, close every fd, and free all containers. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be connected here: a
         * non-floating source holds a reference on the loop, so the
         * refcount could not have dropped to zero with one attached. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If this was a thread's default event loop, clear the cached
         * pointer so the next sd_event_default() allocates a new one. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
400
/* Allocate a new event loop. On success stores it in *ret with a
 * reference count of 1 and returns 0; on failure returns a negative
 * errno-style code. Everything except the pending prioq and the epoll
 * fd is set up lazily by the sd_event_add_*() calls. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark every fd as "not yet opened" ... */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        /* ... and every timer as unarmed. */
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();     /* for fork() detection, see event_pid_changed() */
        e->perturb = USEC_INFINITY;     /* computed on demand by initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
438
439 _public_ sd_event* sd_event_ref(sd_event *e) {
440         assert_return(e, NULL);
441
442         assert(e->n_ref >= 1);
443         e->n_ref++;
444
445         return e;
446 }
447
448 _public_ sd_event* sd_event_unref(sd_event *e) {
449
450         if (!e)
451                 return NULL;
452
453         assert(e->n_ref >= 1);
454         e->n_ref--;
455
456         if (e->n_ref <= 0)
457                 event_free(e);
458
459         return NULL;
460 }
461
462 static bool event_pid_changed(sd_event *e) {
463         assert(e);
464
465         /* We don't support people creating an event loop and keeping
466          * it around over a fork(). Let's complain. */
467
468         return e->original_pid != getpid();
469 }
470
471 static void source_io_unregister(sd_event_source *s) {
472         int r;
473
474         assert(s);
475         assert(s->type == SOURCE_IO);
476
477         if (event_pid_changed(s->event))
478                 return;
479
480         if (!s->io.registered)
481                 return;
482
483         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
484         if (r < 0)
485                 log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));
486
487         s->io.registered = false;
488 }
489
490 static int source_io_register(
491                 sd_event_source *s,
492                 int enabled,
493                 uint32_t events) {
494
495         struct epoll_event ev = {};
496         int r;
497
498         assert(s);
499         assert(s->type == SOURCE_IO);
500         assert(enabled != SD_EVENT_OFF);
501
502         ev.events = events;
503         ev.data.ptr = s;
504
505         if (enabled == SD_EVENT_ONESHOT)
506                 ev.events |= EPOLLONESHOT;
507
508         if (s->io.registered)
509                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
510         else
511                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
512
513         if (r < 0)
514                 return -errno;
515
516         s->io.registered = true;
517
518         return 0;
519 }
520
/// UNNEEDED by elogind
/* Compiled out: inverse of clock_to_event_source_type(); kept for
 * reference against upstream systemd. */
#if 0
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
#endif // 0
547
548 static EventSourceType clock_to_event_source_type(clockid_t clock) {
549
550         switch (clock) {
551
552         case CLOCK_REALTIME:
553                 return SOURCE_TIME_REALTIME;
554
555         case CLOCK_BOOTTIME:
556                 return SOURCE_TIME_BOOTTIME;
557
558         case CLOCK_MONOTONIC:
559                 return SOURCE_TIME_MONOTONIC;
560
561         case CLOCK_REALTIME_ALARM:
562                 return SOURCE_TIME_REALTIME_ALARM;
563
564         case CLOCK_BOOTTIME_ALARM:
565                 return SOURCE_TIME_BOOTTIME_ALARM;
566
567         default:
568                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
569         }
570 }
571
572 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
573         assert(e);
574
575         switch (t) {
576
577         case SOURCE_TIME_REALTIME:
578                 return &e->realtime;
579
580         case SOURCE_TIME_BOOTTIME:
581                 return &e->boottime;
582
583         case SOURCE_TIME_MONOTONIC:
584                 return &e->monotonic;
585
586         case SOURCE_TIME_REALTIME_ALARM:
587                 return &e->realtime_alarm;
588
589         case SOURCE_TIME_BOOTTIME_ALARM:
590                 return &e->boottime_alarm;
591
592         default:
593                 return NULL;
594         }
595 }
596
597 static bool need_signal(sd_event *e, int signal) {
598         return (e->signal_sources && e->signal_sources[signal] &&
599                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
600                 ||
601                (signal == SIGCHLD &&
602                 e->n_enabled_child_sources > 0);
603 }
604
/* (Re)create the signalfd to match e->sigset, and add it to the epoll
 * set the first time around. Returns 0 or -errno. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        /* After fork() the fds belong to the parent; don't touch them. */
        if (event_pid_changed(e))
                return 0;

        /* Only a freshly created signalfd needs to be registered with
         * epoll; signalfd() on an existing fd just updates its mask. */
        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        /* Tag the epoll entry with the source type rather than a source
         * pointer: one signalfd serves all signal sources. */
        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                /* Registration failed; don't keep a half-set-up fd around. */
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
637
/* Detach a source from its event loop: undo the type-specific kernel
 * and queue registrations, remove it from the generic bookkeeping, and
 * drop the loop reference it held (unless floating). The source object
 * itself stays allocated; see source_free(). Idempotent. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        /* Already disconnected? */
        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        /* Undo the type-specific registration first. */
        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Drop from both scheduling queues and make sure the
                 * timerfd is re-armed on the next iteration. */
                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        /* Now drop it from the generic queues as well. */
        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pinned the loop; release that reference
         * last, since it may trigger event_free(). */
        if (!s->floating)
                sd_event_unref(event);
}
742
/* Destroy a source outright: disconnect it from its loop (a no-op if
 * already detached) and free its memory. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
750
/* Mark a source as pending (it has an event to dispatch) or clear that
 * state, keeping the pending prioq and — for time sources — the clock
 * prioqs consistent. Returns 0 on success or no-op, negative on OOM.
 * SOURCE_EXIT sources use their own queue and never go through here. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when it became pending: older entries dispatch
                 * first at equal priority (see pending_prioq_compare). */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back so the flag matches the queue state. */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        /* Pending-ness changes a time source's position in its clock
         * queues (pending entries sort last), so reshuffle and force a
         * timerfd re-arm. */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
786
787 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
788         sd_event_source *s;
789
790         assert(e);
791
792         s = new0(sd_event_source, 1);
793         if (!s)
794                 return NULL;
795
796         s->n_ref = 1;
797         s->event = e;
798         s->floating = floating;
799         s->type = type;
800         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
801
802         if (!floating)
803                 sd_event_ref(e);
804
805         LIST_PREPEND(sources, e->sources, s);
806         e->n_sources ++;
807
808         return s;
809 }
810
/* Add an IO source watching 'fd' for the given EPOLL* event mask.
 * The source starts enabled (SD_EVENT_ON). If ret is NULL the source is
 * created floating, i.e. owned by the loop. Returns 0 or a negative
 * errno-style code. */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        /* Register with epoll right away; undo everything on failure. */
        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
850
/* Lazily compute e->perturb, a per-machine wakeup offset in [0,
 * USEC_PER_MINUTE) derived from the boot ID. Left at USEC_INFINITY if
 * the boot ID cannot be read. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
869
/* Lazily create the timerfd for one clock and register it with epoll.
 * A no-op if the fd already exists. Returns 0 or -errno. */
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Tag the epoll entry with the source type: one timerfd serves
         * all sources on this clock, so no single source pointer fits. */
        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
900
901 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
902         assert(s);
903
904         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
905 }
906
/* Add a timer source on 'clock' firing at absolute time 'usec' with the
 * given accuracy (0 selects DEFAULT_ACCURACY_USEC). A NULL callback
 * makes the timer exit the loop with userdata as exit code. The source
 * starts as SD_EVENT_ONESHOT. Returns 0 or a negative errno-style
 * code; -EOPNOTSUPP if the clock is not timerfd-capable. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily set up this clock's infrastructure: both scheduling
         * prioqs and the timerfd. */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The queues changed, so the timerfd must be re-armed. */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
984
985 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
986         assert(s);
987
988         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
989 }
990
991 _public_ int sd_event_add_signal(
992                 sd_event *e,
993                 sd_event_source **ret,
994                 int sig,
995                 sd_event_signal_handler_t callback,
996                 void *userdata) {
997
998         sd_event_source *s;
999         sigset_t ss;
1000         int r;
1001         bool previous;
1002
1003         assert_return(e, -EINVAL);
1004         assert_return(sig > 0, -EINVAL);
1005         assert_return(sig < _NSIG, -EINVAL);
1006         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1007         assert_return(!event_pid_changed(e), -ECHILD);
1008
1009         if (!callback)
1010                 callback = signal_exit_callback;
1011
1012         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1013         if (r < 0)
1014                 return -errno;
1015
1016         if (!sigismember(&ss, sig))
1017                 return -EBUSY;
1018
1019         if (!e->signal_sources) {
1020                 e->signal_sources = new0(sd_event_source*, _NSIG);
1021                 if (!e->signal_sources)
1022                         return -ENOMEM;
1023         } else if (e->signal_sources[sig])
1024                 return -EBUSY;
1025
1026         previous = need_signal(e, sig);
1027
1028         s = source_new(e, !ret, SOURCE_SIGNAL);
1029         if (!s)
1030                 return -ENOMEM;
1031
1032         s->signal.sig = sig;
1033         s->signal.callback = callback;
1034         s->userdata = userdata;
1035         s->enabled = SD_EVENT_ON;
1036
1037         e->signal_sources[sig] = s;
1038
1039         if (!previous) {
1040                 assert_se(sigaddset(&e->sigset, sig) == 0);
1041
1042                 r = event_update_signal_fd(e);
1043                 if (r < 0) {
1044                         source_free(s);
1045                         return r;
1046                 }
1047         }
1048
1049         /* Use the signal name as description for the event source by default */
1050         (void) sd_event_source_set_description(s, signal_to_string(sig));
1051
1052         if (ret)
1053                 *ret = s;
1054
1055         return 0;
1056 }
1057
/* Adds an event source watching state changes of child process @pid.
 * @options is an OR-combination of WEXITED, WSTOPPED and WCONTINUED
 * selecting which state changes are reported; at least one must be
 * set. Only one child source per PID may exist at a time. Child
 * sources are dispatched via SIGCHLD and waitid(). */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one event source per child PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already part of our watched
         * signal set, so we know below whether the signalfd must be
         * extended. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        /* If SIGCHLD was not watched so far, add it to the signal set
         * and re-create the signalfd accordingly. */
        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Make sure the next loop iteration calls waitid() for this child. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1122
1123 _public_ int sd_event_add_defer(
1124                 sd_event *e,
1125                 sd_event_source **ret,
1126                 sd_event_handler_t callback,
1127                 void *userdata) {
1128
1129         sd_event_source *s;
1130         int r;
1131
1132         assert_return(e, -EINVAL);
1133         assert_return(callback, -EINVAL);
1134         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1135         assert_return(!event_pid_changed(e), -ECHILD);
1136
1137         s = source_new(e, !ret, SOURCE_DEFER);
1138         if (!s)
1139                 return -ENOMEM;
1140
1141         s->defer.callback = callback;
1142         s->userdata = userdata;
1143         s->enabled = SD_EVENT_ONESHOT;
1144
1145         r = source_set_pending(s, true);
1146         if (r < 0) {
1147                 source_free(s);
1148                 return r;
1149         }
1150
1151         if (ret)
1152                 *ret = s;
1153
1154         return 0;
1155 }
1156
1157 _public_ int sd_event_add_post(
1158                 sd_event *e,
1159                 sd_event_source **ret,
1160                 sd_event_handler_t callback,
1161                 void *userdata) {
1162
1163         sd_event_source *s;
1164         int r;
1165
1166         assert_return(e, -EINVAL);
1167         assert_return(callback, -EINVAL);
1168         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1169         assert_return(!event_pid_changed(e), -ECHILD);
1170
1171         r = set_ensure_allocated(&e->post_sources, NULL);
1172         if (r < 0)
1173                 return r;
1174
1175         s = source_new(e, !ret, SOURCE_POST);
1176         if (!s)
1177                 return -ENOMEM;
1178
1179         s->post.callback = callback;
1180         s->userdata = userdata;
1181         s->enabled = SD_EVENT_ON;
1182
1183         r = set_put(e->post_sources, s);
1184         if (r < 0) {
1185                 source_free(s);
1186                 return r;
1187         }
1188
1189         if (ret)
1190                 *ret = s;
1191
1192         return 0;
1193 }
1194
1195 _public_ int sd_event_add_exit(
1196                 sd_event *e,
1197                 sd_event_source **ret,
1198                 sd_event_handler_t callback,
1199                 void *userdata) {
1200
1201         sd_event_source *s;
1202         int r;
1203
1204         assert_return(e, -EINVAL);
1205         assert_return(callback, -EINVAL);
1206         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1207         assert_return(!event_pid_changed(e), -ECHILD);
1208
1209         if (!e->exit) {
1210                 e->exit = prioq_new(exit_prioq_compare);
1211                 if (!e->exit)
1212                         return -ENOMEM;
1213         }
1214
1215         s = source_new(e, !ret, SOURCE_EXIT);
1216         if (!s)
1217                 return -ENOMEM;
1218
1219         s->exit.callback = callback;
1220         s->userdata = userdata;
1221         s->exit.prioq_index = PRIOQ_IDX_NULL;
1222         s->enabled = SD_EVENT_ONESHOT;
1223
1224         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1225         if (r < 0) {
1226                 source_free(s);
1227                 return r;
1228         }
1229
1230         if (ret)
1231                 *ret = s;
1232
1233         return 0;
1234 }
1235
1236 /// UNNEEDED by elogind
1237 #if 0
/* Increases the reference count of the event source by one and
 * returns it (NULL in, NULL out). */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1246 #endif // 0
1247
/* Drops one reference to the event source; once the last reference is
 * gone the source is released — except while its own handler is being
 * dispatched, see the comment below. Always returns NULL, so callers
 * can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1276
/* Sets (or, with a NULL @description, clears) the free-form
 * description string of the event source. The string is copied. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
1283
1284 /// UNNEEDED by elogind
1285 #if 0
/* Returns the description string previously set for the source, or
 * -ENXIO if none was set. The returned pointer is the source's
 * internal string and stays owned by the source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
1295 #endif // 0
1296
/* Returns the event loop object this source is attached to. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1302
1303 /// UNNEEDED by elogind
1304 #if 0
/* Returns whether the source is currently marked pending (i.e. has an
 * event queued for dispatch). Not defined for exit sources (-EDOM). */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1313
/* Returns the file descriptor an IO event source watches. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1321 #endif // 0
1322
/* Replaces the file descriptor an IO event source watches. If the
 * source is enabled, the new fd is registered with epoll first and
 * the old one removed only afterwards, so a registration failure
 * leaves the previous state fully intact. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled source: nothing is registered with epoll,
                 * simply remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back: the old fd is still registered. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd from the epoll object;
                 * a failure here is deliberately ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1358
1359 /// UNNEEDED by elogind
1360 #if 0
/* Retrieves the epoll event mask (EPOLLIN/EPOLLOUT/…) the IO source
 * is configured for into *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1370 #endif // 0
1371
/* Updates the epoll event mask of an IO event source, re-registering
 * it with epoll when the source is enabled. Clears the pending state
 * on success, since prior events may no longer match the new mask. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
1396
1397 /// UNNEEDED by elogind
1398 #if 0
/* Retrieves the events that actually triggered for an IO source into
 * *revents. Only valid while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1409
/* Returns the signal number a signal event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1417
1418 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1419         assert_return(s, -EINVAL);
1420         assert_return(!event_pid_changed(s->event), -ECHILD);
1421
1422         return s->priority;
1423 }
1424 #endif // 0
1425
/* Changes the dispatch priority of the event source. Since the
 * priority participates in the ordering of the pending, prepare and
 * exit queues, the source is repositioned in each queue it sits in. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1447
1448 /// UNNEEDED by elogind
1449 #if 0
/* Retrieves the current enable state (SD_EVENT_OFF, SD_EVENT_ON or
 * SD_EVENT_ONESHOT) of the event source into *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1458 #endif // 0
1459
1460 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1461         int r;
1462
1463         assert_return(s, -EINVAL);
1464         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1465         assert_return(!event_pid_changed(s->event), -ECHILD);
1466
1467         /* If we are dead anyway, we are fine with turning off
1468          * sources, but everything else needs to fail. */
1469         if (s->event->state == SD_EVENT_FINISHED)
1470                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1471
1472         if (s->enabled == m)
1473                 return 0;
1474
1475         if (m == SD_EVENT_OFF) {
1476
1477                 switch (s->type) {
1478
1479                 case SOURCE_IO:
1480                         source_io_unregister(s);
1481                         s->enabled = m;
1482                         break;
1483
1484                 case SOURCE_TIME_REALTIME:
1485                 case SOURCE_TIME_BOOTTIME:
1486                 case SOURCE_TIME_MONOTONIC:
1487                 case SOURCE_TIME_REALTIME_ALARM:
1488                 case SOURCE_TIME_BOOTTIME_ALARM: {
1489                         struct clock_data *d;
1490
1491                         s->enabled = m;
1492                         d = event_get_clock_data(s->event, s->type);
1493                         assert(d);
1494
1495                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1496                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1497                         d->needs_rearm = true;
1498                         break;
1499                 }
1500
1501                 case SOURCE_SIGNAL:
1502                         assert(need_signal(s->event, s->signal.sig));
1503
1504                         s->enabled = m;
1505
1506                         if (!need_signal(s->event, s->signal.sig)) {
1507                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1508
1509                                 (void) event_update_signal_fd(s->event);
1510                                 /* If disabling failed, we might get a spurious event,
1511                                  * but otherwise nothing bad should happen. */
1512                         }
1513
1514                         break;
1515
1516                 case SOURCE_CHILD:
1517                         assert(need_signal(s->event, SIGCHLD));
1518
1519                         s->enabled = m;
1520
1521                         assert(s->event->n_enabled_child_sources > 0);
1522                         s->event->n_enabled_child_sources--;
1523
1524                         if (!need_signal(s->event, SIGCHLD)) {
1525                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1526
1527                                 (void) event_update_signal_fd(s->event);
1528                         }
1529
1530                         break;
1531
1532                 case SOURCE_EXIT:
1533                         s->enabled = m;
1534                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1535                         break;
1536
1537                 case SOURCE_DEFER:
1538                 case SOURCE_POST:
1539                         s->enabled = m;
1540                         break;
1541
1542                 default:
1543                         assert_not_reached("Wut? I shouldn't exist.");
1544                 }
1545
1546         } else {
1547                 switch (s->type) {
1548
1549                 case SOURCE_IO:
1550                         r = source_io_register(s, m, s->io.events);
1551                         if (r < 0)
1552                                 return r;
1553
1554                         s->enabled = m;
1555                         break;
1556
1557                 case SOURCE_TIME_REALTIME:
1558                 case SOURCE_TIME_BOOTTIME:
1559                 case SOURCE_TIME_MONOTONIC:
1560                 case SOURCE_TIME_REALTIME_ALARM:
1561                 case SOURCE_TIME_BOOTTIME_ALARM: {
1562                         struct clock_data *d;
1563
1564                         s->enabled = m;
1565                         d = event_get_clock_data(s->event, s->type);
1566                         assert(d);
1567
1568                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1569                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1570                         d->needs_rearm = true;
1571                         break;
1572                 }
1573
1574                 case SOURCE_SIGNAL:
1575                         /* Check status before enabling. */
1576                         if (!need_signal(s->event, s->signal.sig)) {
1577                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1578
1579                                 r = event_update_signal_fd(s->event);
1580                                 if (r < 0) {
1581                                         s->enabled = SD_EVENT_OFF;
1582                                         return r;
1583                                 }
1584                         }
1585
1586                         s->enabled = m;
1587                         break;
1588
1589                 case SOURCE_CHILD:
1590                         /* Check status before enabling. */
1591                         if (s->enabled == SD_EVENT_OFF) {
1592                                 if (!need_signal(s->event, SIGCHLD)) {
1593                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1594
1595                                         r = event_update_signal_fd(s->event);
1596                                         if (r < 0) {
1597                                                 s->enabled = SD_EVENT_OFF;
1598                                                 return r;
1599                                         }
1600                                 }
1601
1602                                 s->event->n_enabled_child_sources++;
1603                         }
1604
1605                         s->enabled = m;
1606                         break;
1607
1608                 case SOURCE_EXIT:
1609                         s->enabled = m;
1610                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1611                         break;
1612
1613                 case SOURCE_DEFER:
1614                 case SOURCE_POST:
1615                         s->enabled = m;
1616                         break;
1617
1618                 default:
1619                         assert_not_reached("Wut? I shouldn't exist.");
1620                 }
1621         }
1622
1623         if (s->pending)
1624                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1625
1626         if (s->prepare)
1627                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1628
1629         return 0;
1630 }
1631
/* Retrieves the absolute expiry time of a timer source (in usec, on
 * the source's clock) into *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1641
/* Sets the absolute expiry time (in usec, on the source's clock) of a
 * timer source and repositions it in the clock's priority queues so
 * the timerfd can be rearmed accordingly. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* A changed deadline invalidates any pending state. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1664
1665 /// UNNEEDED by elogind
1666 #if 0
/* Retrieves the accuracy (maximum allowed dispatch delay, in usec) of
 * a timer source into *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1676
/* Sets the accuracy (maximum allowed dispatch delay, in usec) of a
 * timer source; 0 selects DEFAULT_ACCURACY_USEC. Only the "latest"
 * queue depends on the accuracy, hence only it is reshuffled. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1701
/* Retrieves the clockid_t corresponding to the timer source's type
 * into *clock. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1711
/* Retrieves the PID a child event source watches into *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1721 #endif // 0
1722
1723 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1724         int r;
1725
1726         assert_return(s, -EINVAL);
1727         assert_return(s->type != SOURCE_EXIT, -EDOM);
1728         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1729         assert_return(!event_pid_changed(s->event), -ECHILD);
1730
1731         if (s->prepare == callback)
1732                 return 0;
1733
1734         if (callback && s->prepare) {
1735                 s->prepare = callback;
1736                 return 0;
1737         }
1738
1739         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1740         if (r < 0)
1741                 return r;
1742
1743         s->prepare = callback;
1744
1745         if (callback) {
1746                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1747                 if (r < 0)
1748                         return r;
1749         } else
1750                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1751
1752         return 0;
1753 }
1754
1755 /// UNNEEDED by elogind
1756 #if 0
/* Returns the userdata pointer associated with the event source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1762
/* Replaces the userdata pointer of the event source, returning the
 * previous value. */
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
1773 #endif // 0
1774
/* Picks a wakeup time in the interval [a, b], preferring spots that
 * are synchronised system-wide via the per-boot perturbation value so
 * that independent timers coalesce into a single wakeup. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* NOTE(review): usec_t is unsigned, so this only triggers for a == 0. */
        if (a <= 0)
                return 0;

        /* A window of one microsecond or less: no freedom to choose. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the perturbed spot within the minute containing b ... */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                /* Spot lies past b: step back one minute, guarding
                 * against unsigned underflow. */
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* ... then within the enclosing 10s step ... */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* ... then within the second ... */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* ... then within the 250ms step. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronised spot fits the interval: wake up as late as possible. */
        return b;
}
1852
1853 static int event_arm_timer(
1854                 sd_event *e,
1855                 struct clock_data *d) {
1856
1857         struct itimerspec its = {};
1858         sd_event_source *a, *b;
1859         usec_t t;
1860         int r;
1861
1862         assert(e);
1863         assert(d);
1864
1865         if (!d->needs_rearm)
1866                 return 0;
1867         else
1868                 d->needs_rearm = false;
1869
1870         a = prioq_peek(d->earliest);
1871         if (!a || a->enabled == SD_EVENT_OFF) {
1872
1873                 if (d->fd < 0)
1874                         return 0;
1875
1876                 if (d->next == USEC_INFINITY)
1877                         return 0;
1878
1879                 /* disarm */
1880                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1881                 if (r < 0)
1882                         return r;
1883
1884                 d->next = USEC_INFINITY;
1885                 return 0;
1886         }
1887
1888         b = prioq_peek(d->latest);
1889         assert_se(b && b->enabled != SD_EVENT_OFF);
1890
1891         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1892         if (d->next == t)
1893                 return 0;
1894
1895         assert_se(d->fd >= 0);
1896
1897         if (t == 0) {
1898                 /* We don' want to disarm here, just mean some time looooong ago. */
1899                 its.it_value.tv_sec = 0;
1900                 its.it_value.tv_nsec = 1;
1901         } else
1902                 timespec_store(&its.it_value, t);
1903
1904         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1905         if (r < 0)
1906                 return -errno;
1907
1908         d->next = t;
1909         return 0;
1910 }
1911
1912 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1913         assert(e);
1914         assert(s);
1915         assert(s->type == SOURCE_IO);
1916
1917         /* If the event source was already pending, we just OR in the
1918          * new revents, otherwise we reset the value. The ORing is
1919          * necessary to handle EPOLLONESHOT events properly where
1920          * readability might happen independently of writability, and
1921          * we need to keep track of both */
1922
1923         if (s->pending)
1924                 s->io.revents |= revents;
1925         else
1926                 s->io.revents = revents;
1927
1928         return source_set_pending(s, true);
1929 }
1930
1931 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1932         uint64_t x;
1933         ssize_t ss;
1934
1935         assert(e);
1936         assert(fd >= 0);
1937
1938         assert_return(events == EPOLLIN, -EIO);
1939
1940         ss = read(fd, &x, sizeof(x));
1941         if (ss < 0) {
1942                 if (errno == EAGAIN || errno == EINTR)
1943                         return 0;
1944
1945                 return -errno;
1946         }
1947
1948         if (_unlikely_(ss != sizeof(x)))
1949                 return -EIO;
1950
1951         if (next)
1952                 *next = USEC_INFINITY;
1953
1954         return 0;
1955 }
1956
1957 static int process_timer(
1958                 sd_event *e,
1959                 usec_t n,
1960                 struct clock_data *d) {
1961
1962         sd_event_source *s;
1963         int r;
1964
1965         assert(e);
1966         assert(d);
1967
1968         for (;;) {
1969                 s = prioq_peek(d->earliest);
1970                 if (!s ||
1971                     s->time.next > n ||
1972                     s->enabled == SD_EVENT_OFF ||
1973                     s->pending)
1974                         break;
1975
1976                 r = source_set_pending(s, true);
1977                 if (r < 0)
1978                         return r;
1979
1980                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1981                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1982                 d->needs_rearm = true;
1983         }
1984
1985         return 0;
1986 }
1987
/* Polls every watched child for a state change and marks the matching
 * child sources pending. Returns 0 on success, a negative errno-style
 * error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued, its siginfo must not be overwritten */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT is only mixed in when exit events are
                 * requested, so the zombie stays queued until the
                 * callback ran; stop/continue events need no reaping. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 if no state change was reported for this child */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2054
/* Drains the signalfd and marks the corresponding signal sources as
 * pending. Returns > 0 if at least one siginfo was consumed, 0 if the
 * fd was already empty, negative on error. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: queue fully drained; EINTR: retry on next wakeup */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd always hands out complete structures */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        /* Poll the watched children right away; their
                         * sources are queued from there, not here. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Nobody subscribed to this signal, drop it on the floor */
                if (!s)
                        continue;

                /* Stash the siginfo for the callback and queue the source */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2103
/* Invokes the callback of a single event source, taking care of the
 * surrounding bookkeeping: clearing the pending flag, queuing post
 * sources, disabling oneshot sources and disabling sources whose
 * callback failed. Returns 1 on success, negative on internal error. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources stay queued while enabled; everything
         * else is un-queued before its callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Oneshot sources are switched off before the callback runs,
         * so the callback may safely re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, an unref of the source only drops the
         * refcount; the actual freeing is done below after the callback. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Evaluate before the callback runs: the callback may
                 * overwrite or reuse the stored siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        /* A failing callback is logged (debug level only, this is the
         * application's error, not ours) and the source disabled below. */
        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* The callback may have dropped the last reference; free now
         * that dispatching is over. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2209
/* Runs the prepare() callback of every enabled source that registered
 * one, at most once per loop iteration each. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* The prepare prioq appears to order sources that
                 * already ran this iteration after those that did not,
                 * so hitting one means all remaining ones ran too —
                 * NOTE(review): ordering defined by the comparator
                 * elsewhere in this file, confirm there. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp first, then reshuffle, so this source sorts
                 * behind the not-yet-run ones. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Protect the source from being freed while its own
                 * callback is running (see source_dispatch()). */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2248
2249 static int dispatch_exit(sd_event *e) {
2250         sd_event_source *p;
2251         int r;
2252
2253         assert(e);
2254
2255         p = prioq_peek(e->exit);
2256         if (!p || p->enabled == SD_EVENT_OFF) {
2257                 e->state = SD_EVENT_FINISHED;
2258                 return 0;
2259         }
2260
2261         sd_event_ref(e);
2262         e->iteration++;
2263         e->state = SD_EVENT_EXITING;
2264
2265         r = source_dispatch(p);
2266
2267         e->state = SD_EVENT_INITIAL;
2268         sd_event_unref(e);
2269
2270         return r;
2271 }
2272
2273 static sd_event_source* event_next_pending(sd_event *e) {
2274         sd_event_source *p;
2275
2276         assert(e);
2277
2278         p = prioq_peek(e->pending);
2279         if (!p)
2280                 return NULL;
2281
2282         if (p->enabled == SD_EVENT_OFF)
2283                 return NULL;
2284
2285         return p;
2286 }
2287
2288 static int arm_watchdog(sd_event *e) {
2289         struct itimerspec its = {};
2290         usec_t t;
2291         int r;
2292
2293         assert(e);
2294         assert(e->watchdog_fd >= 0);
2295
2296         t = sleep_between(e,
2297                           e->watchdog_last + (e->watchdog_period / 2),
2298                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2299
2300         timespec_store(&its.it_value, t);
2301
2302         /* Make sure we never set the watchdog to 0, which tells the
2303          * kernel to disable it. */
2304         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2305                 its.it_value.tv_nsec = 1;
2306
2307         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2308         if (r < 0)
2309                 return -errno;
2310
2311         return 0;
2312 }
2313
2314 static int process_watchdog(sd_event *e) {
2315         assert(e);
2316
2317         if (!e->watchdog)
2318                 return 0;
2319
2320         /* Don't notify watchdog too often */
2321         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2322                 return 0;
2323
2324         sd_notify(false, "WATCHDOG=1");
2325         e->watchdog_last = e->timestamp.monotonic;
2326
2327         return arm_watchdog(e);
2328 }
2329
/* First stage of one loop iteration: runs prepare callbacks and arms
 * all clock timerfds. Moves the loop from INITIAL to ARMED (return 0)
 * or straight to PENDING (return > 0) if work is already queued. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* When an exit was requested, skip the regular preparation so
         * the exit sources get dispatched as soon as possible. */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        /* Give all prepare() callbacks a chance to queue new work */
        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Re-arm the timerfd of every clock we maintain */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        /* Something is queued already: do a zero-timeout wait so the
         * state machine advances to PENDING without blocking. If the
         * wait finds nothing after all (r == 0) it resets the state to
         * INITIAL, so put it back to ARMED in that case. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
2382
/* Second stage of one loop iteration: blocks on epoll (up to 'timeout'
 * usec, (uint64_t) -1 for infinity), flushes the internal fds and
 * queues every source that became ready. Returns > 0 and enters
 * PENDING if something is dispatchable, 0 (back to INITIAL) if not,
 * negative on error. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocated output buffer, one slot per source (at least
         * one so epoll_wait() gets a valid array). */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Convert the usec timeout to ms, rounding up so we never wake
         * before the deadline. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat an interrupted wait like "something pending",
                 * the caller will simply find nothing to dispatch. */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        /* Cache the wakeup time; timer processing below and
         * sd_event_now() are based on these timestamps. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are registered with a small-integer
                 * marker as data.ptr; anything else is an IO source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue elapsed timer sources for each clock, using the
         * timestamps taken right after the wakeup. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* Set when SIGCHLD arrived but could not be fully handled yet */
        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
2482
2483 _public_ int sd_event_dispatch(sd_event *e) {
2484         sd_event_source *p;
2485         int r;
2486
2487         assert_return(e, -EINVAL);
2488         assert_return(!event_pid_changed(e), -ECHILD);
2489         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2490         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2491
2492         if (e->exit_requested)
2493                 return dispatch_exit(e);
2494
2495         p = event_next_pending(e);
2496         if (p) {
2497                 sd_event_ref(e);
2498
2499                 e->state = SD_EVENT_RUNNING;
2500                 r = source_dispatch(p);
2501                 e->state = SD_EVENT_INITIAL;
2502
2503                 sd_event_unref(e);
2504
2505                 return r;
2506         }
2507
2508         e->state = SD_EVENT_INITIAL;
2509
2510         return 1;
2511 }
2512
2513 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2514         int r;
2515
2516         assert_return(e, -EINVAL);
2517         assert_return(!event_pid_changed(e), -ECHILD);
2518         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2519         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2520
2521         r = sd_event_prepare(e);
2522         if (r == 0)
2523                 /* There was nothing? Then wait... */
2524                 r = sd_event_wait(e, timeout);
2525
2526         if (r > 0) {
2527                 /* There's something now, then let's dispatch it */
2528                 r = sd_event_dispatch(e);
2529                 if (r < 0)
2530                         return r;
2531
2532                 return 1;
2533         }
2534
2535         return r;
2536 }
2537
2538 /// UNNEEDED by elogind
2539 #if 0
2540 _public_ int sd_event_loop(sd_event *e) {
2541         int r;
2542
2543         assert_return(e, -EINVAL);
2544         assert_return(!event_pid_changed(e), -ECHILD);
2545         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2546
2547         sd_event_ref(e);
2548
2549         while (e->state != SD_EVENT_FINISHED) {
2550                 r = sd_event_run(e, (uint64_t) -1);
2551                 if (r < 0)
2552                         goto finish;
2553         }
2554
2555         r = e->exit_code;
2556
2557 finish:
2558         sd_event_unref(e);
2559         return r;
2560 }
2561
2562 _public_ int sd_event_get_fd(sd_event *e) {
2563
2564         assert_return(e, -EINVAL);
2565         assert_return(!event_pid_changed(e), -ECHILD);
2566
2567         return e->epoll_fd;
2568 }
2569 #endif // 0
2570
2571 _public_ int sd_event_get_state(sd_event *e) {
2572         assert_return(e, -EINVAL);
2573         assert_return(!event_pid_changed(e), -ECHILD);
2574
2575         return e->state;
2576 }
2577
2578 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2579         assert_return(e, -EINVAL);
2580         assert_return(code, -EINVAL);
2581         assert_return(!event_pid_changed(e), -ECHILD);
2582
2583         if (!e->exit_requested)
2584                 return -ENODATA;
2585
2586         *code = e->exit_code;
2587         return 0;
2588 }
2589
2590 _public_ int sd_event_exit(sd_event *e, int code) {
2591         assert_return(e, -EINVAL);
2592         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2593         assert_return(!event_pid_changed(e), -ECHILD);
2594
2595         e->exit_requested = true;
2596         e->exit_code = code;
2597
2598         return 0;
2599 }
2600
2601 /// UNNEEDED by elogind
2602 #if 0
2603 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2604         assert_return(e, -EINVAL);
2605         assert_return(usec, -EINVAL);
2606         assert_return(!event_pid_changed(e), -ECHILD);
2607
2608         if (!dual_timestamp_is_set(&e->timestamp)) {
2609                 /* Implicitly fall back to now() if we never ran
2610                  * before and thus have no cached time. */
2611                 *usec = now(clock);
2612                 return 1;
2613         }
2614
2615         switch (clock) {
2616
2617         case CLOCK_REALTIME:
2618         case CLOCK_REALTIME_ALARM:
2619                 *usec = e->timestamp.realtime;
2620                 break;
2621
2622         case CLOCK_MONOTONIC:
2623                 *usec = e->timestamp.monotonic;
2624                 break;
2625
2626         case CLOCK_BOOTTIME:
2627         case CLOCK_BOOTTIME_ALARM:
2628                 *usec = e->timestamp_boottime;
2629                 break;
2630         }
2631
2632         return 0;
2633 }
2634 #endif // 0
2635
/* Returns (creating on first use) the per-thread default event loop.
 * With a NULL 'ret' it merely reports whether one exists. Returns 1
 * when a new loop was created, 0 when an existing one was referenced. */
_public_ int sd_event_default(sd_event **ret) {

        /* One default loop per thread */
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Hand the loop a back-pointer to this slot so it can clear it
         * on destruction, and remember the owning thread's tid (see
         * sd_event_get_tid()). */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2661
2662 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2663         assert_return(e, -EINVAL);
2664         assert_return(tid, -EINVAL);
2665         assert_return(!event_pid_changed(e), -ECHILD);
2666
2667         if (e->tid != 0) {
2668                 *tid = e->tid;
2669                 return 0;
2670         }
2671
2672         return -ENXIO;
2673 }
2674
/* Enables or disables automatic sd_notify() watchdog pings for this
 * loop, driven by the WATCHDOG_USEC environment the service manager
 * set up. Returns the new state (0 or 1), or a negative error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state, nothing to do */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means no watchdog was requested by the
                 * service manager; report that without enabling. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Register the timerfd in the loop's epoll; its slot is
                 * tagged with the SOURCE_WATCHDOG marker, matched in
                 * sd_event_wait(). */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Tear down: drop the timerfd from epoll and close it */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Roll back the partially initialized timerfd */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2726
2727 /// UNNEEDED by elogind
2728 #if 0
2729 _public_ int sd_event_get_watchdog(sd_event *e) {
2730         assert_return(e, -EINVAL);
2731         assert_return(!event_pid_changed(e), -ECHILD);
2732
2733         return e->watchdog;
2734 }
2735 #endif // 0