chiark / gitweb /
Prep v225: Applying various fixes and changes to src/libelogind/sd-event that got...
[elogind.git] / src / libelogind / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Internal discriminator for the kind of an event source. The first
 * enumerator is 0, so _SOURCE_EVENT_SOURCE_TYPE_MAX equals the number of
 * valid kinds; the invalid marker is negative. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* Evaluates to true if (t) is one of the five clock-driven source types. */
#define EVENT_SOURCE_IS_TIME(t)                         \
        IN_SET((t),                                     \
               SOURCE_TIME_REALTIME,                    \
               SOURCE_TIME_BOOTTIME,                    \
               SOURCE_TIME_MONOTONIC,                   \
               SOURCE_TIME_REALTIME_ALARM,              \
               SOURCE_TIME_BOOTTIME_ALARM)
60
61 struct sd_event_source {
62         unsigned n_ref;
63
64         sd_event *event;
65         void *userdata;
66         sd_event_handler_t prepare;
67
68         char *description;
69
70         EventSourceType type:5;
71         int enabled:3;
72         bool pending:1;
73         bool dispatching:1;
74         bool floating:1;
75
76         int64_t priority;
77         unsigned pending_index;
78         unsigned prepare_index;
79         unsigned pending_iteration;
80         unsigned prepare_iteration;
81
82         LIST_FIELDS(sd_event_source, sources);
83
84         union {
85                 struct {
86                         sd_event_io_handler_t callback;
87                         int fd;
88                         uint32_t events;
89                         uint32_t revents;
90                         bool registered:1;
91                 } io;
92                 struct {
93                         sd_event_time_handler_t callback;
94                         usec_t next, accuracy;
95                         unsigned earliest_index;
96                         unsigned latest_index;
97                 } time;
98                 struct {
99                         sd_event_signal_handler_t callback;
100                         struct signalfd_siginfo siginfo;
101                         int sig;
102                 } signal;
103                 struct {
104                         sd_event_child_handler_t callback;
105                         siginfo_t siginfo;
106                         pid_t pid;
107                         int options;
108                 } child;
109                 struct {
110                         sd_event_handler_t callback;
111                 } defer;
112                 struct {
113                         sd_event_handler_t callback;
114                 } post;
115                 struct {
116                         sd_event_handler_t callback;
117                         unsigned prioq_index;
118                 } exit;
119         };
120 };
121
122 struct clock_data {
123         int fd;
124
125         /* For all clocks we maintain two priority queues each, one
126          * ordered for the earliest times the events may be
127          * dispatched, and one ordered by the latest times they must
128          * have been dispatched. The range between the top entries in
129          * the two prioqs is the time window we can freely schedule
130          * wakeups in */
131
132         Prioq *earliest;
133         Prioq *latest;
134         usec_t next;
135
136         bool needs_rearm:1;
137 };
138
139 struct sd_event {
140         unsigned n_ref;
141
142         int epoll_fd;
143         int signal_fd;
144         int watchdog_fd;
145
146         Prioq *pending;
147         Prioq *prepare;
148
149         /* timerfd_create() only supports these five clocks so far. We
150          * can add support for more clocks when the kernel learns to
151          * deal with them, too. */
152         struct clock_data realtime;
153         struct clock_data boottime;
154         struct clock_data monotonic;
155         struct clock_data realtime_alarm;
156         struct clock_data boottime_alarm;
157
158         usec_t perturb;
159
160         sigset_t sigset;
161         sd_event_source **signal_sources;
162
163         Hashmap *child_sources;
164         unsigned n_enabled_child_sources;
165
166         Set *post_sources;
167
168         Prioq *exit;
169
170         pid_t original_pid;
171
172         unsigned iteration;
173         dual_timestamp timestamp;
174         usec_t timestamp_boottime;
175         int state;
176
177         bool exit_requested:1;
178         bool need_process_child:1;
179         bool watchdog:1;
180
181         int exit_code;
182
183         pid_t tid;
184         sd_event **default_event_ptr;
185
186         usec_t watchdog_last, watchdog_period;
187
188         unsigned n_sources;
189
190         LIST_HEAD(sd_event_source, sources);
191 };
192
193 static void source_disconnect(sd_event_source *s);
194
195 static int pending_prioq_compare(const void *a, const void *b) {
196         const sd_event_source *x = a, *y = b;
197
198         assert(x->pending);
199         assert(y->pending);
200
201         /* Enabled ones first */
202         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203                 return -1;
204         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205                 return 1;
206
207         /* Lower priority values first */
208         if (x->priority < y->priority)
209                 return -1;
210         if (x->priority > y->priority)
211                 return 1;
212
213         /* Older entries first */
214         if (x->pending_iteration < y->pending_iteration)
215                 return -1;
216         if (x->pending_iteration > y->pending_iteration)
217                 return 1;
218
219         /* Stability for the rest */
220         if (x < y)
221                 return -1;
222         if (x > y)
223                 return 1;
224
225         return 0;
226 }
227
228 static int prepare_prioq_compare(const void *a, const void *b) {
229         const sd_event_source *x = a, *y = b;
230
231         assert(x->prepare);
232         assert(y->prepare);
233
234         /* Move most recently prepared ones last, so that we can stop
235          * preparing as soon as we hit one that has already been
236          * prepared in the current iteration */
237         if (x->prepare_iteration < y->prepare_iteration)
238                 return -1;
239         if (x->prepare_iteration > y->prepare_iteration)
240                 return 1;
241
242         /* Enabled ones first */
243         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
244                 return -1;
245         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
246                 return 1;
247
248         /* Lower priority values first */
249         if (x->priority < y->priority)
250                 return -1;
251         if (x->priority > y->priority)
252                 return 1;
253
254         /* Stability for the rest */
255         if (x < y)
256                 return -1;
257         if (x > y)
258                 return 1;
259
260         return 0;
261 }
262
263 static int earliest_time_prioq_compare(const void *a, const void *b) {
264         const sd_event_source *x = a, *y = b;
265
266         assert(EVENT_SOURCE_IS_TIME(x->type));
267         assert(x->type == y->type);
268
269         /* Enabled ones first */
270         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
271                 return -1;
272         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
273                 return 1;
274
275         /* Move the pending ones to the end */
276         if (!x->pending && y->pending)
277                 return -1;
278         if (x->pending && !y->pending)
279                 return 1;
280
281         /* Order by time */
282         if (x->time.next < y->time.next)
283                 return -1;
284         if (x->time.next > y->time.next)
285                 return 1;
286
287         /* Stability for the rest */
288         if (x < y)
289                 return -1;
290         if (x > y)
291                 return 1;
292
293         return 0;
294 }
295
296 static int latest_time_prioq_compare(const void *a, const void *b) {
297         const sd_event_source *x = a, *y = b;
298
299         assert(EVENT_SOURCE_IS_TIME(x->type));
300         assert(x->type == y->type);
301
302         /* Enabled ones first */
303         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304                 return -1;
305         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
306                 return 1;
307
308         /* Move the pending ones to the end */
309         if (!x->pending && y->pending)
310                 return -1;
311         if (x->pending && !y->pending)
312                 return 1;
313
314         /* Order by time */
315         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
316                 return -1;
317         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
318                 return 1;
319
320         /* Stability for the rest */
321         if (x < y)
322                 return -1;
323         if (x > y)
324                 return 1;
325
326         return 0;
327 }
328
329 static int exit_prioq_compare(const void *a, const void *b) {
330         const sd_event_source *x = a, *y = b;
331
332         assert(x->type == SOURCE_EXIT);
333         assert(y->type == SOURCE_EXIT);
334
335         /* Enabled ones first */
336         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
337                 return -1;
338         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
339                 return 1;
340
341         /* Lower priority values first */
342         if (x->priority < y->priority)
343                 return -1;
344         if (x->priority > y->priority)
345                 return 1;
346
347         /* Stability for the rest */
348         if (x < y)
349                 return -1;
350         if (x > y)
351                 return 1;
352
353         return 0;
354 }
355
356 static void free_clock_data(struct clock_data *d) {
357         assert(d);
358
359         safe_close(d->fd);
360         prioq_free(d->earliest);
361         prioq_free(d->latest);
362 }
363
364 static void event_free(sd_event *e) {
365         sd_event_source *s;
366
367         assert(e);
368
369         while ((s = e->sources)) {
370                 assert(s->floating);
371                 source_disconnect(s);
372                 sd_event_source_unref(s);
373         }
374
375         assert(e->n_sources == 0);
376
377         if (e->default_event_ptr)
378                 *(e->default_event_ptr) = NULL;
379
380         safe_close(e->epoll_fd);
381         safe_close(e->signal_fd);
382         safe_close(e->watchdog_fd);
383
384         free_clock_data(&e->realtime);
385         free_clock_data(&e->boottime);
386         free_clock_data(&e->monotonic);
387         free_clock_data(&e->realtime_alarm);
388         free_clock_data(&e->boottime_alarm);
389
390         prioq_free(e->pending);
391         prioq_free(e->prepare);
392         prioq_free(e->exit);
393
394         free(e->signal_sources);
395
396         hashmap_free(e->child_sources);
397         set_free(e->post_sources);
398         free(e);
399 }
400
401 _public_ int sd_event_new(sd_event** ret) {
402         sd_event *e;
403         int r;
404
405         assert_return(ret, -EINVAL);
406
407         e = new0(sd_event, 1);
408         if (!e)
409                 return -ENOMEM;
410
411         e->n_ref = 1;
412         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
413         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
414         e->original_pid = getpid();
415         e->perturb = USEC_INFINITY;
416
417         assert_se(sigemptyset(&e->sigset) == 0);
418
419         e->pending = prioq_new(pending_prioq_compare);
420         if (!e->pending) {
421                 r = -ENOMEM;
422                 goto fail;
423         }
424
425         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
426         if (e->epoll_fd < 0) {
427                 r = -errno;
428                 goto fail;
429         }
430
431         *ret = e;
432         return 0;
433
434 fail:
435         event_free(e);
436         return r;
437 }
438
439 _public_ sd_event* sd_event_ref(sd_event *e) {
440         assert_return(e, NULL);
441
442         assert(e->n_ref >= 1);
443         e->n_ref++;
444
445         return e;
446 }
447
448 _public_ sd_event* sd_event_unref(sd_event *e) {
449
450         if (!e)
451                 return NULL;
452
453         assert(e->n_ref >= 1);
454         e->n_ref--;
455
456         if (e->n_ref <= 0)
457                 event_free(e);
458
459         return NULL;
460 }
461
462 static bool event_pid_changed(sd_event *e) {
463         assert(e);
464
465         /* We don't support people creating an event loop and keeping
466          * it around over a fork(). Let's complain. */
467
468         return e->original_pid != getpid();
469 }
470
471 static void source_io_unregister(sd_event_source *s) {
472         int r;
473
474         assert(s);
475         assert(s->type == SOURCE_IO);
476
477         if (event_pid_changed(s->event))
478                 return;
479
480         if (!s->io.registered)
481                 return;
482
483         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
484         assert_log(r >= 0);
485
486         s->io.registered = false;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
/// UNNEEDED by elogind
#if 0
/* Maps a time source type to its clock ID; any other type yields
 * (clockid_t) -1. Inverse of clock_to_event_source_type(). */
static clockid_t event_source_type_to_clock(EventSourceType t) {
        clockid_t clock = (clockid_t) -1;

        switch (t) {

        case SOURCE_TIME_REALTIME:
                clock = CLOCK_REALTIME;
                break;

        case SOURCE_TIME_BOOTTIME:
                clock = CLOCK_BOOTTIME;
                break;

        case SOURCE_TIME_MONOTONIC:
                clock = CLOCK_MONOTONIC;
                break;

        case SOURCE_TIME_REALTIME_ALARM:
                clock = CLOCK_REALTIME_ALARM;
                break;

        case SOURCE_TIME_BOOTTIME_ALARM:
                clock = CLOCK_BOOTTIME_ALARM;
                break;

        default:
                break;
        }

        return clock;
}
#endif // 0
546
547 static EventSourceType clock_to_event_source_type(clockid_t clock) {
548
549         switch (clock) {
550
551         case CLOCK_REALTIME:
552                 return SOURCE_TIME_REALTIME;
553
554         case CLOCK_BOOTTIME:
555                 return SOURCE_TIME_BOOTTIME;
556
557         case CLOCK_MONOTONIC:
558                 return SOURCE_TIME_MONOTONIC;
559
560         case CLOCK_REALTIME_ALARM:
561                 return SOURCE_TIME_REALTIME_ALARM;
562
563         case CLOCK_BOOTTIME_ALARM:
564                 return SOURCE_TIME_BOOTTIME_ALARM;
565
566         default:
567                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
568         }
569 }
570
571 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
572         assert(e);
573
574         switch (t) {
575
576         case SOURCE_TIME_REALTIME:
577                 return &e->realtime;
578
579         case SOURCE_TIME_BOOTTIME:
580                 return &e->boottime;
581
582         case SOURCE_TIME_MONOTONIC:
583                 return &e->monotonic;
584
585         case SOURCE_TIME_REALTIME_ALARM:
586                 return &e->realtime_alarm;
587
588         case SOURCE_TIME_BOOTTIME_ALARM:
589                 return &e->boottime_alarm;
590
591         default:
592                 return NULL;
593         }
594 }
595
596 static bool need_signal(sd_event *e, int signal) {
597         return (e->signal_sources && e->signal_sources[signal] &&
598                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
599                 ||
600                (signal == SIGCHLD &&
601                 e->n_enabled_child_sources > 0);
602 }
603
604 static int event_update_signal_fd(sd_event *e) {
605         struct epoll_event ev = {};
606         bool add_to_epoll;
607         int r;
608
609         assert(e);
610
611         if (event_pid_changed(e))
612                 return 0;
613
614         add_to_epoll = e->signal_fd < 0;
615
616         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
617         if (r < 0)
618                 return -errno;
619
620         e->signal_fd = r;
621
622         if (!add_to_epoll)
623                 return 0;
624
625         ev.events = EPOLLIN;
626         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
627
628         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
629         if (r < 0) {
630                 e->signal_fd = safe_close(e->signal_fd);
631                 return -errno;
632         }
633
634         return 0;
635 }
636
637 static void source_disconnect(sd_event_source *s) {
638         sd_event *event;
639
640         assert(s);
641
642         if (!s->event)
643                 return;
644
645         assert(s->event->n_sources > 0);
646
647         switch (s->type) {
648
649         case SOURCE_IO:
650                 if (s->io.fd >= 0)
651                         source_io_unregister(s);
652
653                 break;
654
655         case SOURCE_TIME_REALTIME:
656         case SOURCE_TIME_BOOTTIME:
657         case SOURCE_TIME_MONOTONIC:
658         case SOURCE_TIME_REALTIME_ALARM:
659         case SOURCE_TIME_BOOTTIME_ALARM: {
660                 struct clock_data *d;
661
662                 d = event_get_clock_data(s->event, s->type);
663                 assert(d);
664
665                 prioq_remove(d->earliest, s, &s->time.earliest_index);
666                 prioq_remove(d->latest, s, &s->time.latest_index);
667                 d->needs_rearm = true;
668                 break;
669         }
670
671         case SOURCE_SIGNAL:
672                 if (s->signal.sig > 0) {
673                         if (s->event->signal_sources)
674                                 s->event->signal_sources[s->signal.sig] = NULL;
675
676                         /* If the signal was on and now it is off... */
677                         if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
678                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
679
680                                 (void) event_update_signal_fd(s->event);
681                                 /* If disabling failed, we might get a spurious event,
682                                  * but otherwise nothing bad should happen. */
683                         }
684                 }
685
686                 break;
687
688         case SOURCE_CHILD:
689                 if (s->child.pid > 0) {
690                         if (s->enabled != SD_EVENT_OFF) {
691                                 assert(s->event->n_enabled_child_sources > 0);
692                                 s->event->n_enabled_child_sources--;
693
694                                 /* We know the signal was on, if it is off now... */
695                                 if (!need_signal(s->event, SIGCHLD)) {
696                                         assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
697
698                                         (void) event_update_signal_fd(s->event);
699                                         /* If disabling failed, we might get a spurious event,
700                                          * but otherwise nothing bad should happen. */
701                                 }
702                         }
703
704                         hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
705                 }
706
707                 break;
708
709         case SOURCE_DEFER:
710                 /* nothing */
711                 break;
712
713         case SOURCE_POST:
714                 set_remove(s->event->post_sources, s);
715                 break;
716
717         case SOURCE_EXIT:
718                 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
719                 break;
720
721         default:
722                 assert_not_reached("Wut? I shouldn't exist.");
723         }
724
725         if (s->pending)
726                 prioq_remove(s->event->pending, s, &s->pending_index);
727
728         if (s->prepare)
729                 prioq_remove(s->event->prepare, s, &s->prepare_index);
730
731         event = s->event;
732
733         s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
734         s->event = NULL;
735         LIST_REMOVE(sources, event->sources, s);
736         event->n_sources--;
737
738         if (!s->floating)
739                 sd_event_unref(event);
740 }
741
742 static void source_free(sd_event_source *s) {
743         assert(s);
744
745         source_disconnect(s);
746         free(s->description);
747         free(s);
748 }
749
750 static int source_set_pending(sd_event_source *s, bool b) {
751         int r;
752
753         assert(s);
754         assert(s->type != SOURCE_EXIT);
755
756         if (s->pending == b)
757                 return 0;
758
759         s->pending = b;
760
761         if (b) {
762                 s->pending_iteration = s->event->iteration;
763
764                 r = prioq_put(s->event->pending, s, &s->pending_index);
765                 if (r < 0) {
766                         s->pending = false;
767                         return r;
768                 }
769         } else
770                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
771
772         if (EVENT_SOURCE_IS_TIME(s->type)) {
773                 struct clock_data *d;
774
775                 d = event_get_clock_data(s->event, s->type);
776                 assert(d);
777
778                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
779                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
780                 d->needs_rearm = true;
781         }
782
783         return 0;
784 }
785
786 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
787         sd_event_source *s;
788
789         assert(e);
790
791         s = new0(sd_event_source, 1);
792         if (!s)
793                 return NULL;
794
795         s->n_ref = 1;
796         s->event = e;
797         s->floating = floating;
798         s->type = type;
799         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
800
801         if (!floating)
802                 sd_event_ref(e);
803
804         LIST_PREPEND(sources, e->sources, s);
805         e->n_sources ++;
806
807         return s;
808 }
809
810 _public_ int sd_event_add_io(
811                 sd_event *e,
812                 sd_event_source **ret,
813                 int fd,
814                 uint32_t events,
815                 sd_event_io_handler_t callback,
816                 void *userdata) {
817
818         sd_event_source *s;
819         int r;
820
821         assert_return(e, -EINVAL);
822         assert_return(fd >= 0, -EBADF);
823         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
824         assert_return(callback, -EINVAL);
825         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
826         assert_return(!event_pid_changed(e), -ECHILD);
827
828         s = source_new(e, !ret, SOURCE_IO);
829         if (!s)
830                 return -ENOMEM;
831
832         s->io.fd = fd;
833         s->io.events = events;
834         s->io.callback = callback;
835         s->userdata = userdata;
836         s->enabled = SD_EVENT_ON;
837
838         r = source_io_register(s, s->enabled, events);
839         if (r < 0) {
840                 source_free(s);
841                 return r;
842         }
843
844         if (ret)
845                 *ret = s;
846
847         return 0;
848 }
849
850 static void initialize_perturb(sd_event *e) {
851         sd_id128_t bootid = {};
852
853         /* When we sleep for longer, we try to realign the wakeup to
854            the same time wihtin each minute/second/250ms, so that
855            events all across the system can be coalesced into a single
856            CPU wakeup. However, let's take some system-specific
857            randomness for this value, so that in a network of systems
858            with synced clocks timer events are distributed a
859            bit. Here, we calculate a perturbation usec offset from the
860            boot ID. */
861
862         if (_likely_(e->perturb != USEC_INFINITY))
863                 return;
864
865         if (sd_id128_get_boot(&bootid) >= 0)
866                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
867 }
868
869 static int event_setup_timer_fd(
870                 sd_event *e,
871                 struct clock_data *d,
872                 clockid_t clock) {
873
874         struct epoll_event ev = {};
875         int r, fd;
876
877         assert(e);
878         assert(d);
879
880         if (_likely_(d->fd >= 0))
881                 return 0;
882
883         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
884         if (fd < 0)
885                 return -errno;
886
887         ev.events = EPOLLIN;
888         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
889
890         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
891         if (r < 0) {
892                 safe_close(fd);
893                 return -errno;
894         }
895
896         d->fd = fd;
897         return 0;
898 }
899
900 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
901         assert(s);
902
903         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
904 }
905
906 _public_ int sd_event_add_time(
907                 sd_event *e,
908                 sd_event_source **ret,
909                 clockid_t clock,
910                 uint64_t usec,
911                 uint64_t accuracy,
912                 sd_event_time_handler_t callback,
913                 void *userdata) {
914
915         EventSourceType type;
916         sd_event_source *s;
917         struct clock_data *d;
918         int r;
919
920         assert_return(e, -EINVAL);
921         assert_return(usec != (uint64_t) -1, -EINVAL);
922         assert_return(accuracy != (uint64_t) -1, -EINVAL);
923         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
924         assert_return(!event_pid_changed(e), -ECHILD);
925
926         if (!callback)
927                 callback = time_exit_callback;
928
929         type = clock_to_event_source_type(clock);
930         assert_return(type >= 0, -EOPNOTSUPP);
931
932         d = event_get_clock_data(e, type);
933         assert(d);
934
935         if (!d->earliest) {
936                 d->earliest = prioq_new(earliest_time_prioq_compare);
937                 if (!d->earliest)
938                         return -ENOMEM;
939         }
940
941         if (!d->latest) {
942                 d->latest = prioq_new(latest_time_prioq_compare);
943                 if (!d->latest)
944                         return -ENOMEM;
945         }
946
947         if (d->fd < 0) {
948                 r = event_setup_timer_fd(e, d, clock);
949                 if (r < 0)
950                         return r;
951         }
952
953         s = source_new(e, !ret, type);
954         if (!s)
955                 return -ENOMEM;
956
957         s->time.next = usec;
958         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
959         s->time.callback = callback;
960         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
961         s->userdata = userdata;
962         s->enabled = SD_EVENT_ONESHOT;
963
964         d->needs_rearm = true;
965
966         r = prioq_put(d->earliest, s, &s->time.earliest_index);
967         if (r < 0)
968                 goto fail;
969
970         r = prioq_put(d->latest, s, &s->time.latest_index);
971         if (r < 0)
972                 goto fail;
973
974         if (ret)
975                 *ret = s;
976
977         return 0;
978
979 fail:
980         source_free(s);
981         return r;
982 }
983
984 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
985         assert(s);
986
987         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
988 }
989
990 _public_ int sd_event_add_signal(
991                 sd_event *e,
992                 sd_event_source **ret,
993                 int sig,
994                 sd_event_signal_handler_t callback,
995                 void *userdata) {
996
997         sd_event_source *s;
998         sigset_t ss;
999         int r;
1000         bool previous;
1001
1002         assert_return(e, -EINVAL);
1003         assert_return(sig > 0, -EINVAL);
1004         assert_return(sig < _NSIG, -EINVAL);
1005         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1006         assert_return(!event_pid_changed(e), -ECHILD);
1007
1008         if (!callback)
1009                 callback = signal_exit_callback;
1010
1011         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1012         if (r < 0)
1013                 return -errno;
1014
1015         if (!sigismember(&ss, sig))
1016                 return -EBUSY;
1017
1018         if (!e->signal_sources) {
1019                 e->signal_sources = new0(sd_event_source*, _NSIG);
1020                 if (!e->signal_sources)
1021                         return -ENOMEM;
1022         } else if (e->signal_sources[sig])
1023                 return -EBUSY;
1024
1025         previous = need_signal(e, sig);
1026
1027         s = source_new(e, !ret, SOURCE_SIGNAL);
1028         if (!s)
1029                 return -ENOMEM;
1030
1031         s->signal.sig = sig;
1032         s->signal.callback = callback;
1033         s->userdata = userdata;
1034         s->enabled = SD_EVENT_ON;
1035
1036         e->signal_sources[sig] = s;
1037
1038         if (!previous) {
1039                 assert_se(sigaddset(&e->sigset, sig) == 0);
1040
1041                 r = event_update_signal_fd(e);
1042                 if (r < 0) {
1043                         source_free(s);
1044                         return r;
1045                 }
1046         }
1047
1048         /* Use the signal name as description for the event source by default */
1049         (void) sd_event_source_set_description(s, signal_to_string(sig));
1050
1051         if (ret)
1052                 *ret = s;
1053
1054         return 0;
1055 }
1056
1057 _public_ int sd_event_add_child(
1058                 sd_event *e,
1059                 sd_event_source **ret,
1060                 pid_t pid,
1061                 int options,
1062                 sd_event_child_handler_t callback,
1063                 void *userdata) {
1064
1065         sd_event_source *s;
1066         int r;
1067         bool previous;
1068
1069         assert_return(e, -EINVAL);
1070         assert_return(pid > 1, -EINVAL);
1071         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1072         assert_return(options != 0, -EINVAL);
1073         assert_return(callback, -EINVAL);
1074         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1075         assert_return(!event_pid_changed(e), -ECHILD);
1076
1077         r = hashmap_ensure_allocated(&e->child_sources, NULL);
1078         if (r < 0)
1079                 return r;
1080
1081         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1082                 return -EBUSY;
1083
1084         previous = need_signal(e, SIGCHLD);
1085
1086         s = source_new(e, !ret, SOURCE_CHILD);
1087         if (!s)
1088                 return -ENOMEM;
1089
1090         s->child.pid = pid;
1091         s->child.options = options;
1092         s->child.callback = callback;
1093         s->userdata = userdata;
1094         s->enabled = SD_EVENT_ONESHOT;
1095
1096         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1097         if (r < 0) {
1098                 source_free(s);
1099                 return r;
1100         }
1101
1102         e->n_enabled_child_sources ++;
1103
1104         if (!previous) {
1105                 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1106
1107                 r = event_update_signal_fd(e);
1108                 if (r < 0) {
1109                         source_free(s);
1110                         return r;
1111                 }
1112         }
1113
1114         e->need_process_child = true;
1115
1116         if (ret)
1117                 *ret = s;
1118
1119         return 0;
1120 }
1121
1122 _public_ int sd_event_add_defer(
1123                 sd_event *e,
1124                 sd_event_source **ret,
1125                 sd_event_handler_t callback,
1126                 void *userdata) {
1127
1128         sd_event_source *s;
1129         int r;
1130
1131         assert_return(e, -EINVAL);
1132         assert_return(callback, -EINVAL);
1133         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1134         assert_return(!event_pid_changed(e), -ECHILD);
1135
1136         s = source_new(e, !ret, SOURCE_DEFER);
1137         if (!s)
1138                 return -ENOMEM;
1139
1140         s->defer.callback = callback;
1141         s->userdata = userdata;
1142         s->enabled = SD_EVENT_ONESHOT;
1143
1144         r = source_set_pending(s, true);
1145         if (r < 0) {
1146                 source_free(s);
1147                 return r;
1148         }
1149
1150         if (ret)
1151                 *ret = s;
1152
1153         return 0;
1154 }
1155
1156 _public_ int sd_event_add_post(
1157                 sd_event *e,
1158                 sd_event_source **ret,
1159                 sd_event_handler_t callback,
1160                 void *userdata) {
1161
1162         sd_event_source *s;
1163         int r;
1164
1165         assert_return(e, -EINVAL);
1166         assert_return(callback, -EINVAL);
1167         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1168         assert_return(!event_pid_changed(e), -ECHILD);
1169
1170         r = set_ensure_allocated(&e->post_sources, NULL);
1171         if (r < 0)
1172                 return r;
1173
1174         s = source_new(e, !ret, SOURCE_POST);
1175         if (!s)
1176                 return -ENOMEM;
1177
1178         s->post.callback = callback;
1179         s->userdata = userdata;
1180         s->enabled = SD_EVENT_ON;
1181
1182         r = set_put(e->post_sources, s);
1183         if (r < 0) {
1184                 source_free(s);
1185                 return r;
1186         }
1187
1188         if (ret)
1189                 *ret = s;
1190
1191         return 0;
1192 }
1193
1194 _public_ int sd_event_add_exit(
1195                 sd_event *e,
1196                 sd_event_source **ret,
1197                 sd_event_handler_t callback,
1198                 void *userdata) {
1199
1200         sd_event_source *s;
1201         int r;
1202
1203         assert_return(e, -EINVAL);
1204         assert_return(callback, -EINVAL);
1205         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1206         assert_return(!event_pid_changed(e), -ECHILD);
1207
1208         if (!e->exit) {
1209                 e->exit = prioq_new(exit_prioq_compare);
1210                 if (!e->exit)
1211                         return -ENOMEM;
1212         }
1213
1214         s = source_new(e, !ret, SOURCE_EXIT);
1215         if (!s)
1216                 return -ENOMEM;
1217
1218         s->exit.callback = callback;
1219         s->userdata = userdata;
1220         s->exit.prioq_index = PRIOQ_IDX_NULL;
1221         s->enabled = SD_EVENT_ONESHOT;
1222
1223         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1224         if (r < 0) {
1225                 source_free(s);
1226                 return r;
1227         }
1228
1229         if (ret)
1230                 *ret = s;
1231
1232         return 0;
1233 }
1234
1235 /// UNNEEDED by elogind
1236 #if 0
1237 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1238         assert_return(s, NULL);
1239
1240         assert(s->n_ref >= 1);
1241         s->n_ref++;
1242
1243         return s;
1244 }
1245 #endif // 0
1246
1247 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1248
1249         if (!s)
1250                 return NULL;
1251
1252         assert(s->n_ref >= 1);
1253         s->n_ref--;
1254
1255         if (s->n_ref <= 0) {
1256                 /* Here's a special hack: when we are called from a
1257                  * dispatch handler we won't free the event source
1258                  * immediately, but we will detach the fd from the
1259                  * epoll. This way it is safe for the caller to unref
1260                  * the event source and immediately close the fd, but
1261                  * we still retain a valid event source object after
1262                  * the callback. */
1263
1264                 if (s->dispatching) {
1265                         if (s->type == SOURCE_IO)
1266                                 source_io_unregister(s);
1267
1268                         source_disconnect(s);
1269                 } else
1270                         source_free(s);
1271         }
1272
1273         return NULL;
1274 }
1275
1276 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1277         assert_return(s, -EINVAL);
1278         assert_return(!event_pid_changed(s->event), -ECHILD);
1279
1280         return free_and_strdup(&s->description, description);
1281 }
1282
1283 /// UNNEEDED by elogind
1284 #if 0
1285 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1286         assert_return(s, -EINVAL);
1287         assert_return(description, -EINVAL);
1288         assert_return(s->description, -ENXIO);
1289         assert_return(!event_pid_changed(s->event), -ECHILD);
1290
1291         *description = s->description;
1292         return 0;
1293 }
1294 #endif // 0
1295
1296 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1297         assert_return(s, NULL);
1298
1299         return s->event;
1300 }
1301
1302 /// UNNEEDED by elogind
1303 #if 0
1304 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1305         assert_return(s, -EINVAL);
1306         assert_return(s->type != SOURCE_EXIT, -EDOM);
1307         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1308         assert_return(!event_pid_changed(s->event), -ECHILD);
1309
1310         return s->pending;
1311 }
1312
1313 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1314         assert_return(s, -EINVAL);
1315         assert_return(s->type == SOURCE_IO, -EDOM);
1316         assert_return(!event_pid_changed(s->event), -ECHILD);
1317
1318         return s->io.fd;
1319 }
1320 #endif // 0
1321
1322 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1323         int r;
1324
1325         assert_return(s, -EINVAL);
1326         assert_return(fd >= 0, -EBADF);
1327         assert_return(s->type == SOURCE_IO, -EDOM);
1328         assert_return(!event_pid_changed(s->event), -ECHILD);
1329
1330         if (s->io.fd == fd)
1331                 return 0;
1332
1333         if (s->enabled == SD_EVENT_OFF) {
1334                 s->io.fd = fd;
1335                 s->io.registered = false;
1336         } else {
1337                 int saved_fd;
1338
1339                 saved_fd = s->io.fd;
1340                 assert(s->io.registered);
1341
1342                 s->io.fd = fd;
1343                 s->io.registered = false;
1344
1345                 r = source_io_register(s, s->enabled, s->io.events);
1346                 if (r < 0) {
1347                         s->io.fd = saved_fd;
1348                         s->io.registered = true;
1349                         return r;
1350                 }
1351
1352                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1353         }
1354
1355         return 0;
1356 }
1357
1358 /// UNNEEDED by elogind
1359 #if 0
1360 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1361         assert_return(s, -EINVAL);
1362         assert_return(events, -EINVAL);
1363         assert_return(s->type == SOURCE_IO, -EDOM);
1364         assert_return(!event_pid_changed(s->event), -ECHILD);
1365
1366         *events = s->io.events;
1367         return 0;
1368 }
1369 #endif // 0
1370
1371 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1372         int r;
1373
1374         assert_return(s, -EINVAL);
1375         assert_return(s->type == SOURCE_IO, -EDOM);
1376         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1377         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1378         assert_return(!event_pid_changed(s->event), -ECHILD);
1379
1380         /* edge-triggered updates are never skipped, so we can reset edges */
1381         if (s->io.events == events && !(events & EPOLLET))
1382                 return 0;
1383
1384         if (s->enabled != SD_EVENT_OFF) {
1385                 r = source_io_register(s, s->enabled, events);
1386                 if (r < 0)
1387                         return r;
1388         }
1389
1390         s->io.events = events;
1391         source_set_pending(s, false);
1392
1393         return 0;
1394 }
1395
1396 /// UNNEEDED by elogind
1397 #if 0
1398 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1399         assert_return(s, -EINVAL);
1400         assert_return(revents, -EINVAL);
1401         assert_return(s->type == SOURCE_IO, -EDOM);
1402         assert_return(s->pending, -ENODATA);
1403         assert_return(!event_pid_changed(s->event), -ECHILD);
1404
1405         *revents = s->io.revents;
1406         return 0;
1407 }
1408
1409 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1410         assert_return(s, -EINVAL);
1411         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1412         assert_return(!event_pid_changed(s->event), -ECHILD);
1413
1414         return s->signal.sig;
1415 }
1416
1417 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1418         assert_return(s, -EINVAL);
1419         assert_return(!event_pid_changed(s->event), -ECHILD);
1420
1421         return s->priority;
1422 }
1423 #endif // 0
1424
1425 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1426         assert_return(s, -EINVAL);
1427         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1428         assert_return(!event_pid_changed(s->event), -ECHILD);
1429
1430         if (s->priority == priority)
1431                 return 0;
1432
1433         s->priority = priority;
1434
1435         if (s->pending)
1436                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1437
1438         if (s->prepare)
1439                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1440
1441         if (s->type == SOURCE_EXIT)
1442                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1443
1444         return 0;
1445 }
1446
1447 /// UNNEEDED by elogind
1448 #if 0
1449 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1450         assert_return(s, -EINVAL);
1451         assert_return(m, -EINVAL);
1452         assert_return(!event_pid_changed(s->event), -ECHILD);
1453
1454         *m = s->enabled;
1455         return 0;
1456 }
1457 #endif // 0
1458
1459 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1460         int r;
1461
1462         assert_return(s, -EINVAL);
1463         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1464         assert_return(!event_pid_changed(s->event), -ECHILD);
1465
1466         /* If we are dead anyway, we are fine with turning off
1467          * sources, but everything else needs to fail. */
1468         if (s->event->state == SD_EVENT_FINISHED)
1469                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1470
1471         if (s->enabled == m)
1472                 return 0;
1473
1474         if (m == SD_EVENT_OFF) {
1475
1476                 switch (s->type) {
1477
1478                 case SOURCE_IO:
1479                         source_io_unregister(s);
1480                         s->enabled = m;
1481                         break;
1482
1483                 case SOURCE_TIME_REALTIME:
1484                 case SOURCE_TIME_BOOTTIME:
1485                 case SOURCE_TIME_MONOTONIC:
1486                 case SOURCE_TIME_REALTIME_ALARM:
1487                 case SOURCE_TIME_BOOTTIME_ALARM: {
1488                         struct clock_data *d;
1489
1490                         s->enabled = m;
1491                         d = event_get_clock_data(s->event, s->type);
1492                         assert(d);
1493
1494                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1495                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1496                         d->needs_rearm = true;
1497                         break;
1498                 }
1499
1500                 case SOURCE_SIGNAL:
1501                         assert(need_signal(s->event, s->signal.sig));
1502
1503                         s->enabled = m;
1504
1505                         if (!need_signal(s->event, s->signal.sig)) {
1506                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1507
1508                                 (void) event_update_signal_fd(s->event);
1509                                 /* If disabling failed, we might get a spurious event,
1510                                  * but otherwise nothing bad should happen. */
1511                         }
1512
1513                         break;
1514
1515                 case SOURCE_CHILD:
1516                         assert(need_signal(s->event, SIGCHLD));
1517
1518                         s->enabled = m;
1519
1520                         assert(s->event->n_enabled_child_sources > 0);
1521                         s->event->n_enabled_child_sources--;
1522
1523                         if (!need_signal(s->event, SIGCHLD)) {
1524                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1525
1526                                 (void) event_update_signal_fd(s->event);
1527                         }
1528
1529                         break;
1530
1531                 case SOURCE_EXIT:
1532                         s->enabled = m;
1533                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1534                         break;
1535
1536                 case SOURCE_DEFER:
1537                 case SOURCE_POST:
1538                         s->enabled = m;
1539                         break;
1540
1541                 default:
1542                         assert_not_reached("Wut? I shouldn't exist.");
1543                 }
1544
1545         } else {
1546                 switch (s->type) {
1547
1548                 case SOURCE_IO:
1549                         r = source_io_register(s, m, s->io.events);
1550                         if (r < 0)
1551                                 return r;
1552
1553                         s->enabled = m;
1554                         break;
1555
1556                 case SOURCE_TIME_REALTIME:
1557                 case SOURCE_TIME_BOOTTIME:
1558                 case SOURCE_TIME_MONOTONIC:
1559                 case SOURCE_TIME_REALTIME_ALARM:
1560                 case SOURCE_TIME_BOOTTIME_ALARM: {
1561                         struct clock_data *d;
1562
1563                         s->enabled = m;
1564                         d = event_get_clock_data(s->event, s->type);
1565                         assert(d);
1566
1567                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1568                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1569                         d->needs_rearm = true;
1570                         break;
1571                 }
1572
1573                 case SOURCE_SIGNAL:
1574                         /* Check status before enabling. */
1575                         if (!need_signal(s->event, s->signal.sig)) {
1576                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1577
1578                                 r = event_update_signal_fd(s->event);
1579                                 if (r < 0) {
1580                                         s->enabled = SD_EVENT_OFF;
1581                                         return r;
1582                                 }
1583                         }
1584
1585                         s->enabled = m;
1586                         break;
1587
1588                 case SOURCE_CHILD:
1589                         /* Check status before enabling. */
1590                         if (s->enabled == SD_EVENT_OFF) {
1591                                 if (!need_signal(s->event, SIGCHLD)) {
1592                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1593
1594                                         r = event_update_signal_fd(s->event);
1595                                         if (r < 0) {
1596                                                 s->enabled = SD_EVENT_OFF;
1597                                                 return r;
1598                                         }
1599                                 }
1600
1601                                 s->event->n_enabled_child_sources++;
1602                         }
1603
1604                         s->enabled = m;
1605                         break;
1606
1607                 case SOURCE_EXIT:
1608                         s->enabled = m;
1609                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1610                         break;
1611
1612                 case SOURCE_DEFER:
1613                 case SOURCE_POST:
1614                         s->enabled = m;
1615                         break;
1616
1617                 default:
1618                         assert_not_reached("Wut? I shouldn't exist.");
1619                 }
1620         }
1621
1622         if (s->pending)
1623                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1624
1625         if (s->prepare)
1626                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1627
1628         return 0;
1629 }
1630
1631 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1632         assert_return(s, -EINVAL);
1633         assert_return(usec, -EINVAL);
1634         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1635         assert_return(!event_pid_changed(s->event), -ECHILD);
1636
1637         *usec = s->time.next;
1638         return 0;
1639 }
1640
1641 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1642         struct clock_data *d;
1643
1644         assert_return(s, -EINVAL);
1645         assert_return(usec != (uint64_t) -1, -EINVAL);
1646         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1647         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1648         assert_return(!event_pid_changed(s->event), -ECHILD);
1649
1650         s->time.next = usec;
1651
1652         source_set_pending(s, false);
1653
1654         d = event_get_clock_data(s->event, s->type);
1655         assert(d);
1656
1657         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1658         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1659         d->needs_rearm = true;
1660
1661         return 0;
1662 }
1663
1664 /// UNNEEDED by elogind
1665 #if 0
1666 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1667         assert_return(s, -EINVAL);
1668         assert_return(usec, -EINVAL);
1669         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1670         assert_return(!event_pid_changed(s->event), -ECHILD);
1671
1672         *usec = s->time.accuracy;
1673         return 0;
1674 }
1675
1676 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1677         struct clock_data *d;
1678
1679         assert_return(s, -EINVAL);
1680         assert_return(usec != (uint64_t) -1, -EINVAL);
1681         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1682         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1683         assert_return(!event_pid_changed(s->event), -ECHILD);
1684
1685         if (usec == 0)
1686                 usec = DEFAULT_ACCURACY_USEC;
1687
1688         s->time.accuracy = usec;
1689
1690         source_set_pending(s, false);
1691
1692         d = event_get_clock_data(s->event, s->type);
1693         assert(d);
1694
1695         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1696         d->needs_rearm = true;
1697
1698         return 0;
1699 }
1700
1701 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1702         assert_return(s, -EINVAL);
1703         assert_return(clock, -EINVAL);
1704         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1705         assert_return(!event_pid_changed(s->event), -ECHILD);
1706
1707         *clock = event_source_type_to_clock(s->type);
1708         return 0;
1709 }
1710
1711 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1712         assert_return(s, -EINVAL);
1713         assert_return(pid, -EINVAL);
1714         assert_return(s->type == SOURCE_CHILD, -EDOM);
1715         assert_return(!event_pid_changed(s->event), -ECHILD);
1716
1717         *pid = s->child.pid;
1718         return 0;
1719 }
1720 #endif // 0
1721
1722 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1723         int r;
1724
1725         assert_return(s, -EINVAL);
1726         assert_return(s->type != SOURCE_EXIT, -EDOM);
1727         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1728         assert_return(!event_pid_changed(s->event), -ECHILD);
1729
1730         if (s->prepare == callback)
1731                 return 0;
1732
1733         if (callback && s->prepare) {
1734                 s->prepare = callback;
1735                 return 0;
1736         }
1737
1738         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1739         if (r < 0)
1740                 return r;
1741
1742         s->prepare = callback;
1743
1744         if (callback) {
1745                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1746                 if (r < 0)
1747                         return r;
1748         } else
1749                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1750
1751         return 0;
1752 }
1753
1754 /// UNNEEDED by elogind
1755 #if 0
1756 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1757         assert_return(s, NULL);
1758
1759         return s->userdata;
1760 }
1761
1762 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1763         void *ret;
1764
1765         assert_return(s, NULL);
1766
1767         ret = s->userdata;
1768         s->userdata = userdata;
1769
1770         return ret;
1771 }
1772 #endif // 0
1773
1774 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1775         usec_t c;
1776         assert(e);
1777         assert(a <= b);
1778
1779         if (a <= 0)
1780                 return 0;
1781
1782         if (b <= a + 1)
1783                 return a;
1784
1785         initialize_perturb(e);
1786
1787         /*
1788           Find a good time to wake up again between times a and b. We
1789           have two goals here:
1790
1791           a) We want to wake up as seldom as possible, hence prefer
1792              later times over earlier times.
1793
1794           b) But if we have to wake up, then let's make sure to
1795              dispatch as much as possible on the entire system.
1796
1797           We implement this by waking up everywhere at the same time
1798           within any given minute if we can, synchronised via the
1799           perturbation value determined from the boot ID. If we can't,
1800           then we try to find the same spot in every 10s, then 1s and
1801           then 250ms step. Otherwise, we pick the last possible time
1802           to wake up.
1803         */
1804
1805         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1806         if (c >= b) {
1807                 if (_unlikely_(c < USEC_PER_MINUTE))
1808                         return b;
1809
1810                 c -= USEC_PER_MINUTE;
1811         }
1812
1813         if (c >= a)
1814                 return c;
1815
1816         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1817         if (c >= b) {
1818                 if (_unlikely_(c < USEC_PER_SEC*10))
1819                         return b;
1820
1821                 c -= USEC_PER_SEC*10;
1822         }
1823
1824         if (c >= a)
1825                 return c;
1826
1827         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1828         if (c >= b) {
1829                 if (_unlikely_(c < USEC_PER_SEC))
1830                         return b;
1831
1832                 c -= USEC_PER_SEC;
1833         }
1834
1835         if (c >= a)
1836                 return c;
1837
1838         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1839         if (c >= b) {
1840                 if (_unlikely_(c < USEC_PER_MSEC*250))
1841                         return b;
1842
1843                 c -= USEC_PER_MSEC*250;
1844         }
1845
1846         if (c >= a)
1847                 return c;
1848
1849         return b;
1850 }
1851
1852 static int event_arm_timer(
1853                 sd_event *e,
1854                 struct clock_data *d) {
1855
1856         struct itimerspec its = {};
1857         sd_event_source *a, *b;
1858         usec_t t;
1859         int r;
1860
1861         assert(e);
1862         assert(d);
1863
1864         if (!d->needs_rearm)
1865                 return 0;
1866         else
1867                 d->needs_rearm = false;
1868
1869         a = prioq_peek(d->earliest);
1870         if (!a || a->enabled == SD_EVENT_OFF) {
1871
1872                 if (d->fd < 0)
1873                         return 0;
1874
1875                 if (d->next == USEC_INFINITY)
1876                         return 0;
1877
1878                 /* disarm */
1879                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1880                 if (r < 0)
1881                         return r;
1882
1883                 d->next = USEC_INFINITY;
1884                 return 0;
1885         }
1886
1887         b = prioq_peek(d->latest);
1888         assert_se(b && b->enabled != SD_EVENT_OFF);
1889
1890         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1891         if (d->next == t)
1892                 return 0;
1893
1894         assert_se(d->fd >= 0);
1895
1896         if (t == 0) {
1897                 /* We don' want to disarm here, just mean some time looooong ago. */
1898                 its.it_value.tv_sec = 0;
1899                 its.it_value.tv_nsec = 1;
1900         } else
1901                 timespec_store(&its.it_value, t);
1902
1903         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1904         if (r < 0)
1905                 return -errno;
1906
1907         d->next = t;
1908         return 0;
1909 }
1910
1911 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1912         assert(e);
1913         assert(s);
1914         assert(s->type == SOURCE_IO);
1915
1916         /* If the event source was already pending, we just OR in the
1917          * new revents, otherwise we reset the value. The ORing is
1918          * necessary to handle EPOLLONESHOT events properly where
1919          * readability might happen independently of writability, and
1920          * we need to keep track of both */
1921
1922         if (s->pending)
1923                 s->io.revents |= revents;
1924         else
1925                 s->io.revents = revents;
1926
1927         return source_set_pending(s, true);
1928 }
1929
1930 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1931         uint64_t x;
1932         ssize_t ss;
1933
1934         assert(e);
1935         assert(fd >= 0);
1936
1937         assert_return(events == EPOLLIN, -EIO);
1938
1939         ss = read(fd, &x, sizeof(x));
1940         if (ss < 0) {
1941                 if (errno == EAGAIN || errno == EINTR)
1942                         return 0;
1943
1944                 return -errno;
1945         }
1946
1947         if (_unlikely_(ss != sizeof(x)))
1948                 return -EIO;
1949
1950         if (next)
1951                 *next = USEC_INFINITY;
1952
1953         return 0;
1954 }
1955
1956 static int process_timer(
1957                 sd_event *e,
1958                 usec_t n,
1959                 struct clock_data *d) {
1960
1961         sd_event_source *s;
1962         int r;
1963
1964         assert(e);
1965         assert(d);
1966
1967         for (;;) {
1968                 s = prioq_peek(d->earliest);
1969                 if (!s ||
1970                     s->time.next > n ||
1971                     s->enabled == SD_EVENT_OFF ||
1972                     s->pending)
1973                         break;
1974
1975                 r = source_set_pending(s, true);
1976                 if (r < 0)
1977                         return r;
1978
1979                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1980                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1981                 d->needs_rearm = true;
1982         }
1983
1984         return 0;
1985 }
1986
1987 static int process_child(sd_event *e) {
1988         sd_event_source *s;
1989         Iterator i;
1990         int r;
1991
1992         assert(e);
1993
1994         e->need_process_child = false;
1995
1996         /*
1997            So, this is ugly. We iteratively invoke waitid() with P_PID
1998            + WNOHANG for each PID we wait for, instead of using
1999            P_ALL. This is because we only want to get child
2000            information of very specific child processes, and not all
2001            of them. We might not have processed the SIGCHLD even of a
2002            previous invocation and we don't want to maintain a
2003            unbounded *per-child* event queue, hence we really don't
2004            want anything flushed out of the kernel's queue that we
2005            don't care about. Since this is O(n) this means that if you
2006            have a lot of processes you probably want to handle SIGCHLD
2007            yourself.
2008
2009            We do not reap the children here (by using WNOWAIT), this
2010            is only done after the event source is dispatched so that
2011            the callback still sees the process as a zombie.
2012         */
2013
2014         HASHMAP_FOREACH(s, e->child_sources, i) {
2015                 assert(s->type == SOURCE_CHILD);
2016
2017                 if (s->pending)
2018                         continue;
2019
2020                 if (s->enabled == SD_EVENT_OFF)
2021                         continue;
2022
2023                 zero(s->child.siginfo);
2024                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2025                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2026                 if (r < 0)
2027                         return -errno;
2028
2029                 if (s->child.siginfo.si_pid != 0) {
2030                         bool zombie =
2031                                 s->child.siginfo.si_code == CLD_EXITED ||
2032                                 s->child.siginfo.si_code == CLD_KILLED ||
2033                                 s->child.siginfo.si_code == CLD_DUMPED;
2034
2035                         if (!zombie && (s->child.options & WEXITED)) {
2036                                 /* If the child isn't dead then let's
2037                                  * immediately remove the state change
2038                                  * from the queue, since there's no
2039                                  * benefit in leaving it queued */
2040
2041                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2042                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2043                         }
2044
2045                         r = source_set_pending(s, true);
2046                         if (r < 0)
2047                                 return r;
2048                 }
2049         }
2050
2051         return 0;
2052 }
2053
2054 static int process_signal(sd_event *e, uint32_t events) {
2055         bool read_one = false;
2056         int r;
2057
2058         assert(e);
2059
2060         assert_return(events == EPOLLIN, -EIO);
2061
2062         for (;;) {
2063                 struct signalfd_siginfo si;
2064                 ssize_t n;
2065                 sd_event_source *s = NULL;
2066
2067                 n = read(e->signal_fd, &si, sizeof(si));
2068                 if (n < 0) {
2069                         if (errno == EAGAIN || errno == EINTR)
2070                                 return read_one;
2071
2072                         return -errno;
2073                 }
2074
2075                 if (_unlikely_(n != sizeof(si)))
2076                         return -EIO;
2077
2078                 assert(si.ssi_signo < _NSIG);
2079
2080                 read_one = true;
2081
2082                 if (si.ssi_signo == SIGCHLD) {
2083                         r = process_child(e);
2084                         if (r < 0)
2085                                 return r;
2086                         if (r > 0)
2087                                 continue;
2088                 }
2089
2090                 if (e->signal_sources)
2091                         s = e->signal_sources[si.ssi_signo];
2092
2093                 if (!s)
2094                         continue;
2095
2096                 s->signal.siginfo = si;
2097                 r = source_set_pending(s, true);
2098                 if (r < 0)
2099                         return r;
2100         }
2101 }
2102
2103 static int source_dispatch(sd_event_source *s) {
2104         int r = 0;
2105
2106         assert(s);
2107         assert(s->pending || s->type == SOURCE_EXIT);
2108
2109         if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2110                 r = source_set_pending(s, false);
2111                 if (r < 0)
2112                         return r;
2113         }
2114
2115         if (s->type != SOURCE_POST) {
2116                 sd_event_source *z;
2117                 Iterator i;
2118
2119                 /* If we execute a non-post source, let's mark all
2120                  * post sources as pending */
2121
2122                 SET_FOREACH(z, s->event->post_sources, i) {
2123                         if (z->enabled == SD_EVENT_OFF)
2124                                 continue;
2125
2126                         r = source_set_pending(z, true);
2127                         if (r < 0)
2128                                 return r;
2129                 }
2130         }
2131
2132         if (s->enabled == SD_EVENT_ONESHOT) {
2133                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2134                 if (r < 0)
2135                         return r;
2136         }
2137
2138         s->dispatching = true;
2139
2140         switch (s->type) {
2141
2142         case SOURCE_IO:
2143                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2144                 break;
2145
2146         case SOURCE_TIME_REALTIME:
2147         case SOURCE_TIME_BOOTTIME:
2148         case SOURCE_TIME_MONOTONIC:
2149         case SOURCE_TIME_REALTIME_ALARM:
2150         case SOURCE_TIME_BOOTTIME_ALARM:
2151                 r = s->time.callback(s, s->time.next, s->userdata);
2152                 break;
2153
2154         case SOURCE_SIGNAL:
2155                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2156                 break;
2157
2158         case SOURCE_CHILD: {
2159                 bool zombie;
2160
2161                 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2162                          s->child.siginfo.si_code == CLD_KILLED ||
2163                          s->child.siginfo.si_code == CLD_DUMPED;
2164
2165                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2166
2167                 /* Now, reap the PID for good. */
2168                 if (zombie)
2169                         waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2170
2171                 break;
2172         }
2173
2174         case SOURCE_DEFER:
2175                 r = s->defer.callback(s, s->userdata);
2176                 break;
2177
2178         case SOURCE_POST:
2179                 r = s->post.callback(s, s->userdata);
2180                 break;
2181
2182         case SOURCE_EXIT:
2183                 r = s->exit.callback(s, s->userdata);
2184                 break;
2185
2186         case SOURCE_WATCHDOG:
2187         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2188         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2189                 assert_not_reached("Wut? I shouldn't exist.");
2190         }
2191
2192         s->dispatching = false;
2193
2194         if (r < 0) {
2195                 if (s->description)
2196                         log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
2197                 else
2198                         log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
2199         }
2200
2201         if (s->n_ref == 0)
2202                 source_free(s);
2203         else if (r < 0)
2204                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2205
2206         return 1;
2207 }
2208
2209 static int event_prepare(sd_event *e) {
2210         int r;
2211
2212         assert(e);
2213
2214         for (;;) {
2215                 sd_event_source *s;
2216
2217                 s = prioq_peek(e->prepare);
2218                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2219                         break;
2220
2221                 s->prepare_iteration = e->iteration;
2222                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2223                 if (r < 0)
2224                         return r;
2225
2226                 assert(s->prepare);
2227
2228                 s->dispatching = true;
2229                 r = s->prepare(s, s->userdata);
2230                 s->dispatching = false;
2231
2232                 if (r < 0) {
2233                         if (s->description)
2234                                 log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
2235                         else
2236                                 log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
2237                 }
2238
2239                 if (s->n_ref == 0)
2240                         source_free(s);
2241                 else if (r < 0)
2242                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2243         }
2244
2245         return 0;
2246 }
2247
2248 static int dispatch_exit(sd_event *e) {
2249         sd_event_source *p;
2250         int r;
2251
2252         assert(e);
2253
2254         p = prioq_peek(e->exit);
2255         if (!p || p->enabled == SD_EVENT_OFF) {
2256                 e->state = SD_EVENT_FINISHED;
2257                 return 0;
2258         }
2259
2260         sd_event_ref(e);
2261         e->iteration++;
2262         e->state = SD_EVENT_EXITING;
2263
2264         r = source_dispatch(p);
2265
2266         e->state = SD_EVENT_INITIAL;
2267         sd_event_unref(e);
2268
2269         return r;
2270 }
2271
2272 static sd_event_source* event_next_pending(sd_event *e) {
2273         sd_event_source *p;
2274
2275         assert(e);
2276
2277         p = prioq_peek(e->pending);
2278         if (!p)
2279                 return NULL;
2280
2281         if (p->enabled == SD_EVENT_OFF)
2282                 return NULL;
2283
2284         return p;
2285 }
2286
2287 static int arm_watchdog(sd_event *e) {
2288         struct itimerspec its = {};
2289         usec_t t;
2290         int r;
2291
2292         assert(e);
2293         assert(e->watchdog_fd >= 0);
2294
2295         t = sleep_between(e,
2296                           e->watchdog_last + (e->watchdog_period / 2),
2297                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2298
2299         timespec_store(&its.it_value, t);
2300
2301         /* Make sure we never set the watchdog to 0, which tells the
2302          * kernel to disable it. */
2303         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2304                 its.it_value.tv_nsec = 1;
2305
2306         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2307         if (r < 0)
2308                 return -errno;
2309
2310         return 0;
2311 }
2312
2313 static int process_watchdog(sd_event *e) {
2314         assert(e);
2315
2316         if (!e->watchdog)
2317                 return 0;
2318
2319         /* Don't notify watchdog too often */
2320         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2321                 return 0;
2322
2323         sd_notify(false, "WATCHDOG=1");
2324         e->watchdog_last = e->timestamp.monotonic;
2325
2326         return arm_watchdog(e);
2327 }
2328
2329 _public_ int sd_event_prepare(sd_event *e) {
2330         int r;
2331
2332         assert_return(e, -EINVAL);
2333         assert_return(!event_pid_changed(e), -ECHILD);
2334         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2335         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2336
2337         if (e->exit_requested)
2338                 goto pending;
2339
2340         e->iteration++;
2341
2342         r = event_prepare(e);
2343         if (r < 0)
2344                 return r;
2345
2346         r = event_arm_timer(e, &e->realtime);
2347         if (r < 0)
2348                 return r;
2349
2350         r = event_arm_timer(e, &e->boottime);
2351         if (r < 0)
2352                 return r;
2353
2354         r = event_arm_timer(e, &e->monotonic);
2355         if (r < 0)
2356                 return r;
2357
2358         r = event_arm_timer(e, &e->realtime_alarm);
2359         if (r < 0)
2360                 return r;
2361
2362         r = event_arm_timer(e, &e->boottime_alarm);
2363         if (r < 0)
2364                 return r;
2365
2366         if (event_next_pending(e) || e->need_process_child)
2367                 goto pending;
2368
2369         e->state = SD_EVENT_ARMED;
2370
2371         return 0;
2372
2373 pending:
2374         e->state = SD_EVENT_ARMED;
2375         r = sd_event_wait(e, 0);
2376         if (r == 0)
2377                 e->state = SD_EVENT_ARMED;
2378
2379         return r;
2380 }
2381
2382 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2383         struct epoll_event *ev_queue;
2384         unsigned ev_queue_max;
2385         int r, m, i;
2386
2387         assert_return(e, -EINVAL);
2388         assert_return(!event_pid_changed(e), -ECHILD);
2389         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2390         assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2391
2392         if (e->exit_requested) {
2393                 e->state = SD_EVENT_PENDING;
2394                 return 1;
2395         }
2396
2397         ev_queue_max = MAX(e->n_sources, 1u);
2398         ev_queue = newa(struct epoll_event, ev_queue_max);
2399
2400         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2401                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2402         if (m < 0) {
2403                 if (errno == EINTR) {
2404                         e->state = SD_EVENT_PENDING;
2405                         return 1;
2406                 }
2407
2408                 r = -errno;
2409                 goto finish;
2410         }
2411
2412         dual_timestamp_get(&e->timestamp);
2413         e->timestamp_boottime = now(CLOCK_BOOTTIME);
2414
2415         for (i = 0; i < m; i++) {
2416
2417                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
2418                         r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
2419                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
2420                         r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
2421                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
2422                         r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
2423                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
2424                         r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
2425                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
2426                         r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
2427                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2428                         r = process_signal(e, ev_queue[i].events);
2429                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2430                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2431                 else
2432                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2433
2434                 if (r < 0)
2435                         goto finish;
2436         }
2437
2438         r = process_watchdog(e);
2439         if (r < 0)
2440                 goto finish;
2441
2442         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2443         if (r < 0)
2444                 goto finish;
2445
2446         r = process_timer(e, e->timestamp_boottime, &e->boottime);
2447         if (r < 0)
2448                 goto finish;
2449
2450         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2451         if (r < 0)
2452                 goto finish;
2453
2454         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2455         if (r < 0)
2456                 goto finish;
2457
2458         r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2459         if (r < 0)
2460                 goto finish;
2461
2462         if (e->need_process_child) {
2463                 r = process_child(e);
2464                 if (r < 0)
2465                         goto finish;
2466         }
2467
2468         if (event_next_pending(e)) {
2469                 e->state = SD_EVENT_PENDING;
2470
2471                 return 1;
2472         }
2473
2474         r = 0;
2475
2476 finish:
2477         e->state = SD_EVENT_INITIAL;
2478
2479         return r;
2480 }
2481
2482 _public_ int sd_event_dispatch(sd_event *e) {
2483         sd_event_source *p;
2484         int r;
2485
2486         assert_return(e, -EINVAL);
2487         assert_return(!event_pid_changed(e), -ECHILD);
2488         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2489         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2490
2491         if (e->exit_requested)
2492                 return dispatch_exit(e);
2493
2494         p = event_next_pending(e);
2495         if (p) {
2496                 sd_event_ref(e);
2497
2498                 e->state = SD_EVENT_RUNNING;
2499                 r = source_dispatch(p);
2500                 e->state = SD_EVENT_INITIAL;
2501
2502                 sd_event_unref(e);
2503
2504                 return r;
2505         }
2506
2507         e->state = SD_EVENT_INITIAL;
2508
2509         return 1;
2510 }
2511
2512 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2513         int r;
2514
2515         assert_return(e, -EINVAL);
2516         assert_return(!event_pid_changed(e), -ECHILD);
2517         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2518         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2519
2520         r = sd_event_prepare(e);
2521         if (r == 0)
2522                 /* There was nothing? Then wait... */
2523                 r = sd_event_wait(e, timeout);
2524
2525         if (r > 0) {
2526                 /* There's something now, then let's dispatch it */
2527                 r = sd_event_dispatch(e);
2528                 if (r < 0)
2529                         return r;
2530
2531                 return 1;
2532         }
2533
2534         return r;
2535 }
2536
2537 /// UNNEEDED by elogind
2538 #if 0
2539 _public_ int sd_event_loop(sd_event *e) {
2540         int r;
2541
2542         assert_return(e, -EINVAL);
2543         assert_return(!event_pid_changed(e), -ECHILD);
2544         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2545
2546         sd_event_ref(e);
2547
2548         while (e->state != SD_EVENT_FINISHED) {
2549                 r = sd_event_run(e, (uint64_t) -1);
2550                 if (r < 0)
2551                         goto finish;
2552         }
2553
2554         r = e->exit_code;
2555
2556 finish:
2557         sd_event_unref(e);
2558         return r;
2559 }
2560
2561 _public_ int sd_event_get_fd(sd_event *e) {
2562
2563         assert_return(e, -EINVAL);
2564         assert_return(!event_pid_changed(e), -ECHILD);
2565
2566         return e->epoll_fd;
2567 }
2568 #endif // 0
2569
2570 _public_ int sd_event_get_state(sd_event *e) {
2571         assert_return(e, -EINVAL);
2572         assert_return(!event_pid_changed(e), -ECHILD);
2573
2574         return e->state;
2575 }
2576
2577 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2578         assert_return(e, -EINVAL);
2579         assert_return(code, -EINVAL);
2580         assert_return(!event_pid_changed(e), -ECHILD);
2581
2582         if (!e->exit_requested)
2583                 return -ENODATA;
2584
2585         *code = e->exit_code;
2586         return 0;
2587 }
2588
2589 _public_ int sd_event_exit(sd_event *e, int code) {
2590         assert_return(e, -EINVAL);
2591         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2592         assert_return(!event_pid_changed(e), -ECHILD);
2593
2594         e->exit_requested = true;
2595         e->exit_code = code;
2596
2597         return 0;
2598 }
2599
2600 /// UNNEEDED by elogind
2601 #if 0
2602 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2603         assert_return(e, -EINVAL);
2604         assert_return(usec, -EINVAL);
2605         assert_return(!event_pid_changed(e), -ECHILD);
2606
2607         if (!dual_timestamp_is_set(&e->timestamp)) {
2608                 /* Implicitly fall back to now() if we never ran
2609                  * before and thus have no cached time. */
2610                 *usec = now(clock);
2611                 return 1;
2612         }
2613
2614         switch (clock) {
2615
2616         case CLOCK_REALTIME:
2617         case CLOCK_REALTIME_ALARM:
2618                 *usec = e->timestamp.realtime;
2619                 break;
2620
2621         case CLOCK_MONOTONIC:
2622                 *usec = e->timestamp.monotonic;
2623                 break;
2624
2625         case CLOCK_BOOTTIME:
2626         case CLOCK_BOOTTIME_ALARM:
2627                 *usec = e->timestamp_boottime;
2628                 break;
2629         }
2630
2631         return 0;
2632 }
2633 #endif // 0
2634
2635 _public_ int sd_event_default(sd_event **ret) {
2636
2637         static thread_local sd_event *default_event = NULL;
2638         sd_event *e = NULL;
2639         int r;
2640
2641         if (!ret)
2642                 return !!default_event;
2643
2644         if (default_event) {
2645                 *ret = sd_event_ref(default_event);
2646                 return 0;
2647         }
2648
2649         r = sd_event_new(&e);
2650         if (r < 0)
2651                 return r;
2652
2653         e->default_event_ptr = &default_event;
2654         e->tid = gettid();
2655         default_event = e;
2656
2657         *ret = e;
2658         return 1;
2659 }
2660
2661 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2662         assert_return(e, -EINVAL);
2663         assert_return(tid, -EINVAL);
2664         assert_return(!event_pid_changed(e), -ECHILD);
2665
2666         if (e->tid != 0) {
2667                 *tid = e->tid;
2668                 return 0;
2669         }
2670
2671         return -ENXIO;
2672 }
2673
2674 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2675         int r;
2676
2677         assert_return(e, -EINVAL);
2678         assert_return(!event_pid_changed(e), -ECHILD);
2679
2680         if (e->watchdog == !!b)
2681                 return e->watchdog;
2682
2683         if (b) {
2684                 struct epoll_event ev = {};
2685
2686                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2687                 if (r <= 0)
2688                         return r;
2689
2690                 /* Issue first ping immediately */
2691                 sd_notify(false, "WATCHDOG=1");
2692                 e->watchdog_last = now(CLOCK_MONOTONIC);
2693
2694                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2695                 if (e->watchdog_fd < 0)
2696                         return -errno;
2697
2698                 r = arm_watchdog(e);
2699                 if (r < 0)
2700                         goto fail;
2701
2702                 ev.events = EPOLLIN;
2703                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2704
2705                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2706                 if (r < 0) {
2707                         r = -errno;
2708                         goto fail;
2709                 }
2710
2711         } else {
2712                 if (e->watchdog_fd >= 0) {
2713                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2714                         e->watchdog_fd = safe_close(e->watchdog_fd);
2715                 }
2716         }
2717
2718         e->watchdog = !!b;
2719         return e->watchdog;
2720
2721 fail:
2722         e->watchdog_fd = safe_close(e->watchdog_fd);
2723         return r;
2724 }
2725
2726 /// UNNEEDED by elogind
2727 #if 0
2728 _public_ int sd_event_get_watchdog(sd_event *e) {
2729         assert_return(e, -EINVAL);
2730         assert_return(!event_pid_changed(e), -ECHILD);
2731
2732         return e->watchdog;
2733 }
2734 #endif // 0