chiark / gitweb /
00880c983b543a563c0388d5346bbe73197d0a3d
[elogind.git] / src / libelogind / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Internal discriminator for the union payload in sd_event_source, and for
 * the epoll data.ptr tag of the loop's own fds (signalfd, timerfds,
 * watchdog). The five SOURCE_TIME_* values each map 1:1 to a kernel clock,
 * see event_source_type_to_clock(). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
58
59 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
60
struct sd_event_source {
        unsigned n_ref;              /* reference count; source is freed when it drops to zero */

        sd_event *event;             /* owning loop; NULL after source_disconnect() */
        void *userdata;              /* opaque pointer handed back to the callback */
        sd_event_handler_t prepare;  /* optional pre-poll callback, ordered via event->prepare prioq */

        char *description;           /* free-form name for debugging, owned by the source */

        EventSourceType type:5;      /* selects the active member of the union below */
        int enabled:3;               /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;              /* true while queued in event->pending */
        bool dispatching:1;          /* true while its callback is running */
        bool floating:1;             /* loop owns the source; source holds no ref on the loop
                                      * (see source_new() and event_free()) */

        int64_t priority;            /* lower values dispatch first */
        unsigned pending_index;      /* position in event->pending prioq (PRIOQ_IDX_NULL if absent) */
        unsigned prepare_index;      /* position in event->prepare prioq (PRIOQ_IDX_NULL if absent) */
        unsigned pending_iteration;  /* loop iteration at which the source became pending */
        unsigned prepare_iteration;  /* loop iteration at which prepare() last ran */

        LIST_FIELDS(sd_event_source, sources);  /* membership in event->sources list */

        /* Type-specific payload; 'type' above says which member is valid. */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* epoll events we asked for */
                        uint32_t revents;       /* epoll events last received */
                        bool registered:1;      /* fd currently added to the epoll set */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest wakeup, and allowed slack past it */
                        unsigned earliest_index;/* position in clock_data->earliest prioq */
                        unsigned latest_index;  /* position in clock_data->latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;  /* last datum read from the signalfd */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;      /* result of waitid() for the child */
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED mask */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* position in event->exit prioq */
                } exit;
        };
};
121
/* Per-clock bookkeeping: one timerfd plus the two scheduling queues. */
struct clock_data {
        int fd;  /* timerfd for this clock, lazily created; -1 until then */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;  /* deadline the timerfd was last armed for; presumably
                       * kept to skip redundant timerfd_settime() calls —
                       * the arming code is outside this view, confirm there */

        bool needs_rearm:1;  /* set whenever queue contents change, cleared on rearm */
};
138
struct sd_event {
        unsigned n_ref;  /* reference count; also bumped by non-floating sources */

        int epoll_fd;     /* the central poll fd everything is registered with */
        int signal_fd;    /* signalfd covering 'sigset'; -1 until first signal source */
        int watchdog_fd;  /* timerfd driving sd_notify watchdog pings; -1 if unused */

        Prioq *pending;   /* sources with unhandled events, ordered by pending_prioq_compare() */
        Prioq *prepare;   /* sources with a prepare() callback, ordered by prepare_prioq_compare() */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;  /* boot-id-derived offset for coalescing wakeups; USEC_INFINITY until computed */

        sigset_t sigset;                  /* signals currently covered by signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized array, indexed by signal number; may be NULL */

        Hashmap *child_sources;           /* pid (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* how many of those are not SD_EVENT_OFF */

        Set *post_sources;  /* SOURCE_POST sources, dispatched after other events */

        Prioq *exit;        /* SOURCE_EXIT sources, dispatched on loop exit */

        pid_t original_pid; /* pid at creation; used to detect use across fork() */

        unsigned iteration;        /* incremented per loop iteration, for prioq ordering */
        dual_timestamp timestamp;  /* realtime+monotonic snapshot of the current wakeup */
        usec_t timestamp_boottime; /* boottime snapshot of the current wakeup */
        int state;                 /* SD_EVENT_INITIAL/PREPARING/.../FINISHED */

        bool exit_requested:1;     /* sd_event_exit() was called */
        bool need_process_child:1; /* SIGCHLD seen, waitid() sweep still due */
        bool watchdog:1;           /* watchdog pinging enabled */

        int exit_code;             /* value to return from the loop once it finishes */

        pid_t tid;                    /* thread the default event loop is bound to */
        sd_event **default_event_ptr; /* per-thread default-loop slot to clear on free */

        usec_t watchdog_last, watchdog_period;  /* last ping time and configured interval */

        unsigned n_sources;  /* number of sources attached, incl. disabled ones */

        LIST_HEAD(sd_event_source, sources);  /* all attached sources */
};
192
193 static void source_disconnect(sd_event_source *s);
194
195 static int pending_prioq_compare(const void *a, const void *b) {
196         const sd_event_source *x = a, *y = b;
197
198         assert(x->pending);
199         assert(y->pending);
200
201         /* Enabled ones first */
202         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203                 return -1;
204         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205                 return 1;
206
207         /* Lower priority values first */
208         if (x->priority < y->priority)
209                 return -1;
210         if (x->priority > y->priority)
211                 return 1;
212
213         /* Older entries first */
214         if (x->pending_iteration < y->pending_iteration)
215                 return -1;
216         if (x->pending_iteration > y->pending_iteration)
217                 return 1;
218
219         /* Stability for the rest */
220         if (x < y)
221                 return -1;
222         if (x > y)
223                 return 1;
224
225         return 0;
226 }
227
228 static int prepare_prioq_compare(const void *a, const void *b) {
229         const sd_event_source *x = a, *y = b;
230
231         assert(x->prepare);
232         assert(y->prepare);
233
234         /* Move most recently prepared ones last, so that we can stop
235          * preparing as soon as we hit one that has already been
236          * prepared in the current iteration */
237         if (x->prepare_iteration < y->prepare_iteration)
238                 return -1;
239         if (x->prepare_iteration > y->prepare_iteration)
240                 return 1;
241
242         /* Enabled ones first */
243         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
244                 return -1;
245         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
246                 return 1;
247
248         /* Lower priority values first */
249         if (x->priority < y->priority)
250                 return -1;
251         if (x->priority > y->priority)
252                 return 1;
253
254         /* Stability for the rest */
255         if (x < y)
256                 return -1;
257         if (x > y)
258                 return 1;
259
260         return 0;
261 }
262
263 static int earliest_time_prioq_compare(const void *a, const void *b) {
264         const sd_event_source *x = a, *y = b;
265
266         assert(EVENT_SOURCE_IS_TIME(x->type));
267         assert(x->type == y->type);
268
269         /* Enabled ones first */
270         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
271                 return -1;
272         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
273                 return 1;
274
275         /* Move the pending ones to the end */
276         if (!x->pending && y->pending)
277                 return -1;
278         if (x->pending && !y->pending)
279                 return 1;
280
281         /* Order by time */
282         if (x->time.next < y->time.next)
283                 return -1;
284         if (x->time.next > y->time.next)
285                 return 1;
286
287         /* Stability for the rest */
288         if (x < y)
289                 return -1;
290         if (x > y)
291                 return 1;
292
293         return 0;
294 }
295
296 static int latest_time_prioq_compare(const void *a, const void *b) {
297         const sd_event_source *x = a, *y = b;
298
299         assert(EVENT_SOURCE_IS_TIME(x->type));
300         assert(x->type == y->type);
301
302         /* Enabled ones first */
303         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304                 return -1;
305         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
306                 return 1;
307
308         /* Move the pending ones to the end */
309         if (!x->pending && y->pending)
310                 return -1;
311         if (x->pending && !y->pending)
312                 return 1;
313
314         /* Order by time */
315         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
316                 return -1;
317         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
318                 return 1;
319
320         /* Stability for the rest */
321         if (x < y)
322                 return -1;
323         if (x > y)
324                 return 1;
325
326         return 0;
327 }
328
329 static int exit_prioq_compare(const void *a, const void *b) {
330         const sd_event_source *x = a, *y = b;
331
332         assert(x->type == SOURCE_EXIT);
333         assert(y->type == SOURCE_EXIT);
334
335         /* Enabled ones first */
336         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
337                 return -1;
338         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
339                 return 1;
340
341         /* Lower priority values first */
342         if (x->priority < y->priority)
343                 return -1;
344         if (x->priority > y->priority)
345                 return 1;
346
347         /* Stability for the rest */
348         if (x < y)
349                 return -1;
350         if (x > y)
351                 return 1;
352
353         return 0;
354 }
355
356 static void free_clock_data(struct clock_data *d) {
357         assert(d);
358
359         safe_close(d->fd);
360         prioq_free(d->earliest);
361         prioq_free(d->latest);
362 }
363
/* Tear down a loop whose refcount reached zero. Any sources still on the
 * list at this point must be floating (non-floating sources hold a ref on
 * the loop, so their existence would contradict n_ref == 0). */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Detach and drop the loop's reference on each floating source */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Clear the per-thread default-loop slot so a later
         * sd_event_default() doesn't hand out a dangling pointer */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
400
/* Allocate a fresh event loop with one reference and a live epoll fd.
 * Returns 0 and stores the loop in *ret, or a negative errno-style code. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* All fds start at -1 so that event_free()'s safe_close() calls are
         * no-ops for anything never opened */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        /* USEC_INFINITY marks the perturbation value as not-yet-computed,
         * see initialize_perturb() */
        e->perturb = USEC_INFINITY;

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        /* event_free() copes with a partially initialized struct */
        event_free(e);
        return r;
}
438
439 _public_ sd_event* sd_event_ref(sd_event *e) {
440         assert_return(e, NULL);
441
442         assert(e->n_ref >= 1);
443         e->n_ref++;
444
445         return e;
446 }
447
448 _public_ sd_event* sd_event_unref(sd_event *e) {
449
450         if (!e)
451                 return NULL;
452
453         assert(e->n_ref >= 1);
454         e->n_ref--;
455
456         if (e->n_ref <= 0)
457                 event_free(e);
458
459         return NULL;
460 }
461
462 static bool event_pid_changed(sd_event *e) {
463         assert(e);
464
465         /* We don't support people creating an event loop and keeping
466          * it around over a fork(). Let's complain. */
467
468         return e->original_pid != getpid();
469 }
470
471 static int source_io_unregister(sd_event_source *s) {
472         int r;
473
474         assert(s);
475         assert(s->type == SOURCE_IO);
476
477         if (event_pid_changed(s->event))
478                 return 0;
479
480         if (!s->io.registered)
481                 return 0;
482
483         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
484         if (r < 0)
485                 return -errno;
486
487         s->io.registered = false;
488         return 0;
489 }
490
491 static int source_io_register(
492                 sd_event_source *s,
493                 int enabled,
494                 uint32_t events) {
495
496         struct epoll_event ev = {};
497         int r;
498
499         assert(s);
500         assert(s->type == SOURCE_IO);
501         assert(enabled != SD_EVENT_OFF);
502
503         ev.events = events;
504         ev.data.ptr = s;
505
506         if (enabled == SD_EVENT_ONESHOT)
507                 ev.events |= EPOLLONESHOT;
508
509         if (s->io.registered)
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
511         else
512                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
513
514         if (r < 0)
515                 return -errno;
516
517         s->io.registered = true;
518
519         return 0;
520 }
521
522 static clockid_t event_source_type_to_clock(EventSourceType t) {
523
524         switch (t) {
525
526         case SOURCE_TIME_REALTIME:
527                 return CLOCK_REALTIME;
528
529         case SOURCE_TIME_BOOTTIME:
530                 return CLOCK_BOOTTIME;
531
532         case SOURCE_TIME_MONOTONIC:
533                 return CLOCK_MONOTONIC;
534
535         case SOURCE_TIME_REALTIME_ALARM:
536                 return CLOCK_REALTIME_ALARM;
537
538         case SOURCE_TIME_BOOTTIME_ALARM:
539                 return CLOCK_BOOTTIME_ALARM;
540
541         default:
542                 return (clockid_t) -1;
543         }
544 }
545
546 static EventSourceType clock_to_event_source_type(clockid_t clock) {
547
548         switch (clock) {
549
550         case CLOCK_REALTIME:
551                 return SOURCE_TIME_REALTIME;
552
553         case CLOCK_BOOTTIME:
554                 return SOURCE_TIME_BOOTTIME;
555
556         case CLOCK_MONOTONIC:
557                 return SOURCE_TIME_MONOTONIC;
558
559         case CLOCK_REALTIME_ALARM:
560                 return SOURCE_TIME_REALTIME_ALARM;
561
562         case CLOCK_BOOTTIME_ALARM:
563                 return SOURCE_TIME_BOOTTIME_ALARM;
564
565         default:
566                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
567         }
568 }
569
570 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
571         assert(e);
572
573         switch (t) {
574
575         case SOURCE_TIME_REALTIME:
576                 return &e->realtime;
577
578         case SOURCE_TIME_BOOTTIME:
579                 return &e->boottime;
580
581         case SOURCE_TIME_MONOTONIC:
582                 return &e->monotonic;
583
584         case SOURCE_TIME_REALTIME_ALARM:
585                 return &e->realtime_alarm;
586
587         case SOURCE_TIME_BOOTTIME_ALARM:
588                 return &e->boottime_alarm;
589
590         default:
591                 return NULL;
592         }
593 }
594
595 static bool need_signal(sd_event *e, int signal) {
596         return (e->signal_sources && e->signal_sources[signal] &&
597                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
598                 ||
599                (signal == SIGCHLD &&
600                 e->n_enabled_child_sources > 0);
601 }
602
/* (Re)apply e->sigset to the loop's signalfd, creating the fd and adding
 * it to the epoll set on first use. Returns 0 or a negative errno code. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        /* After fork() the signalfd belongs to the parent; don't touch it */
        if (event_pid_changed(e))
                return 0;

        add_to_epoll = e->signal_fd < 0;

        /* signalfd() with fd == -1 creates a new fd; with an existing fd it
         * just replaces the signal mask in place */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        /* An existing fd is already in the epoll set; nothing more to do */
        if (!add_to_epoll)
                return 0;

        /* Tag the epoll entry with the source type rather than a source
         * pointer, so the dispatcher can tell the loop's own fds apart */
        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
635
/* Detach a source from its loop without freeing it: pull it out of every
 * per-type structure, the pending/prepare queues and the source list, and
 * drop the source's reference on the loop if it held one. Safe to call on
 * an already-disconnected source (no-op). */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Removing entries may change the next wakeup, so the
                 * timerfd must be re-armed before the next poll */
                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        /* Stash the loop pointer before clearing s->event: the unref below
         * may free the loop, and we must not touch s->event afterwards */
        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources hold a reference on their loop; return it */
        if (!s->floating)
                sd_event_unref(event);
}
740
/* Destroy a source: detach it from its loop (if still attached), then
 * release its memory. Called when the refcount hits zero and from error
 * paths of the sd_event_add_*() constructors. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
748
/* Mark a source as having (or no longer having) an undispatched event,
 * keeping the 'pending' prioq in sync. For time sources also reshuffle the
 * clock queues, since pending-ness affects their ordering. EXIT sources
 * use their own queue and never go through here. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record the iteration for pending_prioq_compare()'s
                 * older-entries-first ordering */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back so our state matches the queue */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Pending sources sort to the back of both clock queues, so
                 * their position changed and the timerfd needs re-arming */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
784
785 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
786         sd_event_source *s;
787
788         assert(e);
789
790         s = new0(sd_event_source, 1);
791         if (!s)
792                 return NULL;
793
794         s->n_ref = 1;
795         s->event = e;
796         s->floating = floating;
797         s->type = type;
798         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
799
800         if (!floating)
801                 sd_event_ref(e);
802
803         LIST_PREPEND(sources, e->sources, s);
804         e->n_sources ++;
805
806         return s;
807 }
808
809 _public_ int sd_event_add_io(
810                 sd_event *e,
811                 sd_event_source **ret,
812                 int fd,
813                 uint32_t events,
814                 sd_event_io_handler_t callback,
815                 void *userdata) {
816
817         sd_event_source *s;
818         int r;
819
820         assert_return(e, -EINVAL);
821         assert_return(fd >= 0, -EINVAL);
822         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
823         assert_return(callback, -EINVAL);
824         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
825         assert_return(!event_pid_changed(e), -ECHILD);
826
827         s = source_new(e, !ret, SOURCE_IO);
828         if (!s)
829                 return -ENOMEM;
830
831         s->io.fd = fd;
832         s->io.events = events;
833         s->io.callback = callback;
834         s->userdata = userdata;
835         s->enabled = SD_EVENT_ON;
836
837         r = source_io_register(s, s->enabled, events);
838         if (r < 0) {
839                 source_free(s);
840                 return r;
841         }
842
843         if (ret)
844                 *ret = s;
845
846         return 0;
847 }
848
/* Lazily compute e->perturb, a per-machine wakeup offset derived from the
 * boot ID. USEC_INFINITY doubles as the "not yet computed" marker; if
 * reading the boot ID fails the value simply stays uninitialized and we
 * retry on the next call. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
867
868 static int event_setup_timer_fd(
869                 sd_event *e,
870                 struct clock_data *d,
871                 clockid_t clock) {
872
873         struct epoll_event ev = {};
874         int r, fd;
875
876         assert(e);
877         assert(d);
878
879         if (_likely_(d->fd >= 0))
880                 return 0;
881
882         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
883         if (fd < 0)
884                 return -errno;
885
886         ev.events = EPOLLIN;
887         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
888
889         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
890         if (r < 0) {
891                 safe_close(fd);
892                 return -errno;
893         }
894
895         d->fd = fd;
896         return 0;
897 }
898
/* Default handler installed by sd_event_add_time() when the caller passes
 * no callback: terminate the loop, with userdata encoding the exit code. */
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
904
/* Create a oneshot timer source on the given clock, firing between 'usec'
 * and 'usec' + 'accuracy' (0 accuracy selects DEFAULT_ACCURACY_USEC). A
 * NULL callback installs time_exit_callback, making the timer end the
 * loop. Lazily sets up the clock's prioqs and timerfd on first use. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        /* Rejects clocks timerfd cannot handle */
        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* The per-clock queues and fd are created on first use */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        /* Without a return pointer the source is floating, owned by the loop */
        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* Queue contents change below, so the timerfd must be re-armed */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        /* source_free() removes the partial prioq entries via source_disconnect() */
        source_free(s);
        return r;
}
982
/* Default handler installed by sd_event_add_signal() when the caller
 * passes no callback: terminate the loop, with userdata as exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
988
989 _public_ int sd_event_add_signal(
990                 sd_event *e,
991                 sd_event_source **ret,
992                 int sig,
993                 sd_event_signal_handler_t callback,
994                 void *userdata) {
995
996         sd_event_source *s;
997         sigset_t ss;
998         int r;
999         bool previous;
1000
1001         assert_return(e, -EINVAL);
1002         assert_return(sig > 0, -EINVAL);
1003         assert_return(sig < _NSIG, -EINVAL);
1004         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1005         assert_return(!event_pid_changed(e), -ECHILD);
1006
1007         if (!callback)
1008                 callback = signal_exit_callback;
1009
1010         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1011         if (r < 0)
1012                 return -errno;
1013
1014         if (!sigismember(&ss, sig))
1015                 return -EBUSY;
1016
1017         if (!e->signal_sources) {
1018                 e->signal_sources = new0(sd_event_source*, _NSIG);
1019                 if (!e->signal_sources)
1020                         return -ENOMEM;
1021         } else if (e->signal_sources[sig])
1022                 return -EBUSY;
1023
1024         previous = need_signal(e, sig);
1025
1026         s = source_new(e, !ret, SOURCE_SIGNAL);
1027         if (!s)
1028                 return -ENOMEM;
1029
1030         s->signal.sig = sig;
1031         s->signal.callback = callback;
1032         s->userdata = userdata;
1033         s->enabled = SD_EVENT_ON;
1034
1035         e->signal_sources[sig] = s;
1036
1037         if (!previous) {
1038                 assert_se(sigaddset(&e->sigset, sig) == 0);
1039
1040                 r = event_update_signal_fd(e);
1041                 if (r < 0) {
1042                         source_free(s);
1043                         return r;
1044                 }
1045         }
1046
1047         /* Use the signal name as description for the event source by default */
1048         (void) sd_event_source_set_description(s, signal_to_string(sig));
1049
1050         if (ret)
1051                 *ret = s;
1052
1053         return 0;
1054 }
1055
1056 _public_ int sd_event_add_child(
1057                 sd_event *e,
1058                 sd_event_source **ret,
1059                 pid_t pid,
1060                 int options,
1061                 sd_event_child_handler_t callback,
1062                 void *userdata) {
1063
1064         sd_event_source *s;
1065         int r;
1066         bool previous;
1067
1068         assert_return(e, -EINVAL);
1069         assert_return(pid > 1, -EINVAL);
1070         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1071         assert_return(options != 0, -EINVAL);
1072         assert_return(callback, -EINVAL);
1073         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1074         assert_return(!event_pid_changed(e), -ECHILD);
1075
1076         r = hashmap_ensure_allocated(&e->child_sources, NULL);
1077         if (r < 0)
1078                 return r;
1079
1080         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1081                 return -EBUSY;
1082
1083         previous = need_signal(e, SIGCHLD);
1084
1085         s = source_new(e, !ret, SOURCE_CHILD);
1086         if (!s)
1087                 return -ENOMEM;
1088
1089         s->child.pid = pid;
1090         s->child.options = options;
1091         s->child.callback = callback;
1092         s->userdata = userdata;
1093         s->enabled = SD_EVENT_ONESHOT;
1094
1095         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1096         if (r < 0) {
1097                 source_free(s);
1098                 return r;
1099         }
1100
1101         e->n_enabled_child_sources ++;
1102
1103         if (!previous) {
1104                 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1105
1106                 r = event_update_signal_fd(e);
1107                 if (r < 0) {
1108                         source_free(s);
1109                         return r;
1110                 }
1111         }
1112
1113         e->need_process_child = true;
1114
1115         if (ret)
1116                 *ret = s;
1117
1118         return 0;
1119 }
1120
1121 _public_ int sd_event_add_defer(
1122                 sd_event *e,
1123                 sd_event_source **ret,
1124                 sd_event_handler_t callback,
1125                 void *userdata) {
1126
1127         sd_event_source *s;
1128         int r;
1129
1130         assert_return(e, -EINVAL);
1131         assert_return(callback, -EINVAL);
1132         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1133         assert_return(!event_pid_changed(e), -ECHILD);
1134
1135         s = source_new(e, !ret, SOURCE_DEFER);
1136         if (!s)
1137                 return -ENOMEM;
1138
1139         s->defer.callback = callback;
1140         s->userdata = userdata;
1141         s->enabled = SD_EVENT_ONESHOT;
1142
1143         r = source_set_pending(s, true);
1144         if (r < 0) {
1145                 source_free(s);
1146                 return r;
1147         }
1148
1149         if (ret)
1150                 *ret = s;
1151
1152         return 0;
1153 }
1154
1155 _public_ int sd_event_add_post(
1156                 sd_event *e,
1157                 sd_event_source **ret,
1158                 sd_event_handler_t callback,
1159                 void *userdata) {
1160
1161         sd_event_source *s;
1162         int r;
1163
1164         assert_return(e, -EINVAL);
1165         assert_return(callback, -EINVAL);
1166         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1167         assert_return(!event_pid_changed(e), -ECHILD);
1168
1169         r = set_ensure_allocated(&e->post_sources, NULL);
1170         if (r < 0)
1171                 return r;
1172
1173         s = source_new(e, !ret, SOURCE_POST);
1174         if (!s)
1175                 return -ENOMEM;
1176
1177         s->post.callback = callback;
1178         s->userdata = userdata;
1179         s->enabled = SD_EVENT_ON;
1180
1181         r = set_put(e->post_sources, s);
1182         if (r < 0) {
1183                 source_free(s);
1184                 return r;
1185         }
1186
1187         if (ret)
1188                 *ret = s;
1189
1190         return 0;
1191 }
1192
1193 _public_ int sd_event_add_exit(
1194                 sd_event *e,
1195                 sd_event_source **ret,
1196                 sd_event_handler_t callback,
1197                 void *userdata) {
1198
1199         sd_event_source *s;
1200         int r;
1201
1202         assert_return(e, -EINVAL);
1203         assert_return(callback, -EINVAL);
1204         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1205         assert_return(!event_pid_changed(e), -ECHILD);
1206
1207         if (!e->exit) {
1208                 e->exit = prioq_new(exit_prioq_compare);
1209                 if (!e->exit)
1210                         return -ENOMEM;
1211         }
1212
1213         s = source_new(e, !ret, SOURCE_EXIT);
1214         if (!s)
1215                 return -ENOMEM;
1216
1217         s->exit.callback = callback;
1218         s->userdata = userdata;
1219         s->exit.prioq_index = PRIOQ_IDX_NULL;
1220         s->enabled = SD_EVENT_ONESHOT;
1221
1222         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1223         if (r < 0) {
1224                 source_free(s);
1225                 return r;
1226         }
1227
1228         if (ret)
1229                 *ret = s;
1230
1231         return 0;
1232 }
1233
1234 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1235         assert_return(s, NULL);
1236
1237         assert(s->n_ref >= 1);
1238         s->n_ref++;
1239
1240         return s;
1241 }
1242
1243 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1244
1245         if (!s)
1246                 return NULL;
1247
1248         assert(s->n_ref >= 1);
1249         s->n_ref--;
1250
1251         if (s->n_ref <= 0) {
1252                 /* Here's a special hack: when we are called from a
1253                  * dispatch handler we won't free the event source
1254                  * immediately, but we will detach the fd from the
1255                  * epoll. This way it is safe for the caller to unref
1256                  * the event source and immediately close the fd, but
1257                  * we still retain a valid event source object after
1258                  * the callback. */
1259
1260                 if (s->dispatching) {
1261                         if (s->type == SOURCE_IO)
1262                                 source_io_unregister(s);
1263
1264                         source_disconnect(s);
1265                 } else
1266                         source_free(s);
1267         }
1268
1269         return NULL;
1270 }
1271
/* Sets a free-form description on the event source, used for
 * logging/debugging. The string is duplicated via free_and_strdup();
 * presumably a NULL description clears the current one — verify
 * against the helper's definition elsewhere in the tree. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
1278
/* Retrieves the description previously set with
 * sd_event_source_set_description(). Returns -ENXIO when none has
 * been set. The returned string stays owned by the event source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
1288
/* Returns the event loop this source is attached to, or NULL if s is NULL. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1294
/* Returns whether the source is currently queued for dispatch (1/0),
 * or a negative error. Not supported for exit sources (-EDOM). */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1303
/* Returns the file descriptor watched by an I/O event source,
 * or -EDOM for any other source type. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1311
1312 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1313         int r;
1314
1315         assert_return(s, -EINVAL);
1316         assert_return(fd >= 0, -EINVAL);
1317         assert_return(s->type == SOURCE_IO, -EDOM);
1318         assert_return(!event_pid_changed(s->event), -ECHILD);
1319
1320         if (s->io.fd == fd)
1321                 return 0;
1322
1323         if (s->enabled == SD_EVENT_OFF) {
1324                 s->io.fd = fd;
1325                 s->io.registered = false;
1326         } else {
1327                 int saved_fd;
1328
1329                 saved_fd = s->io.fd;
1330                 assert(s->io.registered);
1331
1332                 s->io.fd = fd;
1333                 s->io.registered = false;
1334
1335                 r = source_io_register(s, s->enabled, s->io.events);
1336                 if (r < 0) {
1337                         s->io.fd = saved_fd;
1338                         s->io.registered = true;
1339                         return r;
1340                 }
1341
1342                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1343         }
1344
1345         return 0;
1346 }
1347
/* Retrieves the epoll event mask this I/O source is watching. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1357
1358 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1359         int r;
1360
1361         assert_return(s, -EINVAL);
1362         assert_return(s->type == SOURCE_IO, -EDOM);
1363         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1364         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1365         assert_return(!event_pid_changed(s->event), -ECHILD);
1366
1367         /* edge-triggered updates are never skipped, so we can reset edges */
1368         if (s->io.events == events && !(events & EPOLLET))
1369                 return 0;
1370
1371         if (s->enabled != SD_EVENT_OFF) {
1372                 r = source_io_register(s, s->enabled, events);
1373                 if (r < 0)
1374                         return r;
1375         }
1376
1377         s->io.events = events;
1378         source_set_pending(s, false);
1379
1380         return 0;
1381 }
1382
/* Retrieves the events that actually triggered this I/O source.
 * Only meaningful while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1393
/* Returns the signal number a signal source watches, or -EDOM for
 * any other source type. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1401
1402 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1403         assert_return(s, -EINVAL);
1404         assert_return(!event_pid_changed(s->event), -ECHILD);
1405
1406         return s->priority;
1407 }
1408
1409 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1410         assert_return(s, -EINVAL);
1411         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1412         assert_return(!event_pid_changed(s->event), -ECHILD);
1413
1414         if (s->priority == priority)
1415                 return 0;
1416
1417         s->priority = priority;
1418
1419         if (s->pending)
1420                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1421
1422         if (s->prepare)
1423                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1424
1425         if (s->type == SOURCE_EXIT)
1426                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1427
1428         return 0;
1429 }
1430
/* Retrieves the enable state: SD_EVENT_ON, SD_EVENT_OFF or
 * SD_EVENT_ONESHOT. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1439
1440 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1441         int r;
1442
1443         assert_return(s, -EINVAL);
1444         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1445         assert_return(!event_pid_changed(s->event), -ECHILD);
1446
1447         /* If we are dead anyway, we are fine with turning off
1448          * sources, but everything else needs to fail. */
1449         if (s->event->state == SD_EVENT_FINISHED)
1450                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1451
1452         if (s->enabled == m)
1453                 return 0;
1454
1455         if (m == SD_EVENT_OFF) {
1456
1457                 switch (s->type) {
1458
1459                 case SOURCE_IO:
1460                         r = source_io_unregister(s);
1461                         if (r < 0)
1462                                 return r;
1463
1464                         s->enabled = m;
1465                         break;
1466
1467                 case SOURCE_TIME_REALTIME:
1468                 case SOURCE_TIME_BOOTTIME:
1469                 case SOURCE_TIME_MONOTONIC:
1470                 case SOURCE_TIME_REALTIME_ALARM:
1471                 case SOURCE_TIME_BOOTTIME_ALARM: {
1472                         struct clock_data *d;
1473
1474                         s->enabled = m;
1475                         d = event_get_clock_data(s->event, s->type);
1476                         assert(d);
1477
1478                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1479                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1480                         d->needs_rearm = true;
1481                         break;
1482                 }
1483
1484                 case SOURCE_SIGNAL:
1485                         assert(need_signal(s->event, s->signal.sig));
1486
1487                         s->enabled = m;
1488
1489                         if (!need_signal(s->event, s->signal.sig)) {
1490                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1491
1492                                 (void) event_update_signal_fd(s->event);
1493                                 /* If disabling failed, we might get a spurious event,
1494                                  * but otherwise nothing bad should happen. */
1495                         }
1496
1497                         break;
1498
1499                 case SOURCE_CHILD:
1500                         assert(need_signal(s->event, SIGCHLD));
1501
1502                         s->enabled = m;
1503
1504                         assert(s->event->n_enabled_child_sources > 0);
1505                         s->event->n_enabled_child_sources--;
1506
1507                         if (!need_signal(s->event, SIGCHLD)) {
1508                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1509
1510                                 (void) event_update_signal_fd(s->event);
1511                         }
1512
1513                         break;
1514
1515                 case SOURCE_EXIT:
1516                         s->enabled = m;
1517                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1518                         break;
1519
1520                 case SOURCE_DEFER:
1521                 case SOURCE_POST:
1522                         s->enabled = m;
1523                         break;
1524
1525                 default:
1526                         assert_not_reached("Wut? I shouldn't exist.");
1527                 }
1528
1529         } else {
1530                 switch (s->type) {
1531
1532                 case SOURCE_IO:
1533                         r = source_io_register(s, m, s->io.events);
1534                         if (r < 0)
1535                                 return r;
1536
1537                         s->enabled = m;
1538                         break;
1539
1540                 case SOURCE_TIME_REALTIME:
1541                 case SOURCE_TIME_BOOTTIME:
1542                 case SOURCE_TIME_MONOTONIC:
1543                 case SOURCE_TIME_REALTIME_ALARM:
1544                 case SOURCE_TIME_BOOTTIME_ALARM: {
1545                         struct clock_data *d;
1546
1547                         s->enabled = m;
1548                         d = event_get_clock_data(s->event, s->type);
1549                         assert(d);
1550
1551                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1552                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1553                         d->needs_rearm = true;
1554                         break;
1555                 }
1556
1557                 case SOURCE_SIGNAL:
1558                         /* Check status before enabling. */
1559                         if (!need_signal(s->event, s->signal.sig)) {
1560                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1561
1562                                 r = event_update_signal_fd(s->event);
1563                                 if (r < 0) {
1564                                         s->enabled = SD_EVENT_OFF;
1565                                         return r;
1566                                 }
1567                         }
1568
1569                         s->enabled = m;
1570                         break;
1571
1572                 case SOURCE_CHILD:
1573                         /* Check status before enabling. */
1574                         if (s->enabled == SD_EVENT_OFF) {
1575                                 if (!need_signal(s->event, SIGCHLD)) {
1576                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1577
1578                                         r = event_update_signal_fd(s->event);
1579                                         if (r < 0) {
1580                                                 s->enabled = SD_EVENT_OFF;
1581                                                 return r;
1582                                         }
1583                                 }
1584
1585                                 s->event->n_enabled_child_sources++;
1586                         }
1587
1588                         s->enabled = m;
1589                         break;
1590
1591                 case SOURCE_EXIT:
1592                         s->enabled = m;
1593                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1594                         break;
1595
1596                 case SOURCE_DEFER:
1597                 case SOURCE_POST:
1598                         s->enabled = m;
1599                         break;
1600
1601                 default:
1602                         assert_not_reached("Wut? I shouldn't exist.");
1603                 }
1604         }
1605
1606         if (s->pending)
1607                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1608
1609         if (s->prepare)
1610                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1611
1612         return 0;
1613 }
1614
/* Retrieves the configured trigger time (time.next) of a timer
 * source, in microseconds. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1624
1625 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1626         struct clock_data *d;
1627
1628         assert_return(s, -EINVAL);
1629         assert_return(usec != (uint64_t) -1, -EINVAL);
1630         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1631         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1632         assert_return(!event_pid_changed(s->event), -ECHILD);
1633
1634         s->time.next = usec;
1635
1636         source_set_pending(s, false);
1637
1638         d = event_get_clock_data(s->event, s->type);
1639         assert(d);
1640
1641         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1642         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1643         d->needs_rearm = true;
1644
1645         return 0;
1646 }
1647
/* Retrieves the accuracy (maximum allowed dispatch delay, in µs) of a
 * timer source. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1657
1658 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1659         struct clock_data *d;
1660
1661         assert_return(s, -EINVAL);
1662         assert_return(usec != (uint64_t) -1, -EINVAL);
1663         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1664         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1665         assert_return(!event_pid_changed(s->event), -ECHILD);
1666
1667         if (usec == 0)
1668                 usec = DEFAULT_ACCURACY_USEC;
1669
1670         s->time.accuracy = usec;
1671
1672         source_set_pending(s, false);
1673
1674         d = event_get_clock_data(s->event, s->type);
1675         assert(d);
1676
1677         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1678         d->needs_rearm = true;
1679
1680         return 0;
1681 }
1682
/* Retrieves the clockid_t a timer source runs on, derived from the
 * source's type via event_source_type_to_clock(). */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1692
/* Retrieves the PID watched by a child event source. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1702
1703 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1704         int r;
1705
1706         assert_return(s, -EINVAL);
1707         assert_return(s->type != SOURCE_EXIT, -EDOM);
1708         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1709         assert_return(!event_pid_changed(s->event), -ECHILD);
1710
1711         if (s->prepare == callback)
1712                 return 0;
1713
1714         if (callback && s->prepare) {
1715                 s->prepare = callback;
1716                 return 0;
1717         }
1718
1719         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1720         if (r < 0)
1721                 return r;
1722
1723         s->prepare = callback;
1724
1725         if (callback) {
1726                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1727                 if (r < 0)
1728                         return r;
1729         } else
1730                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1731
1732         return 0;
1733 }
1734
/* Returns the opaque userdata pointer associated with the source,
 * or NULL if s is NULL. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1740
1741 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1742         void *ret;
1743
1744         assert_return(s, NULL);
1745
1746         ret = s->userdata;
1747         s->userdata = userdata;
1748
1749         return ret;
1750 }
1751
1752 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1753         usec_t c;
1754         assert(e);
1755         assert(a <= b);
1756
1757         if (a <= 0)
1758                 return 0;
1759
1760         if (b <= a + 1)
1761                 return a;
1762
1763         initialize_perturb(e);
1764
1765         /*
1766           Find a good time to wake up again between times a and b. We
1767           have two goals here:
1768
1769           a) We want to wake up as seldom as possible, hence prefer
1770              later times over earlier times.
1771
1772           b) But if we have to wake up, then let's make sure to
1773              dispatch as much as possible on the entire system.
1774
1775           We implement this by waking up everywhere at the same time
1776           within any given minute if we can, synchronised via the
1777           perturbation value determined from the boot ID. If we can't,
1778           then we try to find the same spot in every 10s, then 1s and
1779           then 250ms step. Otherwise, we pick the last possible time
1780           to wake up.
1781         */
1782
1783         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1784         if (c >= b) {
1785                 if (_unlikely_(c < USEC_PER_MINUTE))
1786                         return b;
1787
1788                 c -= USEC_PER_MINUTE;
1789         }
1790
1791         if (c >= a)
1792                 return c;
1793
1794         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1795         if (c >= b) {
1796                 if (_unlikely_(c < USEC_PER_SEC*10))
1797                         return b;
1798
1799                 c -= USEC_PER_SEC*10;
1800         }
1801
1802         if (c >= a)
1803                 return c;
1804
1805         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1806         if (c >= b) {
1807                 if (_unlikely_(c < USEC_PER_SEC))
1808                         return b;
1809
1810                 c -= USEC_PER_SEC;
1811         }
1812
1813         if (c >= a)
1814                 return c;
1815
1816         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1817         if (c >= b) {
1818                 if (_unlikely_(c < USEC_PER_MSEC*250))
1819                         return b;
1820
1821                 c -= USEC_PER_MSEC*250;
1822         }
1823
1824         if (c >= a)
1825                 return c;
1826
1827         return b;
1828 }
1829
1830 static int event_arm_timer(
1831                 sd_event *e,
1832                 struct clock_data *d) {
1833
1834         struct itimerspec its = {};
1835         sd_event_source *a, *b;
1836         usec_t t;
1837         int r;
1838
1839         assert(e);
1840         assert(d);
1841
1842         if (!d->needs_rearm)
1843                 return 0;
1844         else
1845                 d->needs_rearm = false;
1846
1847         a = prioq_peek(d->earliest);
1848         if (!a || a->enabled == SD_EVENT_OFF) {
1849
1850                 if (d->fd < 0)
1851                         return 0;
1852
1853                 if (d->next == USEC_INFINITY)
1854                         return 0;
1855
1856                 /* disarm */
1857                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1858                 if (r < 0)
1859                         return r;
1860
1861                 d->next = USEC_INFINITY;
1862                 return 0;
1863         }
1864
1865         b = prioq_peek(d->latest);
1866         assert_se(b && b->enabled != SD_EVENT_OFF);
1867
1868         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1869         if (d->next == t)
1870                 return 0;
1871
1872         assert_se(d->fd >= 0);
1873
1874         if (t == 0) {
1875                 /* We don' want to disarm here, just mean some time looooong ago. */
1876                 its.it_value.tv_sec = 0;
1877                 its.it_value.tv_nsec = 1;
1878         } else
1879                 timespec_store(&its.it_value, t);
1880
1881         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1882         if (r < 0)
1883                 return -errno;
1884
1885         d->next = t;
1886         return 0;
1887 }
1888
1889 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1890         assert(e);
1891         assert(s);
1892         assert(s->type == SOURCE_IO);
1893
1894         /* If the event source was already pending, we just OR in the
1895          * new revents, otherwise we reset the value. The ORing is
1896          * necessary to handle EPOLLONESHOT events properly where
1897          * readability might happen independently of writability, and
1898          * we need to keep track of both */
1899
1900         if (s->pending)
1901                 s->io.revents |= revents;
1902         else
1903                 s->io.revents = revents;
1904
1905         return source_set_pending(s, true);
1906 }
1907
1908 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1909         uint64_t x;
1910         ssize_t ss;
1911
1912         assert(e);
1913         assert(fd >= 0);
1914
1915         assert_return(events == EPOLLIN, -EIO);
1916
1917         ss = read(fd, &x, sizeof(x));
1918         if (ss < 0) {
1919                 if (errno == EAGAIN || errno == EINTR)
1920                         return 0;
1921
1922                 return -errno;
1923         }
1924
1925         if (_unlikely_(ss != sizeof(x)))
1926                 return -EIO;
1927
1928         if (next)
1929                 *next = USEC_INFINITY;
1930
1931         return 0;
1932 }
1933
1934 static int process_timer(
1935                 sd_event *e,
1936                 usec_t n,
1937                 struct clock_data *d) {
1938
1939         sd_event_source *s;
1940         int r;
1941
1942         assert(e);
1943         assert(d);
1944
1945         for (;;) {
1946                 s = prioq_peek(d->earliest);
1947                 if (!s ||
1948                     s->time.next > n ||
1949                     s->enabled == SD_EVENT_OFF ||
1950                     s->pending)
1951                         break;
1952
1953                 r = source_set_pending(s, true);
1954                 if (r < 0)
1955                         return r;
1956
1957                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1958                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1959                 d->needs_rearm = true;
1960         }
1961
1962         return 0;
1963 }
1964
/* Polls, via waitid(), every watched child process for a state change
 * and marks the corresponding event source pending when one is found.
 * Invoked when a SIGCHLD was read or need_process_child was set. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Skip sources that are already queued or disabled. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* WNOWAIT (only when waiting for WEXITED) leaves the
                 * zombie in place so source_dispatch() can reap it
                 * after the callback has seen it. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid != 0 signals that a state change was reported */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2031
/* Drains the signalfd completely and marks matching signal event
 * sources pending. SIGCHLD additionally triggers a sweep over the
 * watched children. Returns > 0 if at least one signal was read,
 * 0 if the fd was already empty, negative on error. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: the queue is drained; report whether
                         * we consumed anything at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads are all-or-nothing */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                /* SIGCHLD first updates the watched children; any
                 * explicit SIGCHLD source the user registered is then
                 * still serviced below. */
                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                /* Store the freshest siginfo for the callback; an
                 * earlier unprocessed one for the same signal is
                 * simply overwritten. */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2080
/* Invokes the user callback of a single event source, together with
 * the surrounding bookkeeping: un-queueing, ONESHOT disabling,
 * post-source triggering, child reaping and disabling of sources
 * whose callback failed. Returns 1 on success, negative only on
 * internal (non-callback) errors. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources keep their pending flag here; all
         * other types are removed from the pending queue before the
         * callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback runs, so
         * the callback may re-enable them if it wants to. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* NOTE(review): the dispatching flag is presumably consulted
         * by unref/free logic elsewhere in this file — not visible in
         * this chunk. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine zombie-ness before the callback, since the
                 * callback may overwrite siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        /* A failing callback is only logged at debug level; the
         * source is disabled below rather than aborting the loop. */
        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* If the callback dropped the last reference, free the source
         * now that dispatching is over; otherwise disable it on error. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2186
/* Runs the prepare() callbacks of all enabled sources that have one,
 * at most once per event loop iteration, in prioq order. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* Stop at the first disabled or already-prepared head;
                 * the reshuffle below moves handled sources out of the
                 * way. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Record that this source was prepared this iteration
                 * and reposition it in the queue. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                /* Same failure policy as source_dispatch(): log at
                 * debug level and disable the source. */
                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2225
2226 static int dispatch_exit(sd_event *e) {
2227         sd_event_source *p;
2228         int r;
2229
2230         assert(e);
2231
2232         p = prioq_peek(e->exit);
2233         if (!p || p->enabled == SD_EVENT_OFF) {
2234                 e->state = SD_EVENT_FINISHED;
2235                 return 0;
2236         }
2237
2238         sd_event_ref(e);
2239         e->iteration++;
2240         e->state = SD_EVENT_EXITING;
2241
2242         r = source_dispatch(p);
2243
2244         e->state = SD_EVENT_INITIAL;
2245         sd_event_unref(e);
2246
2247         return r;
2248 }
2249
2250 static sd_event_source* event_next_pending(sd_event *e) {
2251         sd_event_source *p;
2252
2253         assert(e);
2254
2255         p = prioq_peek(e->pending);
2256         if (!p)
2257                 return NULL;
2258
2259         if (p->enabled == SD_EVENT_OFF)
2260                 return NULL;
2261
2262         return p;
2263 }
2264
2265 static int arm_watchdog(sd_event *e) {
2266         struct itimerspec its = {};
2267         usec_t t;
2268         int r;
2269
2270         assert(e);
2271         assert(e->watchdog_fd >= 0);
2272
2273         t = sleep_between(e,
2274                           e->watchdog_last + (e->watchdog_period / 2),
2275                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2276
2277         timespec_store(&its.it_value, t);
2278
2279         /* Make sure we never set the watchdog to 0, which tells the
2280          * kernel to disable it. */
2281         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2282                 its.it_value.tv_nsec = 1;
2283
2284         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2285         if (r < 0)
2286                 return -errno;
2287
2288         return 0;
2289 }
2290
2291 static int process_watchdog(sd_event *e) {
2292         assert(e);
2293
2294         if (!e->watchdog)
2295                 return 0;
2296
2297         /* Don't notify watchdog too often */
2298         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2299                 return 0;
2300
2301         sd_notify(false, "WATCHDOG=1");
2302         e->watchdog_last = e->timestamp.monotonic;
2303
2304         return arm_watchdog(e);
2305 }
2306
2307 _public_ int sd_event_prepare(sd_event *e) {
2308         int r;
2309
2310         assert_return(e, -EINVAL);
2311         assert_return(!event_pid_changed(e), -ECHILD);
2312         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2313         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2314
2315         if (e->exit_requested)
2316                 goto pending;
2317
2318         e->iteration++;
2319
2320         r = event_prepare(e);
2321         if (r < 0)
2322                 return r;
2323
2324         r = event_arm_timer(e, &e->realtime);
2325         if (r < 0)
2326                 return r;
2327
2328         r = event_arm_timer(e, &e->boottime);
2329         if (r < 0)
2330                 return r;
2331
2332         r = event_arm_timer(e, &e->monotonic);
2333         if (r < 0)
2334                 return r;
2335
2336         r = event_arm_timer(e, &e->realtime_alarm);
2337         if (r < 0)
2338                 return r;
2339
2340         r = event_arm_timer(e, &e->boottime_alarm);
2341         if (r < 0)
2342                 return r;
2343
2344         if (event_next_pending(e) || e->need_process_child)
2345                 goto pending;
2346
2347         e->state = SD_EVENT_ARMED;
2348
2349         return 0;
2350
2351 pending:
2352         e->state = SD_EVENT_ARMED;
2353         r = sd_event_wait(e, 0);
2354         if (r == 0)
2355                 e->state = SD_EVENT_ARMED;
2356
2357         return r;
2358 }
2359
/* Waits up to the given timeout (µs; (uint64_t) -1 means forever) for
 * I/O, timer, signal or child events and queues the resulting pending
 * sources. Returns 1 (state PENDING) when there is something to
 * dispatch, 0 (state back to INITIAL) on timeout. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Sized to the number of sources — presumably each source
         * contributes at most one epoll registration. Stack-allocated
         * via newa(). */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the timeout up to whole milliseconds so we never wake
         * up early. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat interruption like a wakeup; the caller will
                 * simply find nothing pending. */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        /* Take the iteration timestamps; process_timer() below
         * compares elapse times against these. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* First pass: flush internal fds (identified by the SOURCE_*
         * marker stored in data.ptr) and collect I/O events; anything
         * else in data.ptr is an I/O source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Second pass: turn elapsed timers and child state changes
         * into pending sources. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
2459
2460 _public_ int sd_event_dispatch(sd_event *e) {
2461         sd_event_source *p;
2462         int r;
2463
2464         assert_return(e, -EINVAL);
2465         assert_return(!event_pid_changed(e), -ECHILD);
2466         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2467         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2468
2469         if (e->exit_requested)
2470                 return dispatch_exit(e);
2471
2472         p = event_next_pending(e);
2473         if (p) {
2474                 sd_event_ref(e);
2475
2476                 e->state = SD_EVENT_RUNNING;
2477                 r = source_dispatch(p);
2478                 e->state = SD_EVENT_INITIAL;
2479
2480                 sd_event_unref(e);
2481
2482                 return r;
2483         }
2484
2485         e->state = SD_EVENT_INITIAL;
2486
2487         return 1;
2488 }
2489
2490 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2491         int r;
2492
2493         assert_return(e, -EINVAL);
2494         assert_return(!event_pid_changed(e), -ECHILD);
2495         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2496         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2497
2498         r = sd_event_prepare(e);
2499         if (r == 0)
2500                 /* There was nothing? Then wait... */
2501                 r = sd_event_wait(e, timeout);
2502
2503         if (r > 0) {
2504                 /* There's something now, then let's dispatch it */
2505                 r = sd_event_dispatch(e);
2506                 if (r < 0)
2507                         return r;
2508
2509                 return 1;
2510         }
2511
2512         return r;
2513 }
2514
2515 _public_ int sd_event_loop(sd_event *e) {
2516         int r;
2517
2518         assert_return(e, -EINVAL);
2519         assert_return(!event_pid_changed(e), -ECHILD);
2520         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2521
2522         sd_event_ref(e);
2523
2524         while (e->state != SD_EVENT_FINISHED) {
2525                 r = sd_event_run(e, (uint64_t) -1);
2526                 if (r < 0)
2527                         goto finish;
2528         }
2529
2530         r = e->exit_code;
2531
2532 finish:
2533         sd_event_unref(e);
2534         return r;
2535 }
2536
2537 _public_ int sd_event_get_fd(sd_event *e) {
2538
2539         assert_return(e, -EINVAL);
2540         assert_return(!event_pid_changed(e), -ECHILD);
2541
2542         return e->epoll_fd;
2543 }
2544
2545 _public_ int sd_event_get_state(sd_event *e) {
2546         assert_return(e, -EINVAL);
2547         assert_return(!event_pid_changed(e), -ECHILD);
2548
2549         return e->state;
2550 }
2551
2552 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2553         assert_return(e, -EINVAL);
2554         assert_return(code, -EINVAL);
2555         assert_return(!event_pid_changed(e), -ECHILD);
2556
2557         if (!e->exit_requested)
2558                 return -ENODATA;
2559
2560         *code = e->exit_code;
2561         return 0;
2562 }
2563
2564 _public_ int sd_event_exit(sd_event *e, int code) {
2565         assert_return(e, -EINVAL);
2566         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2567         assert_return(!event_pid_changed(e), -ECHILD);
2568
2569         e->exit_requested = true;
2570         e->exit_code = code;
2571
2572         return 0;
2573 }
2574
2575 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2576         assert_return(e, -EINVAL);
2577         assert_return(usec, -EINVAL);
2578         assert_return(!event_pid_changed(e), -ECHILD);
2579
2580         /* If we haven't run yet, just get the actual time */
2581         if (!dual_timestamp_is_set(&e->timestamp))
2582                 return -ENODATA;
2583
2584         switch (clock) {
2585
2586         case CLOCK_REALTIME:
2587         case CLOCK_REALTIME_ALARM:
2588                 *usec = e->timestamp.realtime;
2589                 break;
2590
2591         case CLOCK_MONOTONIC:
2592                 *usec = e->timestamp.monotonic;
2593                 break;
2594
2595         case CLOCK_BOOTTIME:
2596         case CLOCK_BOOTTIME_ALARM:
2597                 *usec = e->timestamp_boottime;
2598                 break;
2599         }
2600
2601         return 0;
2602 }
2603
2604 _public_ int sd_event_default(sd_event **ret) {
2605
2606         static thread_local sd_event *default_event = NULL;
2607         sd_event *e = NULL;
2608         int r;
2609
2610         if (!ret)
2611                 return !!default_event;
2612
2613         if (default_event) {
2614                 *ret = sd_event_ref(default_event);
2615                 return 0;
2616         }
2617
2618         r = sd_event_new(&e);
2619         if (r < 0)
2620                 return r;
2621
2622         e->default_event_ptr = &default_event;
2623         e->tid = gettid();
2624         default_event = e;
2625
2626         *ret = e;
2627         return 1;
2628 }
2629
2630 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2631         assert_return(e, -EINVAL);
2632         assert_return(tid, -EINVAL);
2633         assert_return(!event_pid_changed(e), -ECHILD);
2634
2635         if (e->tid != 0) {
2636                 *tid = e->tid;
2637                 return 0;
2638         }
2639
2640         return -ENXIO;
2641 }
2642
/* Enables or disables automatic watchdog pinging via sd_notify() for
 * this loop. Returns the new state (0/1), 0 without enabling when the
 * service manager did not request a watchdog, or a negative error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means no watchdog was requested via
                 * WATCHDOG_USEC; report that without enabling
                 * anything. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Register the timerfd with a marker value instead of
                 * a source pointer; sd_event_wait() recognizes it. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disabling: drop the timerfd from the epoll set and
                 * close it. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2694
2695 _public_ int sd_event_get_watchdog(sd_event *e) {
2696         assert_return(e, -EINVAL);
2697         assert_return(!event_pid_changed(e), -ECHILD);
2698
2699         return e->watchdog;
2700 }