chiark / gitweb /
sd-event: make errors on EPOLL_CTL_DEL pseudo-fatal
[elogind.git] / src / libelogind / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* Internal discriminator for the union inside sd_event_source; also used
 * (via INT_TO_PTR) as the epoll data tag for the loop's own fds. */
typedef enum EventSourceType {
        SOURCE_IO,                      /* fd watched via epoll */
        SOURCE_TIME_REALTIME,           /* CLOCK_REALTIME timer */
        SOURCE_TIME_BOOTTIME,           /* CLOCK_BOOTTIME timer */
        SOURCE_TIME_MONOTONIC,          /* CLOCK_MONOTONIC timer */
        SOURCE_TIME_REALTIME_ALARM,     /* wakes system from suspend */
        SOURCE_TIME_BOOTTIME_ALARM,     /* wakes system from suspend */
        SOURCE_SIGNAL,                  /* delivered via signalfd */
        SOURCE_CHILD,                   /* SIGCHLD-driven waitid() */
        SOURCE_DEFER,                   /* dispatched on every iteration while enabled */
        SOURCE_POST,                    /* dispatched after other sources */
        SOURCE_EXIT,                    /* dispatched once the loop is asked to exit */
        SOURCE_WATCHDOG,                /* internal sd_notify() watchdog timer */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
58
/* True for the five timer-based source types that share struct s->time. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
60
struct sd_event_source {
        unsigned n_ref;                 /* reference count, freed when it drops to 0 */

        sd_event *event;                /* owning loop; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;     /* optional callback run before polling */

        char *description;              /* free-form name for debugging/logging */

        EventSourceType type:5;
        int enabled:3;                  /* SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;                 /* queued in event->pending prioq */
        bool dispatching:1;             /* currently inside its callback */
        bool floating:1;                /* source holds a ref on the loop instead of vice versa */

        int64_t priority;               /* lower values dispatch first */
        unsigned pending_index;         /* index in event->pending prioq */
        unsigned prepare_index;         /* index in event->prepare prioq */
        unsigned pending_iteration;     /* loop iteration when it became pending */
        unsigned prepare_iteration;     /* loop iteration when it was last prepared */

        LIST_FIELDS(sd_event_source, sources);

        /* Per-type payload, discriminated by 'type' above. */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* requested EPOLL* mask */
                        uint32_t revents;       /* events reported by last poll */
                        bool registered:1;      /* currently added to the epoll fd */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest dispatch time + allowed slack */
                        unsigned earliest_index; /* index in clock_data->earliest prioq */
                        unsigned latest_index;   /* index in clock_data->latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* filled when the signal fires */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;      /* filled by waitid() */
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* index in event->exit prioq */
                } exit;
        };
};
121
/* Per-clock bookkeeping: one timerfd plus the two scheduling queues. */
struct clock_data {
        int fd;                 /* timerfd for this clock, or -1 if not created yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;            /* time currently armed on the timerfd */

        bool needs_rearm:1;     /* set whenever the queues changed and the fd must be re-armed */
};
138
struct sd_event {
        unsigned n_ref;                 /* reference count */

        int epoll_fd;                   /* the central poll fd */
        int signal_fd;                  /* signalfd covering 'sigset', or -1 */
        int watchdog_fd;                /* timerfd driving sd_notify watchdog pings, or -1 */

        Prioq *pending;                 /* sources with an undelivered event */
        Prioq *prepare;                 /* sources with a prepare callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;                 /* boot-id derived wakeup jitter, USEC_INFINITY until computed */

        sigset_t sigset;                /* signals currently routed through signal_fd */
        sd_event_source* *signal_sources; /* array of _NSIG entries, lazily allocated */

        Hashmap *child_sources;         /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* how many of those are not SD_EVENT_OFF */

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;             /* pid at creation; used to detect use across fork() */

        unsigned iteration;             /* monotonically increasing loop counter */
        dual_timestamp timestamp;       /* realtime+monotonic of current iteration */
        usec_t timestamp_boottime;
        int state;                      /* SD_EVENT_INITIAL/.../FINISHED */

        bool exit_requested:1;
        bool need_process_child:1;      /* SIGCHLD seen, must reap children */
        bool watchdog:1;                /* watchdog logic enabled */

        int exit_code;                  /* value returned by sd_event_loop() on exit */

        pid_t tid;                      /* thread that owns this loop (for default-loop lookup) */
        sd_event **default_event_ptr;   /* per-thread default-loop slot to clear on free */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;             /* number of connected sources */

        LIST_HEAD(sd_event_source, sources);
};
192
193 static void source_disconnect(sd_event_source *s);
194
195 static int pending_prioq_compare(const void *a, const void *b) {
196         const sd_event_source *x = a, *y = b;
197
198         assert(x->pending);
199         assert(y->pending);
200
201         /* Enabled ones first */
202         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203                 return -1;
204         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205                 return 1;
206
207         /* Lower priority values first */
208         if (x->priority < y->priority)
209                 return -1;
210         if (x->priority > y->priority)
211                 return 1;
212
213         /* Older entries first */
214         if (x->pending_iteration < y->pending_iteration)
215                 return -1;
216         if (x->pending_iteration > y->pending_iteration)
217                 return 1;
218
219         /* Stability for the rest */
220         if (x < y)
221                 return -1;
222         if (x > y)
223                 return 1;
224
225         return 0;
226 }
227
228 static int prepare_prioq_compare(const void *a, const void *b) {
229         const sd_event_source *x = a, *y = b;
230
231         assert(x->prepare);
232         assert(y->prepare);
233
234         /* Move most recently prepared ones last, so that we can stop
235          * preparing as soon as we hit one that has already been
236          * prepared in the current iteration */
237         if (x->prepare_iteration < y->prepare_iteration)
238                 return -1;
239         if (x->prepare_iteration > y->prepare_iteration)
240                 return 1;
241
242         /* Enabled ones first */
243         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
244                 return -1;
245         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
246                 return 1;
247
248         /* Lower priority values first */
249         if (x->priority < y->priority)
250                 return -1;
251         if (x->priority > y->priority)
252                 return 1;
253
254         /* Stability for the rest */
255         if (x < y)
256                 return -1;
257         if (x > y)
258                 return 1;
259
260         return 0;
261 }
262
263 static int earliest_time_prioq_compare(const void *a, const void *b) {
264         const sd_event_source *x = a, *y = b;
265
266         assert(EVENT_SOURCE_IS_TIME(x->type));
267         assert(x->type == y->type);
268
269         /* Enabled ones first */
270         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
271                 return -1;
272         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
273                 return 1;
274
275         /* Move the pending ones to the end */
276         if (!x->pending && y->pending)
277                 return -1;
278         if (x->pending && !y->pending)
279                 return 1;
280
281         /* Order by time */
282         if (x->time.next < y->time.next)
283                 return -1;
284         if (x->time.next > y->time.next)
285                 return 1;
286
287         /* Stability for the rest */
288         if (x < y)
289                 return -1;
290         if (x > y)
291                 return 1;
292
293         return 0;
294 }
295
296 static int latest_time_prioq_compare(const void *a, const void *b) {
297         const sd_event_source *x = a, *y = b;
298
299         assert(EVENT_SOURCE_IS_TIME(x->type));
300         assert(x->type == y->type);
301
302         /* Enabled ones first */
303         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304                 return -1;
305         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
306                 return 1;
307
308         /* Move the pending ones to the end */
309         if (!x->pending && y->pending)
310                 return -1;
311         if (x->pending && !y->pending)
312                 return 1;
313
314         /* Order by time */
315         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
316                 return -1;
317         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
318                 return 1;
319
320         /* Stability for the rest */
321         if (x < y)
322                 return -1;
323         if (x > y)
324                 return 1;
325
326         return 0;
327 }
328
329 static int exit_prioq_compare(const void *a, const void *b) {
330         const sd_event_source *x = a, *y = b;
331
332         assert(x->type == SOURCE_EXIT);
333         assert(y->type == SOURCE_EXIT);
334
335         /* Enabled ones first */
336         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
337                 return -1;
338         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
339                 return 1;
340
341         /* Lower priority values first */
342         if (x->priority < y->priority)
343                 return -1;
344         if (x->priority > y->priority)
345                 return 1;
346
347         /* Stability for the rest */
348         if (x < y)
349                 return -1;
350         if (x > y)
351                 return 1;
352
353         return 0;
354 }
355
356 static void free_clock_data(struct clock_data *d) {
357         assert(d);
358
359         safe_close(d->fd);
360         prioq_free(d->earliest);
361         prioq_free(d->latest);
362 }
363
/* Tear down an event loop whose last reference is gone: disconnect any
 * remaining (necessarily floating) sources, close all fds and free all
 * auxiliary data structures. Order matters: sources must go before the
 * epoll fd and the queues they may still be registered in. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources can still be attached here: non-floating
         * ones hold a reference on the loop, which would have kept it alive. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Clear the per-thread default-loop pointer if we were the default */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
400
/* Allocate a new event loop with an epoll fd and an empty pending queue.
 * All other fds/queues are created lazily when first needed.
 * Returns 0 and stores the loop (refcount 1) in *ret, or a negative errno. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark all fds unopened so event_free() can be used on the error path */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY; /* computed lazily in initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
438
439 _public_ sd_event* sd_event_ref(sd_event *e) {
440         assert_return(e, NULL);
441
442         assert(e->n_ref >= 1);
443         e->n_ref++;
444
445         return e;
446 }
447
448 _public_ sd_event* sd_event_unref(sd_event *e) {
449
450         if (!e)
451                 return NULL;
452
453         assert(e->n_ref >= 1);
454         e->n_ref--;
455
456         if (e->n_ref <= 0)
457                 event_free(e);
458
459         return NULL;
460 }
461
462 static bool event_pid_changed(sd_event *e) {
463         assert(e);
464
465         /* We don't support people creating an event loop and keeping
466          * it around over a fork(). Let's complain. */
467
468         return e->original_pid != getpid();
469 }
470
/* Remove an IO source's fd from the epoll set, if it is registered.
 * A failing EPOLL_CTL_DEL indicates internal state corruption, hence the
 * assertion on the result; after a fork we skip the syscall entirely
 * since the epoll fd belongs to the original process. */
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        assert_log(r >= 0);

        s->io.registered = false;
}
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
/* (Re)program the loop's signalfd to match e->sigset, creating the fd and
 * adding it to epoll on first use. signalfd(2) with an existing fd updates
 * the mask in place, so epoll registration is only needed when the fd is
 * new. Returns 0 or a negative errno. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return 0; /* foreign fds after fork(); do not touch them */

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0; /* mask updated in place, fd already registered */

        ev.events = EPOLLIN;
        /* Tag the fd with the source type so the dispatcher recognizes it */
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
633
/* Detach a source from its event loop: undo its type-specific kernel and
 * queue registrations, remove it from the pending/prepare queues and the
 * global source list, and drop the reference it holds on the loop (unless
 * floating). The source object itself stays allocated; s->event becomes
 * NULL so a second call is a no-op. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return; /* already disconnected */

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true; /* scheduling window may have changed */
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pin the loop; drop that pin last, since it
         * may free the loop altogether. */
        if (!s->floating)
                sd_event_unref(event);
}
738
/* Destroy a source outright: disconnect it from its loop (if any), then
 * free its description string and the object itself. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
746
/* Mark a source as (not) having an undelivered event, keeping the pending
 * prioq in sync. For time sources the two per-clock queues are reshuffled
 * as well, since pending-ness is part of their sort order. Exit sources
 * never go through the pending machinery. Returns 0 or a negative errno. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0; /* no change */

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false; /* roll back on OOM */
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Pending-ness affects the comparators, so re-sort */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
782
/* Allocate a new source of the given type, attach it to the loop's source
 * list and — unless floating — take a reference on the loop on its behalf.
 * Returns the source (refcount 1) or NULL on OOM. */
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new0(sd_event_source, 1);
        if (!s)
                return NULL;

        s->n_ref = 1;
        s->event = e;
        s->floating = floating;
        s->type = type;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources ++;

        return s;
}
806
/* Add an IO event source watching 'fd' for the given EPOLL* events and
 * register it with the kernel immediately (enabled, SD_EVENT_ON). If
 * 'ret' is NULL the source is created floating (owned by the loop).
 * Returns 0 or a negative errno. */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
846
/* Lazily compute the loop's wakeup perturbation offset from the boot ID.
 * Left at USEC_INFINITY if the boot ID cannot be read; callers treat that
 * as "no perturbation". */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return; /* already computed */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
865
/* Lazily create the timerfd for one clock and register it with epoll,
 * tagged with the matching time source type so the dispatcher can
 * recognize it. No-op if the fd already exists. Returns 0 or -errno. */
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0; /* already set up */

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
896
/* Default handler used by sd_event_add_time() when no callback is given:
 * asks the loop to exit, with the exit code smuggled through userdata. */
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
902
/* Add a one-shot timer source on the given clock, firing once between
 * 'usec' and 'usec + accuracy' (accuracy 0 selects the 250ms default).
 * The per-clock prioqs and timerfd are created lazily on first use. If
 * 'callback' is NULL the timer makes the loop exit; if 'ret' is NULL the
 * source is floating. Returns 0 or a negative errno. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP); /* unsupported clock id */

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the two scheduling queues for this clock */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ... and the backing timerfd */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true; /* new entry may change the wakeup window */

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
980
/* Default handler used by sd_event_add_signal() when no callback is given:
 * asks the loop to exit, with the exit code smuggled through userdata. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
986
987 _public_ int sd_event_add_signal(
988                 sd_event *e,
989                 sd_event_source **ret,
990                 int sig,
991                 sd_event_signal_handler_t callback,
992                 void *userdata) {
993
994         sd_event_source *s;
995         sigset_t ss;
996         int r;
997         bool previous;
998
999         assert_return(e, -EINVAL);
1000         assert_return(sig > 0, -EINVAL);
1001         assert_return(sig < _NSIG, -EINVAL);
1002         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1003         assert_return(!event_pid_changed(e), -ECHILD);
1004
1005         if (!callback)
1006                 callback = signal_exit_callback;
1007
1008         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1009         if (r < 0)
1010                 return -errno;
1011
1012         if (!sigismember(&ss, sig))
1013                 return -EBUSY;
1014
1015         if (!e->signal_sources) {
1016                 e->signal_sources = new0(sd_event_source*, _NSIG);
1017                 if (!e->signal_sources)
1018                         return -ENOMEM;
1019         } else if (e->signal_sources[sig])
1020                 return -EBUSY;
1021
1022         previous = need_signal(e, sig);
1023
1024         s = source_new(e, !ret, SOURCE_SIGNAL);
1025         if (!s)
1026                 return -ENOMEM;
1027
1028         s->signal.sig = sig;
1029         s->signal.callback = callback;
1030         s->userdata = userdata;
1031         s->enabled = SD_EVENT_ON;
1032
1033         e->signal_sources[sig] = s;
1034
1035         if (!previous) {
1036                 assert_se(sigaddset(&e->sigset, sig) == 0);
1037
1038                 r = event_update_signal_fd(e);
1039                 if (r < 0) {
1040                         source_free(s);
1041                         return r;
1042                 }
1043         }
1044
1045         /* Use the signal name as description for the event source by default */
1046         (void) sd_event_source_set_description(s, signal_to_string(sig));
1047
1048         if (ret)
1049                 *ret = s;
1050
1051         return 0;
1052 }
1053
/* Adds an event source that watches one specific child process,
 * dispatched via SIGCHLD. Exactly one source may exist per PID. The
 * source is created enabled in one-shot mode. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one source may watch any given PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already watched before this
         * source exists, so we only update the signalfd on a change. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* NOTE(review): presumably source_free() decrements this again
         * on the error path below — verify against source_disconnect(). */
        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Make sure the next loop iteration checks for waiting children. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1118
1119 _public_ int sd_event_add_defer(
1120                 sd_event *e,
1121                 sd_event_source **ret,
1122                 sd_event_handler_t callback,
1123                 void *userdata) {
1124
1125         sd_event_source *s;
1126         int r;
1127
1128         assert_return(e, -EINVAL);
1129         assert_return(callback, -EINVAL);
1130         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1131         assert_return(!event_pid_changed(e), -ECHILD);
1132
1133         s = source_new(e, !ret, SOURCE_DEFER);
1134         if (!s)
1135                 return -ENOMEM;
1136
1137         s->defer.callback = callback;
1138         s->userdata = userdata;
1139         s->enabled = SD_EVENT_ONESHOT;
1140
1141         r = source_set_pending(s, true);
1142         if (r < 0) {
1143                 source_free(s);
1144                 return r;
1145         }
1146
1147         if (ret)
1148                 *ret = s;
1149
1150         return 0;
1151 }
1152
1153 _public_ int sd_event_add_post(
1154                 sd_event *e,
1155                 sd_event_source **ret,
1156                 sd_event_handler_t callback,
1157                 void *userdata) {
1158
1159         sd_event_source *s;
1160         int r;
1161
1162         assert_return(e, -EINVAL);
1163         assert_return(callback, -EINVAL);
1164         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1165         assert_return(!event_pid_changed(e), -ECHILD);
1166
1167         r = set_ensure_allocated(&e->post_sources, NULL);
1168         if (r < 0)
1169                 return r;
1170
1171         s = source_new(e, !ret, SOURCE_POST);
1172         if (!s)
1173                 return -ENOMEM;
1174
1175         s->post.callback = callback;
1176         s->userdata = userdata;
1177         s->enabled = SD_EVENT_ON;
1178
1179         r = set_put(e->post_sources, s);
1180         if (r < 0) {
1181                 source_free(s);
1182                 return r;
1183         }
1184
1185         if (ret)
1186                 *ret = s;
1187
1188         return 0;
1189 }
1190
1191 _public_ int sd_event_add_exit(
1192                 sd_event *e,
1193                 sd_event_source **ret,
1194                 sd_event_handler_t callback,
1195                 void *userdata) {
1196
1197         sd_event_source *s;
1198         int r;
1199
1200         assert_return(e, -EINVAL);
1201         assert_return(callback, -EINVAL);
1202         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1203         assert_return(!event_pid_changed(e), -ECHILD);
1204
1205         if (!e->exit) {
1206                 e->exit = prioq_new(exit_prioq_compare);
1207                 if (!e->exit)
1208                         return -ENOMEM;
1209         }
1210
1211         s = source_new(e, !ret, SOURCE_EXIT);
1212         if (!s)
1213                 return -ENOMEM;
1214
1215         s->exit.callback = callback;
1216         s->userdata = userdata;
1217         s->exit.prioq_index = PRIOQ_IDX_NULL;
1218         s->enabled = SD_EVENT_ONESHOT;
1219
1220         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1221         if (r < 0) {
1222                 source_free(s);
1223                 return r;
1224         }
1225
1226         if (ret)
1227                 *ret = s;
1228
1229         return 0;
1230 }
1231
1232 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1233         assert_return(s, NULL);
1234
1235         assert(s->n_ref >= 1);
1236         s->n_ref++;
1237
1238         return s;
1239 }
1240
1241 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1242
1243         if (!s)
1244                 return NULL;
1245
1246         assert(s->n_ref >= 1);
1247         s->n_ref--;
1248
1249         if (s->n_ref <= 0) {
1250                 /* Here's a special hack: when we are called from a
1251                  * dispatch handler we won't free the event source
1252                  * immediately, but we will detach the fd from the
1253                  * epoll. This way it is safe for the caller to unref
1254                  * the event source and immediately close the fd, but
1255                  * we still retain a valid event source object after
1256                  * the callback. */
1257
1258                 if (s->dispatching) {
1259                         if (s->type == SOURCE_IO)
1260                                 source_io_unregister(s);
1261
1262                         source_disconnect(s);
1263                 } else
1264                         source_free(s);
1265         }
1266
1267         return NULL;
1268 }
1269
/* Sets a free-form description string on the event source (used for
 * debugging/logging). Passing NULL clears the description. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
1276
/* Returns the source's description string via *description, or -ENXIO
 * if none was set. The string remains owned by the event source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
1286
/* Returns the event loop object the source is attached to. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1292
/* Returns whether the event source currently has an event pending
 * (> 0 if pending, 0 if not). Not defined for exit sources. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1301
/* Returns the file descriptor an I/O event source watches. */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1309
1310 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1311         int r;
1312
1313         assert_return(s, -EINVAL);
1314         assert_return(fd >= 0, -EINVAL);
1315         assert_return(s->type == SOURCE_IO, -EDOM);
1316         assert_return(!event_pid_changed(s->event), -ECHILD);
1317
1318         if (s->io.fd == fd)
1319                 return 0;
1320
1321         if (s->enabled == SD_EVENT_OFF) {
1322                 s->io.fd = fd;
1323                 s->io.registered = false;
1324         } else {
1325                 int saved_fd;
1326
1327                 saved_fd = s->io.fd;
1328                 assert(s->io.registered);
1329
1330                 s->io.fd = fd;
1331                 s->io.registered = false;
1332
1333                 r = source_io_register(s, s->enabled, s->io.events);
1334                 if (r < 0) {
1335                         s->io.fd = saved_fd;
1336                         s->io.registered = true;
1337                         return r;
1338                 }
1339
1340                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1341         }
1342
1343         return 0;
1344 }
1345
/* Returns the epoll event mask an I/O event source watches via *events. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1355
1356 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1357         int r;
1358
1359         assert_return(s, -EINVAL);
1360         assert_return(s->type == SOURCE_IO, -EDOM);
1361         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1362         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1363         assert_return(!event_pid_changed(s->event), -ECHILD);
1364
1365         /* edge-triggered updates are never skipped, so we can reset edges */
1366         if (s->io.events == events && !(events & EPOLLET))
1367                 return 0;
1368
1369         if (s->enabled != SD_EVENT_OFF) {
1370                 r = source_io_register(s, s->enabled, events);
1371                 if (r < 0)
1372                         return r;
1373         }
1374
1375         s->io.events = events;
1376         source_set_pending(s, false);
1377
1378         return 0;
1379 }
1380
/* Returns the epoll events that actually triggered, via *revents.
 * Only valid while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1391
/* Returns the signal number a signal event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1399
1400 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1401         assert_return(s, -EINVAL);
1402         assert_return(!event_pid_changed(s->event), -ECHILD);
1403
1404         return s->priority;
1405 }
1406
1407 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1408         assert_return(s, -EINVAL);
1409         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1410         assert_return(!event_pid_changed(s->event), -ECHILD);
1411
1412         if (s->priority == priority)
1413                 return 0;
1414
1415         s->priority = priority;
1416
1417         if (s->pending)
1418                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1419
1420         if (s->prepare)
1421                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1422
1423         if (s->type == SOURCE_EXIT)
1424                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1425
1426         return 0;
1427 }
1428
/* Returns the enablement state (SD_EVENT_ON/OFF/ONESHOT) via *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1437
1438 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1439         int r;
1440
1441         assert_return(s, -EINVAL);
1442         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1443         assert_return(!event_pid_changed(s->event), -ECHILD);
1444
1445         /* If we are dead anyway, we are fine with turning off
1446          * sources, but everything else needs to fail. */
1447         if (s->event->state == SD_EVENT_FINISHED)
1448                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1449
1450         if (s->enabled == m)
1451                 return 0;
1452
1453         if (m == SD_EVENT_OFF) {
1454
1455                 switch (s->type) {
1456
1457                 case SOURCE_IO:
1458                         source_io_unregister(s);
1459                         s->enabled = m;
1460                         break;
1461
1462                 case SOURCE_TIME_REALTIME:
1463                 case SOURCE_TIME_BOOTTIME:
1464                 case SOURCE_TIME_MONOTONIC:
1465                 case SOURCE_TIME_REALTIME_ALARM:
1466                 case SOURCE_TIME_BOOTTIME_ALARM: {
1467                         struct clock_data *d;
1468
1469                         s->enabled = m;
1470                         d = event_get_clock_data(s->event, s->type);
1471                         assert(d);
1472
1473                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1474                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1475                         d->needs_rearm = true;
1476                         break;
1477                 }
1478
1479                 case SOURCE_SIGNAL:
1480                         assert(need_signal(s->event, s->signal.sig));
1481
1482                         s->enabled = m;
1483
1484                         if (!need_signal(s->event, s->signal.sig)) {
1485                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1486
1487                                 (void) event_update_signal_fd(s->event);
1488                                 /* If disabling failed, we might get a spurious event,
1489                                  * but otherwise nothing bad should happen. */
1490                         }
1491
1492                         break;
1493
1494                 case SOURCE_CHILD:
1495                         assert(need_signal(s->event, SIGCHLD));
1496
1497                         s->enabled = m;
1498
1499                         assert(s->event->n_enabled_child_sources > 0);
1500                         s->event->n_enabled_child_sources--;
1501
1502                         if (!need_signal(s->event, SIGCHLD)) {
1503                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1504
1505                                 (void) event_update_signal_fd(s->event);
1506                         }
1507
1508                         break;
1509
1510                 case SOURCE_EXIT:
1511                         s->enabled = m;
1512                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1513                         break;
1514
1515                 case SOURCE_DEFER:
1516                 case SOURCE_POST:
1517                         s->enabled = m;
1518                         break;
1519
1520                 default:
1521                         assert_not_reached("Wut? I shouldn't exist.");
1522                 }
1523
1524         } else {
1525                 switch (s->type) {
1526
1527                 case SOURCE_IO:
1528                         r = source_io_register(s, m, s->io.events);
1529                         if (r < 0)
1530                                 return r;
1531
1532                         s->enabled = m;
1533                         break;
1534
1535                 case SOURCE_TIME_REALTIME:
1536                 case SOURCE_TIME_BOOTTIME:
1537                 case SOURCE_TIME_MONOTONIC:
1538                 case SOURCE_TIME_REALTIME_ALARM:
1539                 case SOURCE_TIME_BOOTTIME_ALARM: {
1540                         struct clock_data *d;
1541
1542                         s->enabled = m;
1543                         d = event_get_clock_data(s->event, s->type);
1544                         assert(d);
1545
1546                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1547                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1548                         d->needs_rearm = true;
1549                         break;
1550                 }
1551
1552                 case SOURCE_SIGNAL:
1553                         /* Check status before enabling. */
1554                         if (!need_signal(s->event, s->signal.sig)) {
1555                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1556
1557                                 r = event_update_signal_fd(s->event);
1558                                 if (r < 0) {
1559                                         s->enabled = SD_EVENT_OFF;
1560                                         return r;
1561                                 }
1562                         }
1563
1564                         s->enabled = m;
1565                         break;
1566
1567                 case SOURCE_CHILD:
1568                         /* Check status before enabling. */
1569                         if (s->enabled == SD_EVENT_OFF) {
1570                                 if (!need_signal(s->event, SIGCHLD)) {
1571                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1572
1573                                         r = event_update_signal_fd(s->event);
1574                                         if (r < 0) {
1575                                                 s->enabled = SD_EVENT_OFF;
1576                                                 return r;
1577                                         }
1578                                 }
1579
1580                                 s->event->n_enabled_child_sources++;
1581                         }
1582
1583                         s->enabled = m;
1584                         break;
1585
1586                 case SOURCE_EXIT:
1587                         s->enabled = m;
1588                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1589                         break;
1590
1591                 case SOURCE_DEFER:
1592                 case SOURCE_POST:
1593                         s->enabled = m;
1594                         break;
1595
1596                 default:
1597                         assert_not_reached("Wut? I shouldn't exist.");
1598                 }
1599         }
1600
1601         if (s->pending)
1602                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1603
1604         if (s->prepare)
1605                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1606
1607         return 0;
1608 }
1609
/* Returns the absolute trigger time of a timer event source via *usec. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1619
1620 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1621         struct clock_data *d;
1622
1623         assert_return(s, -EINVAL);
1624         assert_return(usec != (uint64_t) -1, -EINVAL);
1625         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1626         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1627         assert_return(!event_pid_changed(s->event), -ECHILD);
1628
1629         s->time.next = usec;
1630
1631         source_set_pending(s, false);
1632
1633         d = event_get_clock_data(s->event, s->type);
1634         assert(d);
1635
1636         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1637         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1638         d->needs_rearm = true;
1639
1640         return 0;
1641 }
1642
/* Returns the dispatch accuracy (slack) of a timer source via *usec. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1652
1653 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1654         struct clock_data *d;
1655
1656         assert_return(s, -EINVAL);
1657         assert_return(usec != (uint64_t) -1, -EINVAL);
1658         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1659         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1660         assert_return(!event_pid_changed(s->event), -ECHILD);
1661
1662         if (usec == 0)
1663                 usec = DEFAULT_ACCURACY_USEC;
1664
1665         s->time.accuracy = usec;
1666
1667         source_set_pending(s, false);
1668
1669         d = event_get_clock_data(s->event, s->type);
1670         assert(d);
1671
1672         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1673         d->needs_rearm = true;
1674
1675         return 0;
1676 }
1677
/* Returns the clock id (CLOCK_REALTIME, CLOCK_MONOTONIC, ...) a timer
 * event source is based on, via *clock. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1687
/* Returns the PID a child event source watches, via *pid. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1697
1698 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1699         int r;
1700
1701         assert_return(s, -EINVAL);
1702         assert_return(s->type != SOURCE_EXIT, -EDOM);
1703         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1704         assert_return(!event_pid_changed(s->event), -ECHILD);
1705
1706         if (s->prepare == callback)
1707                 return 0;
1708
1709         if (callback && s->prepare) {
1710                 s->prepare = callback;
1711                 return 0;
1712         }
1713
1714         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1715         if (r < 0)
1716                 return r;
1717
1718         s->prepare = callback;
1719
1720         if (callback) {
1721                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1722                 if (r < 0)
1723                         return r;
1724         } else
1725                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1726
1727         return 0;
1728 }
1729
/* Returns the userdata pointer associated with the event source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1735
1736 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1737         void *ret;
1738
1739         assert_return(s, NULL);
1740
1741         ret = s->userdata;
1742         s->userdata = userdata;
1743
1744         return ret;
1745 }
1746
/* Picks a wakeup time in the window [a, b], trying successively coarser
 * alignment granularities (1min, 10s, 1s, 250ms) offset by the per-boot
 * perturbation value, so that wakeups across the system coalesce. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* NOTE(review): usec_t is unsigned, so this is effectively a == 0. */
        if (a <= 0)
                return 0;

        /* Window too narrow to be worth aligning. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* NOTE(review): this step uses e->perturb directly, the later ones
         * take it modulo the granularity — presumably initialize_perturb()
         * caps the value below one minute; verify there. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No aligned spot fits in the window; wake as late as possible. */
        return b;
}
1824
1825 static int event_arm_timer(
1826                 sd_event *e,
1827                 struct clock_data *d) {
1828
1829         struct itimerspec its = {};
1830         sd_event_source *a, *b;
1831         usec_t t;
1832         int r;
1833
1834         assert(e);
1835         assert(d);
1836
1837         if (!d->needs_rearm)
1838                 return 0;
1839         else
1840                 d->needs_rearm = false;
1841
1842         a = prioq_peek(d->earliest);
1843         if (!a || a->enabled == SD_EVENT_OFF) {
1844
1845                 if (d->fd < 0)
1846                         return 0;
1847
1848                 if (d->next == USEC_INFINITY)
1849                         return 0;
1850
1851                 /* disarm */
1852                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1853                 if (r < 0)
1854                         return r;
1855
1856                 d->next = USEC_INFINITY;
1857                 return 0;
1858         }
1859
1860         b = prioq_peek(d->latest);
1861         assert_se(b && b->enabled != SD_EVENT_OFF);
1862
1863         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1864         if (d->next == t)
1865                 return 0;
1866
1867         assert_se(d->fd >= 0);
1868
1869         if (t == 0) {
1870                 /* We don' want to disarm here, just mean some time looooong ago. */
1871                 its.it_value.tv_sec = 0;
1872                 its.it_value.tv_nsec = 1;
1873         } else
1874                 timespec_store(&its.it_value, t);
1875
1876         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1877         if (r < 0)
1878                 return -errno;
1879
1880         d->next = t;
1881         return 0;
1882 }
1883
1884 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1885         assert(e);
1886         assert(s);
1887         assert(s->type == SOURCE_IO);
1888
1889         /* If the event source was already pending, we just OR in the
1890          * new revents, otherwise we reset the value. The ORing is
1891          * necessary to handle EPOLLONESHOT events properly where
1892          * readability might happen independently of writability, and
1893          * we need to keep track of both */
1894
1895         if (s->pending)
1896                 s->io.revents |= revents;
1897         else
1898                 s->io.revents = revents;
1899
1900         return source_set_pending(s, true);
1901 }
1902
1903 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1904         uint64_t x;
1905         ssize_t ss;
1906
1907         assert(e);
1908         assert(fd >= 0);
1909
1910         assert_return(events == EPOLLIN, -EIO);
1911
1912         ss = read(fd, &x, sizeof(x));
1913         if (ss < 0) {
1914                 if (errno == EAGAIN || errno == EINTR)
1915                         return 0;
1916
1917                 return -errno;
1918         }
1919
1920         if (_unlikely_(ss != sizeof(x)))
1921                 return -EIO;
1922
1923         if (next)
1924                 *next = USEC_INFINITY;
1925
1926         return 0;
1927 }
1928
1929 static int process_timer(
1930                 sd_event *e,
1931                 usec_t n,
1932                 struct clock_data *d) {
1933
1934         sd_event_source *s;
1935         int r;
1936
1937         assert(e);
1938         assert(d);
1939
1940         for (;;) {
1941                 s = prioq_peek(d->earliest);
1942                 if (!s ||
1943                     s->time.next > n ||
1944                     s->enabled == SD_EVENT_OFF ||
1945                     s->pending)
1946                         break;
1947
1948                 r = source_set_pending(s, true);
1949                 if (r < 0)
1950                         return r;
1951
1952                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1953                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1954                 d->needs_rearm = true;
1955         }
1956
1957         return 0;
1958 }
1959
/* Polls every watched child with waitid(WNOHANG) and marks the matching
 * child event sources pending. Returns 0 on success, negative errno on
 * failure. O(number of watched children) per invocation. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued — no need to query the kernel again */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT only when the caller asked for WEXITED, so the
                 * zombie survives until the callback has seen it */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when the child had no state change */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2026
/* Drains the loop's signalfd and marks the subscribed signal sources
 * pending. Returns > 0 if at least one siginfo was consumed, 0 if the
 * fd was already empty, negative errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: queue drained; EINTR: we'll be woken again */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd hands out whole siginfo records only */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD doubles as the trigger for the per-child
                         * waitid() scan */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Nobody subscribed to this signal — drop it */
                if (!s)
                        continue;

                /* Stash the siginfo for the callback and queue the source */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2075
/* Invokes the user callback of a single event source, handling the
 * bookkeeping around it: clearing the pending flag, queueing post
 * sources, honoring ONESHOT, reaping child zombies, and disabling the
 * source when its callback returns an error. Returns 1 on success,
 * negative errno on bookkeeping failure (callback errors are absorbed). */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER/EXIT sources stay "pending" across dispatches; all other
         * types are unqueued before their callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback, so the
         * callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* Guard against the callback freeing the source under us */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Decide *before* the callback, since it may clobber siginfo */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* The callback may have dropped the last reference; only free now
         * that dispatching is over. A failing callback disables the source
         * rather than killing the whole loop. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2181
/* Runs the prepare callback of every source that registered one, at
 * most once per loop iteration each. Sources whose prepare callback
 * fails are disabled (or freed if unreferenced), mirroring
 * source_dispatch(). Returns 0 on success, negative errno on
 * bookkeeping failure. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                /* The prepare queue sorts un-run sources first, so hitting
                 * one already run this iteration means we are done. */
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as run *before* the callback, then reshuffle so the
                 * next peek yields a different source. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Protect the source from being freed mid-callback */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2220
2221 static int dispatch_exit(sd_event *e) {
2222         sd_event_source *p;
2223         int r;
2224
2225         assert(e);
2226
2227         p = prioq_peek(e->exit);
2228         if (!p || p->enabled == SD_EVENT_OFF) {
2229                 e->state = SD_EVENT_FINISHED;
2230                 return 0;
2231         }
2232
2233         sd_event_ref(e);
2234         e->iteration++;
2235         e->state = SD_EVENT_EXITING;
2236
2237         r = source_dispatch(p);
2238
2239         e->state = SD_EVENT_INITIAL;
2240         sd_event_unref(e);
2241
2242         return r;
2243 }
2244
2245 static sd_event_source* event_next_pending(sd_event *e) {
2246         sd_event_source *p;
2247
2248         assert(e);
2249
2250         p = prioq_peek(e->pending);
2251         if (!p)
2252                 return NULL;
2253
2254         if (p->enabled == SD_EVENT_OFF)
2255                 return NULL;
2256
2257         return p;
2258 }
2259
2260 static int arm_watchdog(sd_event *e) {
2261         struct itimerspec its = {};
2262         usec_t t;
2263         int r;
2264
2265         assert(e);
2266         assert(e->watchdog_fd >= 0);
2267
2268         t = sleep_between(e,
2269                           e->watchdog_last + (e->watchdog_period / 2),
2270                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2271
2272         timespec_store(&its.it_value, t);
2273
2274         /* Make sure we never set the watchdog to 0, which tells the
2275          * kernel to disable it. */
2276         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2277                 its.it_value.tv_nsec = 1;
2278
2279         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2280         if (r < 0)
2281                 return -errno;
2282
2283         return 0;
2284 }
2285
2286 static int process_watchdog(sd_event *e) {
2287         assert(e);
2288
2289         if (!e->watchdog)
2290                 return 0;
2291
2292         /* Don't notify watchdog too often */
2293         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2294                 return 0;
2295
2296         sd_notify(false, "WATCHDOG=1");
2297         e->watchdog_last = e->timestamp.monotonic;
2298
2299         return arm_watchdog(e);
2300 }
2301
/* Public entry: runs prepare callbacks and arms all clock timerfds for
 * the coming iteration. Returns > 0 if something is already pending
 * (loop moves to PENDING), 0 if the caller should poll (loop moves to
 * ARMED), negative errno on failure. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Exit was requested: skip arming, report "pending" immediately */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Re-arm every per-clock timerfd to its earliest deadline */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        /* Something is (probably) pending; do a zero-timeout wait so the
         * state machine ends up in PENDING the same way a real poll would.
         * sd_event_wait() requires the ARMED state, hence set it first,
         * and restore it if the wait found nothing after all. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
2354
/* Public entry: polls the epoll fd for up to 'timeout' usec (or forever
 * for (uint64_t) -1), flushes whatever fired (clock timerfds, signalfd,
 * watchdog, I/O sources) and queues the resulting pending sources.
 * Returns 1 and moves to PENDING if something is dispatchable, 0 and
 * moves back to INITIAL otherwise, negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* NOTE(review): newa() presumably stack-allocates; with a very
         * large n_sources this could grow the stack considerably — confirm. */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* usec timeout → ms, rounded up so we never wake early */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Treat a signal interruption like "something may be
                         * pending" so the caller re-enters the state machine */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        /* Cache "now" once per wakeup; timer processing below compares
         * against these snapshots */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with their SOURCE_* constant as
                 * the epoll data pointer; everything else is an I/O source */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue every timer source whose deadline has been reached */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
2454
2455 _public_ int sd_event_dispatch(sd_event *e) {
2456         sd_event_source *p;
2457         int r;
2458
2459         assert_return(e, -EINVAL);
2460         assert_return(!event_pid_changed(e), -ECHILD);
2461         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2462         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2463
2464         if (e->exit_requested)
2465                 return dispatch_exit(e);
2466
2467         p = event_next_pending(e);
2468         if (p) {
2469                 sd_event_ref(e);
2470
2471                 e->state = SD_EVENT_RUNNING;
2472                 r = source_dispatch(p);
2473                 e->state = SD_EVENT_INITIAL;
2474
2475                 sd_event_unref(e);
2476
2477                 return r;
2478         }
2479
2480         e->state = SD_EVENT_INITIAL;
2481
2482         return 1;
2483 }
2484
2485 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2486         int r;
2487
2488         assert_return(e, -EINVAL);
2489         assert_return(!event_pid_changed(e), -ECHILD);
2490         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2491         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2492
2493         r = sd_event_prepare(e);
2494         if (r == 0)
2495                 /* There was nothing? Then wait... */
2496                 r = sd_event_wait(e, timeout);
2497
2498         if (r > 0) {
2499                 /* There's something now, then let's dispatch it */
2500                 r = sd_event_dispatch(e);
2501                 if (r < 0)
2502                         return r;
2503
2504                 return 1;
2505         }
2506
2507         return r;
2508 }
2509
2510 _public_ int sd_event_loop(sd_event *e) {
2511         int r;
2512
2513         assert_return(e, -EINVAL);
2514         assert_return(!event_pid_changed(e), -ECHILD);
2515         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2516
2517         sd_event_ref(e);
2518
2519         while (e->state != SD_EVENT_FINISHED) {
2520                 r = sd_event_run(e, (uint64_t) -1);
2521                 if (r < 0)
2522                         goto finish;
2523         }
2524
2525         r = e->exit_code;
2526
2527 finish:
2528         sd_event_unref(e);
2529         return r;
2530 }
2531
/* Public accessor: returns the loop's epoll fd so it can be embedded in
 * an external poll loop. */
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
2539
/* Public accessor: returns the loop's current SD_EVENT_* state. */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2546
2547 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2548         assert_return(e, -EINVAL);
2549         assert_return(code, -EINVAL);
2550         assert_return(!event_pid_changed(e), -ECHILD);
2551
2552         if (!e->exit_requested)
2553                 return -ENODATA;
2554
2555         *code = e->exit_code;
2556         return 0;
2557 }
2558
/* Public entry: requests the loop to exit with the given code; the
 * actual shutdown happens on the next iteration via dispatch_exit(). */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
2569
2570 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2571         assert_return(e, -EINVAL);
2572         assert_return(usec, -EINVAL);
2573         assert_return(!event_pid_changed(e), -ECHILD);
2574
2575         /* If we haven't run yet, just get the actual time */
2576         if (!dual_timestamp_is_set(&e->timestamp))
2577                 return -ENODATA;
2578
2579         switch (clock) {
2580
2581         case CLOCK_REALTIME:
2582         case CLOCK_REALTIME_ALARM:
2583                 *usec = e->timestamp.realtime;
2584                 break;
2585
2586         case CLOCK_MONOTONIC:
2587                 *usec = e->timestamp.monotonic;
2588                 break;
2589
2590         case CLOCK_BOOTTIME:
2591         case CLOCK_BOOTTIME_ALARM:
2592                 *usec = e->timestamp_boottime;
2593                 break;
2594         }
2595
2596         return 0;
2597 }
2598
/* Public entry: returns the per-thread default event loop, creating it
 * on first use. With ret == NULL, merely reports (0/1) whether one
 * already exists. Returns 0 when handing out an existing loop, 1 when
 * a new one was created, negative errno on allocation failure. */
_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember where to clear the cached pointer when the loop is
         * freed, and pin the loop to the creating thread. */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2624
2625 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2626         assert_return(e, -EINVAL);
2627         assert_return(tid, -EINVAL);
2628         assert_return(!event_pid_changed(e), -ECHILD);
2629
2630         if (e->tid != 0) {
2631                 *tid = e->tid;
2632                 return 0;
2633         }
2634
2635         return -ENXIO;
2636 }
2637
2638 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2639         int r;
2640
2641         assert_return(e, -EINVAL);
2642         assert_return(!event_pid_changed(e), -ECHILD);
2643
2644         if (e->watchdog == !!b)
2645                 return e->watchdog;
2646
2647         if (b) {
2648                 struct epoll_event ev = {};
2649
2650                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2651                 if (r <= 0)
2652                         return r;
2653
2654                 /* Issue first ping immediately */
2655                 sd_notify(false, "WATCHDOG=1");
2656                 e->watchdog_last = now(CLOCK_MONOTONIC);
2657
2658                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2659                 if (e->watchdog_fd < 0)
2660                         return -errno;
2661
2662                 r = arm_watchdog(e);
2663                 if (r < 0)
2664                         goto fail;
2665
2666                 ev.events = EPOLLIN;
2667                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2668
2669                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2670                 if (r < 0) {
2671                         r = -errno;
2672                         goto fail;
2673                 }
2674
2675         } else {
2676                 if (e->watchdog_fd >= 0) {
2677                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2678                         e->watchdog_fd = safe_close(e->watchdog_fd);
2679                 }
2680         }
2681
2682         e->watchdog = !!b;
2683         return e->watchdog;
2684
2685 fail:
2686         e->watchdog_fd = safe_close(e->watchdog_fd);
2687         return r;
2688 }
2689
/* Public accessor: returns whether watchdog support is enabled (0/1). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}