chiark / gitweb /
25089a03350129cece6cef0b42b497d7b2f7f2d9
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
#define EPOLL_QUEUE_MAX 512U
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

/* The kind of work an event source performs. The five SOURCE_TIME_*
 * entries map 1:1 to the clocks timerfd supports (see
 * event_source_type_to_clock() below). */
typedef enum EventSourceType {
        SOURCE_IO,                  /* file descriptor readiness, via epoll */
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,              /* signal delivery, via signalfd */
        SOURCE_CHILD,               /* child process state change (SIGCHLD-driven) */
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,                /* dispatched when loop exit is requested */
        SOURCE_WATCHDOG,            /* internal watchdog source, not user-visible */
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for any of the five clock-backed source types above */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* A single event source registered with an sd_event loop. The union at
 * the end carries the per-type payload, discriminated by "type". */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* loop we are attached to; NULL after source_disconnect() */
        void *userdata;
        sd_event_handler_t prepare; /* optional pre-poll callback, queued in event->prepare */

        char *description;          /* human-readable name, owned and freed with the source */

        /* Packed into bit-fields to keep the struct small */
        EventSourceType type:5;
        int enabled:3;              /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;             /* currently queued in event->pending */
        bool dispatching:1;
        bool floating:1;            /* if set, the loop owns us; otherwise we pin the loop
                                     * with a reference (see source_new()/source_disconnect()) */

        int64_t priority;           /* lower values dispatch first */
        unsigned pending_index;     /* prioq bookkeeping for event->pending */
        unsigned prepare_index;     /* prioq bookkeeping for event->prepare */
        unsigned pending_iteration; /* loop iteration at which we became pending */
        unsigned prepare_iteration; /* loop iteration at which we were last prepared */

        LIST_FIELDS(sd_event_source, sources); /* membership in event->sources */

        /* Per-type state; valid member selected by "type" */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask requested */
                        uint32_t revents;   /* epoll event mask received */
                        bool registered:1;  /* fd currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest wakeup, and allowed slack */
                        unsigned earliest_index; /* prioq bookkeeping, clock_data->earliest */
                        unsigned latest_index;   /* prioq bookkeeping, clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;    /* waitid() options: WEXITED/WSTOPPED/WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* prioq bookkeeping for event->exit */
                } exit;
        };
};
122
/* Per-clock scheduling state; one instance per supported clock in sd_event. */
struct clock_data {
        int fd;         /* timerfd for this clock, or -1 if not created yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;    /* wakeup time last programmed, USEC_INFINITY if none */

        bool needs_rearm:1;     /* set whenever the prioqs changed and the timerfd
                                 * may need re-programming */
};
139
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;           /* central epoll instance */
        int signal_fd;          /* shared signalfd, -1 until first signal source */
        int watchdog_fd;        /* timerfd driving watchdog pings, -1 if unused */

        Prioq *pending;         /* sources with events waiting to be dispatched */
        Prioq *prepare;         /* sources with a prepare() callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;         /* per-boot wakeup offset, see initialize_perturb() */

        sigset_t sigset;        /* set of signals the signalfd currently covers */
        sd_event_source **signal_sources; /* _NSIG-sized array, indexed by signal number */

        Hashmap *child_sources; /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;            /* SOURCE_EXIT sources, by priority */

        pid_t original_pid;     /* pid at creation time, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;   /* points at the per-thread default-event slot,
                                         * cleared on destruction */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);    /* all attached sources */
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
/* Release all resources held by a per-clock state: the timerfd (if
 * open) and both scheduling prioqs. Safe on a zero/-1-initialized
 * clock_data. */
static void free_clock_data(struct clock_data *d) {
        assert(d);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
364
/* Destroy the event loop. Called when the last reference is dropped
 * (or from sd_event_new()'s failure path on a partially set-up object). */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Only floating sources may still be attached at this point:
         * non-floating sources hold a reference on the loop, so their
         * existence would have kept us alive. Disconnect and drop them
         * first, before tearing down the queues they reference. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Unregister ourselves from the per-thread default-event slot */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
/* Allocate a new event loop object with a single reference.
 *
 * Returns 0 on success and stores the new loop in *ret; returns
 * -ENOMEM or a negative errno from epoll_create1() on failure. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark every fd as "not open" so event_free() can be used on
         * a partially initialized object below. */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;     /* lazily computed in initialize_perturb() */

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating am event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
/* (Re)create the signalfd from e->sigset and, on first creation,
 * register it with the epoll instance. Returns 0 on success, a
 * negative errno otherwise. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        /* Passing an existing fd to signalfd() just updates its mask,
         * so we only need to add it to epoll the first time around. */
        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        /* Tag the epoll entry with the source type, not a source
         * pointer: signal dispatch is shared across all signal sources. */
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                /* Roll back: close the fd we just created */
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
630
/* Detach a source from its event loop: remove it from all per-type
 * registrations (epoll, prioqs, hashmaps, signalfd mask), from the
 * pending/prepare queues and from the loop's source list. After this,
 * s->event is NULL and the source no longer fires. Idempotent. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;         /* already disconnected */

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        /* Stash the loop pointer before clearing it: we may still need
         * to drop our reference on it below. */
        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pin the loop; release that pin now */
        if (!s->floating)
                sd_event_unref(event);
}
735
/* Disconnect a source from its loop and release its memory. Only to
 * be called when the last reference is gone (or on construction
 * failure paths). */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
743
/* Mark a source as pending (b=true) or not pending (b=false),
 * inserting it into / removing it from the loop's pending prioq.
 * For time sources the two clock prioqs are reshuffled as well, since
 * their comparators sort pending sources to the back. Returns 0 on
 * success or if nothing changed, negative errno on prioq failure. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* Exit sources are dispatched from their own queue and are
         * never marked pending. */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Remember when we became pending, for FIFO dispatch
                 * among equal-priority sources. */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;     /* roll back on failure */
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
779
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
781         sd_event_source *s;
782
783         assert(e);
784
785         s = new0(sd_event_source, 1);
786         if (!s)
787                 return NULL;
788
789         s->n_ref = 1;
790         s->event = e;
791         s->floating = floating;
792         s->type = type;
793         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
794
795         if (!floating)
796                 sd_event_ref(e);
797
798         LIST_PREPEND(sources, e->sources, s);
799         e->n_sources ++;
800
801         return s;
802 }
803
804 _public_ int sd_event_add_io(
805                 sd_event *e,
806                 sd_event_source **ret,
807                 int fd,
808                 uint32_t events,
809                 sd_event_io_handler_t callback,
810                 void *userdata) {
811
812         sd_event_source *s;
813         int r;
814
815         assert_return(e, -EINVAL);
816         assert_return(fd >= 0, -EINVAL);
817         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
818         assert_return(callback, -EINVAL);
819         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
820         assert_return(!event_pid_changed(e), -ECHILD);
821
822         s = source_new(e, !ret, SOURCE_IO);
823         if (!s)
824                 return -ENOMEM;
825
826         s->io.fd = fd;
827         s->io.events = events;
828         s->io.callback = callback;
829         s->userdata = userdata;
830         s->enabled = SD_EVENT_ON;
831
832         r = source_io_register(s, s->enabled, events);
833         if (r < 0) {
834                 source_free(s);
835                 return r;
836         }
837
838         if (ret)
839                 *ret = s;
840
841         return 0;
842 }
843
/* Lazily compute e->perturb, a boot-ID-derived offset in [0,
 * USEC_PER_MINUTE) used to stagger coalesced timer wakeups across
 * machines. Left at USEC_INFINITY if the boot ID is unavailable. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;         /* already computed */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
862
/* Lazily create the timerfd for one clock and register it with the
 * epoll instance. No-op if the fd already exists. Returns 0 on
 * success, negative errno otherwise. */
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        /* Tag the epoll entry with the source type so the dispatcher
         * can tell which clock fired (shared by all sources on it). */
        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
893
/* Default handler used by sd_event_add_time() when the caller passes
 * a NULL callback: request loop exit with userdata as the exit code. */
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
899
/* Add a one-shot timer source on the given clock, firing at absolute
 * time "usec" with the given accuracy window (0 selects
 * DEFAULT_ACCURACY_USEC). A NULL callback makes the timer request
 * loop exit with userdata as exit code. If ret is NULL the source is
 * created floating. Returns 0 on success, negative errno otherwise
 * (-ENOTSUP for clocks timerfd cannot handle). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily set up the per-clock machinery: the two scheduling
         * prioqs and the timerfd itself. */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The timerfd must be re-programmed before the next poll */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
977
/* Default handler used by sd_event_add_signal() when the caller
 * passes a NULL callback: request loop exit with userdata as the
 * exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
983
984 _public_ int sd_event_add_signal(
985                 sd_event *e,
986                 sd_event_source **ret,
987                 int sig,
988                 sd_event_signal_handler_t callback,
989                 void *userdata) {
990
991         sd_event_source *s;
992         sigset_t ss;
993         int r;
994         bool previous;
995
996         assert_return(e, -EINVAL);
997         assert_return(sig > 0, -EINVAL);
998         assert_return(sig < _NSIG, -EINVAL);
999         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1000         assert_return(!event_pid_changed(e), -ECHILD);
1001
1002         if (!callback)
1003                 callback = signal_exit_callback;
1004
1005         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1006         if (r < 0)
1007                 return -errno;
1008
1009         if (!sigismember(&ss, sig))
1010                 return -EBUSY;
1011
1012         if (!e->signal_sources) {
1013                 e->signal_sources = new0(sd_event_source*, _NSIG);
1014                 if (!e->signal_sources)
1015                         return -ENOMEM;
1016         } else if (e->signal_sources[sig])
1017                 return -EBUSY;
1018
1019         previous = need_signal(e, sig);
1020
1021         s = source_new(e, !ret, SOURCE_SIGNAL);
1022         if (!s)
1023                 return -ENOMEM;
1024
1025         s->signal.sig = sig;
1026         s->signal.callback = callback;
1027         s->userdata = userdata;
1028         s->enabled = SD_EVENT_ON;
1029
1030         e->signal_sources[sig] = s;
1031
1032         if (!previous) {
1033                 assert_se(sigaddset(&e->sigset, sig) == 0);
1034
1035                 r = event_update_signal_fd(e);
1036                 if (r < 0) {
1037                         source_free(s);
1038                         return r;
1039                 }
1040         }
1041
1042         /* Use the signal name as description for the event source by default */
1043         (void) sd_event_source_set_description(s, signal_to_string(sig));
1044
1045         if (ret)
1046                 *ret = s;
1047
1048         return 0;
1049 }
1050
/* Adds an event source that watches the child process @pid.
 *
 * @options is a combination of WEXITED/WSTOPPED/WCONTINUED selecting
 * which state changes are reported. Only one source per PID and per
 * loop is allowed. The source is created enabled in one-shot mode.
 * NOTE(review): unlike sd_event_add_signal(), this does not verify
 * that SIGCHLD is blocked in the caller's mask -- confirm whether
 * that is intentional.
 *
 * Returns 0 on success, a negative errno-style error on failure. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* At most one source per PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already being watched, so the
         * signalfd is only touched when this source actually adds it. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* NOTE(review): incremented before the possible failure below;
         * presumably source_free() undoes this via source_disconnect()
         * for enabled child sources -- confirm in its definition. */
        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Force a waitid() sweep on the next loop iteration, in case the
         * child already changed state before we started watching. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1115
1116 _public_ int sd_event_add_defer(
1117                 sd_event *e,
1118                 sd_event_source **ret,
1119                 sd_event_handler_t callback,
1120                 void *userdata) {
1121
1122         sd_event_source *s;
1123         int r;
1124
1125         assert_return(e, -EINVAL);
1126         assert_return(callback, -EINVAL);
1127         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1128         assert_return(!event_pid_changed(e), -ECHILD);
1129
1130         s = source_new(e, !ret, SOURCE_DEFER);
1131         if (!s)
1132                 return -ENOMEM;
1133
1134         s->defer.callback = callback;
1135         s->userdata = userdata;
1136         s->enabled = SD_EVENT_ONESHOT;
1137
1138         r = source_set_pending(s, true);
1139         if (r < 0) {
1140                 source_free(s);
1141                 return r;
1142         }
1143
1144         if (ret)
1145                 *ret = s;
1146
1147         return 0;
1148 }
1149
1150 _public_ int sd_event_add_post(
1151                 sd_event *e,
1152                 sd_event_source **ret,
1153                 sd_event_handler_t callback,
1154                 void *userdata) {
1155
1156         sd_event_source *s;
1157         int r;
1158
1159         assert_return(e, -EINVAL);
1160         assert_return(callback, -EINVAL);
1161         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1162         assert_return(!event_pid_changed(e), -ECHILD);
1163
1164         r = set_ensure_allocated(&e->post_sources, NULL);
1165         if (r < 0)
1166                 return r;
1167
1168         s = source_new(e, !ret, SOURCE_POST);
1169         if (!s)
1170                 return -ENOMEM;
1171
1172         s->post.callback = callback;
1173         s->userdata = userdata;
1174         s->enabled = SD_EVENT_ON;
1175
1176         r = set_put(e->post_sources, s);
1177         if (r < 0) {
1178                 source_free(s);
1179                 return r;
1180         }
1181
1182         if (ret)
1183                 *ret = s;
1184
1185         return 0;
1186 }
1187
1188 _public_ int sd_event_add_exit(
1189                 sd_event *e,
1190                 sd_event_source **ret,
1191                 sd_event_handler_t callback,
1192                 void *userdata) {
1193
1194         sd_event_source *s;
1195         int r;
1196
1197         assert_return(e, -EINVAL);
1198         assert_return(callback, -EINVAL);
1199         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1200         assert_return(!event_pid_changed(e), -ECHILD);
1201
1202         if (!e->exit) {
1203                 e->exit = prioq_new(exit_prioq_compare);
1204                 if (!e->exit)
1205                         return -ENOMEM;
1206         }
1207
1208         s = source_new(e, !ret, SOURCE_EXIT);
1209         if (!s)
1210                 return -ENOMEM;
1211
1212         s->exit.callback = callback;
1213         s->userdata = userdata;
1214         s->exit.prioq_index = PRIOQ_IDX_NULL;
1215         s->enabled = SD_EVENT_ONESHOT;
1216
1217         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1218         if (r < 0) {
1219                 source_free(s);
1220                 return r;
1221         }
1222
1223         if (ret)
1224                 *ret = s;
1225
1226         return 0;
1227 }
1228
1229 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1230         assert_return(s, NULL);
1231
1232         assert(s->n_ref >= 1);
1233         s->n_ref++;
1234
1235         return s;
1236 }
1237
/* Drops a reference on the event source, freeing it when the count
 * reaches zero. Always returns NULL so callers can write
 * "s = sd_event_source_unref(s);". NULL input is a no-op. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        /* Only I/O sources hold an epoll registration
                         * that must be dropped before the fd is closed. */
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        /* Detach from the loop now; the actual free is
                         * deferred until dispatching finishes. */
                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1266
/* Sets (or, with NULL, clears) the human-readable description of the
 * event source, replacing any previous one. Returns 0 on success,
 * -ENOMEM if the copy fails. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
1273
/* Returns the source's description via @description, or -ENXIO if none
 * was set. The returned string is owned by the source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
1283
/* Returns the event loop the source is attached to (no new reference
 * is taken), or NULL if @s is NULL. */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1289
/* Returns whether the source is currently pending (>0 pending, 0 not),
 * or a negative error. Not defined for exit sources (-EDOM). */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1298
/* Returns the file descriptor watched by an I/O source, or a negative
 * error (-EDOM if the source is not of type SOURCE_IO). */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1306
/* Replaces the file descriptor watched by an I/O source.
 *
 * If the source is enabled, the new fd is registered with epoll first
 * and the old one removed only after registration succeeded, so a
 * failure leaves the source watching the original fd. Returns 0 on
 * success, a negative errno-style error otherwise. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered with epoll; just remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first so we can roll back cleanly. */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best-effort removal of the old registration; failure is
                 * not propagated. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1342
/* Returns the epoll event mask configured for an I/O source via
 * @events. -EDOM if the source is not of type SOURCE_IO. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1352
1353 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1354         int r;
1355
1356         assert_return(s, -EINVAL);
1357         assert_return(s->type == SOURCE_IO, -EDOM);
1358         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1359         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1360         assert_return(!event_pid_changed(s->event), -ECHILD);
1361
1362         /* edge-triggered updates are never skipped, so we can reset edges */
1363         if (s->io.events == events && !(events & EPOLLET))
1364                 return 0;
1365
1366         if (s->enabled != SD_EVENT_OFF) {
1367                 r = source_io_register(s, s->enabled, events);
1368                 if (r < 0)
1369                         return r;
1370         }
1371
1372         s->io.events = events;
1373         source_set_pending(s, false);
1374
1375         return 0;
1376 }
1377
/* Returns the epoll events that triggered the current dispatch via
 * @revents. Only valid while the source is pending (-ENODATA else). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1388
/* Returns the signal number watched by a signal source, or a negative
 * error (-EDOM if the source is not of type SOURCE_SIGNAL). */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1396
1397 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1398         assert_return(s, -EINVAL);
1399         assert_return(!event_pid_changed(s->event), -ECHILD);
1400
1401         return s->priority;
1402 }
1403
1404 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1405         assert_return(s, -EINVAL);
1406         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1407         assert_return(!event_pid_changed(s->event), -ECHILD);
1408
1409         if (s->priority == priority)
1410                 return 0;
1411
1412         s->priority = priority;
1413
1414         if (s->pending)
1415                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1416
1417         if (s->prepare)
1418                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1419
1420         if (s->type == SOURCE_EXIT)
1421                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1422
1423         return 0;
1424 }
1425
/* Returns the enable state (SD_EVENT_OFF/ON/ONESHOT) via @m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1434
1435 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1436         int r;
1437
1438         assert_return(s, -EINVAL);
1439         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1440         assert_return(!event_pid_changed(s->event), -ECHILD);
1441
1442         /* If we are dead anyway, we are fine with turning off
1443          * sources, but everything else needs to fail. */
1444         if (s->event->state == SD_EVENT_FINISHED)
1445                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1446
1447         if (s->enabled == m)
1448                 return 0;
1449
1450         if (m == SD_EVENT_OFF) {
1451
1452                 switch (s->type) {
1453
1454                 case SOURCE_IO:
1455                         r = source_io_unregister(s);
1456                         if (r < 0)
1457                                 return r;
1458
1459                         s->enabled = m;
1460                         break;
1461
1462                 case SOURCE_TIME_REALTIME:
1463                 case SOURCE_TIME_BOOTTIME:
1464                 case SOURCE_TIME_MONOTONIC:
1465                 case SOURCE_TIME_REALTIME_ALARM:
1466                 case SOURCE_TIME_BOOTTIME_ALARM: {
1467                         struct clock_data *d;
1468
1469                         s->enabled = m;
1470                         d = event_get_clock_data(s->event, s->type);
1471                         assert(d);
1472
1473                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1474                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1475                         d->needs_rearm = true;
1476                         break;
1477                 }
1478
1479                 case SOURCE_SIGNAL:
1480                         assert(need_signal(s->event, s->signal.sig));
1481
1482                         s->enabled = m;
1483
1484                         if (!need_signal(s->event, s->signal.sig)) {
1485                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1486
1487                                 (void) event_update_signal_fd(s->event);
1488                                 /* If disabling failed, we might get a spurious event,
1489                                  * but otherwise nothing bad should happen. */
1490                         }
1491
1492                         break;
1493
1494                 case SOURCE_CHILD:
1495                         assert(need_signal(s->event, SIGCHLD));
1496
1497                         s->enabled = m;
1498
1499                         assert(s->event->n_enabled_child_sources > 0);
1500                         s->event->n_enabled_child_sources--;
1501
1502                         if (!need_signal(s->event, SIGCHLD)) {
1503                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1504
1505                                 (void) event_update_signal_fd(s->event);
1506                         }
1507
1508                         break;
1509
1510                 case SOURCE_EXIT:
1511                         s->enabled = m;
1512                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1513                         break;
1514
1515                 case SOURCE_DEFER:
1516                 case SOURCE_POST:
1517                         s->enabled = m;
1518                         break;
1519
1520                 default:
1521                         assert_not_reached("Wut? I shouldn't exist.");
1522                 }
1523
1524         } else {
1525                 switch (s->type) {
1526
1527                 case SOURCE_IO:
1528                         r = source_io_register(s, m, s->io.events);
1529                         if (r < 0)
1530                                 return r;
1531
1532                         s->enabled = m;
1533                         break;
1534
1535                 case SOURCE_TIME_REALTIME:
1536                 case SOURCE_TIME_BOOTTIME:
1537                 case SOURCE_TIME_MONOTONIC:
1538                 case SOURCE_TIME_REALTIME_ALARM:
1539                 case SOURCE_TIME_BOOTTIME_ALARM: {
1540                         struct clock_data *d;
1541
1542                         s->enabled = m;
1543                         d = event_get_clock_data(s->event, s->type);
1544                         assert(d);
1545
1546                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1547                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1548                         d->needs_rearm = true;
1549                         break;
1550                 }
1551
1552                 case SOURCE_SIGNAL:
1553                         /* Check status before enabling. */
1554                         if (!need_signal(s->event, s->signal.sig)) {
1555                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1556
1557                                 r = event_update_signal_fd(s->event);
1558                                 if (r < 0) {
1559                                         s->enabled = SD_EVENT_OFF;
1560                                         return r;
1561                                 }
1562                         }
1563
1564                         s->enabled = m;
1565                         break;
1566
1567                 case SOURCE_CHILD:
1568                         /* Check status before enabling. */
1569                         if (s->enabled == SD_EVENT_OFF) {
1570                                 if (!need_signal(s->event, SIGCHLD)) {
1571                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1572
1573                                         r = event_update_signal_fd(s->event);
1574                                         if (r < 0) {
1575                                                 s->enabled = SD_EVENT_OFF;
1576                                                 return r;
1577                                         }
1578                                 }
1579
1580                                 s->event->n_enabled_child_sources++;
1581                         }
1582
1583                         s->enabled = m;
1584                         break;
1585
1586                 case SOURCE_EXIT:
1587                         s->enabled = m;
1588                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1589                         break;
1590
1591                 case SOURCE_DEFER:
1592                 case SOURCE_POST:
1593                         s->enabled = m;
1594                         break;
1595
1596                 default:
1597                         assert_not_reached("Wut? I shouldn't exist.");
1598                 }
1599         }
1600
1601         if (s->pending)
1602                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1603
1604         if (s->prepare)
1605                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1606
1607         return 0;
1608 }
1609
/* Returns the absolute expiry time of a timer source via @usec.
 * -EDOM if the source is not a timer source. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1619
1620 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1621         struct clock_data *d;
1622
1623         assert_return(s, -EINVAL);
1624         assert_return(usec != (uint64_t) -1, -EINVAL);
1625         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1626         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1627         assert_return(!event_pid_changed(s->event), -ECHILD);
1628
1629         s->time.next = usec;
1630
1631         source_set_pending(s, false);
1632
1633         d = event_get_clock_data(s->event, s->type);
1634         assert(d);
1635
1636         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1637         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1638         d->needs_rearm = true;
1639
1640         return 0;
1641 }
1642
/* Returns the timer source's accuracy (allowed dispatch slack, in
 * microseconds) via @usec. -EDOM if not a timer source. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1652
1653 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1654         struct clock_data *d;
1655
1656         assert_return(s, -EINVAL);
1657         assert_return(usec != (uint64_t) -1, -EINVAL);
1658         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1659         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1660         assert_return(!event_pid_changed(s->event), -ECHILD);
1661
1662         if (usec == 0)
1663                 usec = DEFAULT_ACCURACY_USEC;
1664
1665         s->time.accuracy = usec;
1666
1667         source_set_pending(s, false);
1668
1669         d = event_get_clock_data(s->event, s->type);
1670         assert(d);
1671
1672         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1673         d->needs_rearm = true;
1674
1675         return 0;
1676 }
1677
/* Returns the clockid_t backing a timer source via @clock.
 * -EDOM if the source is not a timer source. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1687
/* Returns the PID watched by a child source via @pid.
 * -EDOM if the source is not of type SOURCE_CHILD. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1697
/* Installs (or, with NULL, removes) a prepare callback invoked right
 * before the loop polls. Sources with a prepare callback are tracked in
 * the event's prepare prioq. Not supported for exit sources (-EDOM).
 * Returns 0 on success, a negative errno-style error otherwise. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Replacing one callback with another: the source is already in
         * the prepare queue, just swap the function pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        /* NOTE(review): allocation also runs on the removal path
         * (callback == NULL) where it is not strictly needed; harmless,
         * since prioq_remove() on the fresh queue is a no-op. */
        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1729
/* Returns the opaque userdata pointer associated with the source,
 * or NULL if @s is NULL. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1735
1736 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1737         void *ret;
1738
1739         assert_return(s, NULL);
1740
1741         ret = s->userdata;
1742         s->userdata = userdata;
1743
1744         return ret;
1745 }
1746
/* Picks a wakeup time within [a, b], preferring system-wide aligned
 * spots (perturbed per boot ID) so that many machines/services wake up
 * together. Falls back through minute -> 10s -> 1s -> 250ms alignment,
 * and finally to b itself. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* usec_t is unsigned, so this only matches a == 0: wake up
         * immediately. */
        if (a <= 0)
                return 0;

        /* Window too narrow to play alignment games. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Minute alignment. NOTE(review): e->perturb is used unreduced
         * here, which assumes initialize_perturb() keeps it below
         * USEC_PER_MINUTE -- confirm in its definition. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                /* Aligned spot lies past b; step back one period,
                 * guarding against unsigned underflow. */
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Same idea at 10s granularity. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* 1s granularity. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* 250ms granularity. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No aligned spot fits; wake at the latest permissible time. */
        return b;
}
1824
1825 static int event_arm_timer(
1826                 sd_event *e,
1827                 struct clock_data *d) {
1828
1829         struct itimerspec its = {};
1830         sd_event_source *a, *b;
1831         usec_t t;
1832         int r;
1833
1834         assert(e);
1835         assert(d);
1836
1837         if (!d->needs_rearm)
1838                 return 0;
1839         else
1840                 d->needs_rearm = false;
1841
1842         a = prioq_peek(d->earliest);
1843         if (!a || a->enabled == SD_EVENT_OFF) {
1844
1845                 if (d->fd < 0)
1846                         return 0;
1847
1848                 if (d->next == USEC_INFINITY)
1849                         return 0;
1850
1851                 /* disarm */
1852                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1853                 if (r < 0)
1854                         return r;
1855
1856                 d->next = USEC_INFINITY;
1857                 return 0;
1858         }
1859
1860         b = prioq_peek(d->latest);
1861         assert_se(b && b->enabled != SD_EVENT_OFF);
1862
1863         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1864         if (d->next == t)
1865                 return 0;
1866
1867         assert_se(d->fd >= 0);
1868
1869         if (t == 0) {
1870                 /* We don' want to disarm here, just mean some time looooong ago. */
1871                 its.it_value.tv_sec = 0;
1872                 its.it_value.tv_nsec = 1;
1873         } else
1874                 timespec_store(&its.it_value, t);
1875
1876         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1877         if (r < 0)
1878                 return -errno;
1879
1880         d->next = t;
1881         return 0;
1882 }
1883
1884 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1885         assert(e);
1886         assert(s);
1887         assert(s->type == SOURCE_IO);
1888
1889         /* If the event source was already pending, we just OR in the
1890          * new revents, otherwise we reset the value. The ORing is
1891          * necessary to handle EPOLLONESHOT events properly where
1892          * readability might happen independently of writability, and
1893          * we need to keep track of both */
1894
1895         if (s->pending)
1896                 s->io.revents |= revents;
1897         else
1898                 s->io.revents = revents;
1899
1900         return source_set_pending(s, true);
1901 }
1902
1903 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1904         uint64_t x;
1905         ssize_t ss;
1906
1907         assert(e);
1908         assert(fd >= 0);
1909
1910         assert_return(events == EPOLLIN, -EIO);
1911
1912         ss = read(fd, &x, sizeof(x));
1913         if (ss < 0) {
1914                 if (errno == EAGAIN || errno == EINTR)
1915                         return 0;
1916
1917                 return -errno;
1918         }
1919
1920         if (_unlikely_(ss != sizeof(x)))
1921                 return -EIO;
1922
1923         if (next)
1924                 *next = USEC_INFINITY;
1925
1926         return 0;
1927 }
1928
1929 static int process_timer(
1930                 sd_event *e,
1931                 usec_t n,
1932                 struct clock_data *d) {
1933
1934         sd_event_source *s;
1935         int r;
1936
1937         assert(e);
1938         assert(d);
1939
1940         for (;;) {
1941                 s = prioq_peek(d->earliest);
1942                 if (!s ||
1943                     s->time.next > n ||
1944                     s->enabled == SD_EVENT_OFF ||
1945                     s->pending)
1946                         break;
1947
1948                 r = source_set_pending(s, true);
1949                 if (r < 0)
1950                         return r;
1951
1952                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1953                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1954                 d->needs_rearm = true;
1955         }
1956
1957         return 0;
1958 }
1959
/* Poll each watched child PID (without reaping it) and mark its event
 * source pending if the child changed state. Invoked from the SIGCHLD
 * path in process_signal() and from sd_event_wait() when
 * need_process_child is set. Returns 0 on success, negative
 * errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Skip sources already queued for dispatch... */
                if (s->pending)
                        continue;

                /* ...and sources that are turned off. */
                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* WNOWAIT keeps a dead child unreaped (see comment
                 * above); it is only passed when we actually watch for
                 * WEXITED, as stop/continue events leave no zombie. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* With WNOHANG, si_pid stays 0 when no state change
                 * was available. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2026
/* Drain the signalfd and mark the matching signal event sources as
 * pending. Loops until the fd is empty; returns > 0 if at least one
 * signal was read, 0 if nothing was queued, negative errno-style
 * error on failure. SIGCHLD additionally triggers process_child(). */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* Queue drained (or interrupted): report
                         * whether we saw anything at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads are always whole structures. */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                /* Hand the siginfo to the registered source for this
                 * signal, if any; otherwise the signal is dropped. */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2075
/* Run the callback of a single event source. Clears the pending flag
 * first (except for defer/exit sources), marks all post sources
 * pending, disables ONESHOT sources before the callback runs, and
 * disables any source whose callback returns an error. Returns 1 on
 * success (even if the callback failed — the failure is logged and
 * the source disabled), or a negative error from the bookkeeping
 * steps themselves. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer sources stay pending so they run every iteration;
         * exit sources are dispatched without ever being pending. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Turn off ONESHOT sources before dispatching, so the
         * callback may re-enable the source if it wants to. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Evaluate before the callback runs: the callback may
                 * overwrite/consume the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* The callback may have dropped the last reference; if so,
         * free the source now that dispatching is done. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2181
/* Invoke the prepare callbacks of all sources that registered one, at
 * most once per loop iteration. The prepare prioq keeps not-yet-run
 * sources at its head (ordered by prepare_iteration), so we pop until
 * we hit one already handled this iteration or one that's disabled.
 * Sources whose prepare callback fails are disabled, mirroring
 * source_dispatch(). Returns 0 on success, negative on error. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp and reshuffle BEFORE running the callback, so
                 * the next peek yields a different source. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                /* The callback may have dropped the last reference. */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2220
2221 static int dispatch_exit(sd_event *e) {
2222         sd_event_source *p;
2223         int r;
2224
2225         assert(e);
2226
2227         p = prioq_peek(e->exit);
2228         if (!p || p->enabled == SD_EVENT_OFF) {
2229                 e->state = SD_EVENT_FINISHED;
2230                 return 0;
2231         }
2232
2233         sd_event_ref(e);
2234         e->iteration++;
2235         e->state = SD_EVENT_EXITING;
2236
2237         r = source_dispatch(p);
2238
2239         e->state = SD_EVENT_PASSIVE;
2240         sd_event_unref(e);
2241
2242         return r;
2243 }
2244
2245 static sd_event_source* event_next_pending(sd_event *e) {
2246         sd_event_source *p;
2247
2248         assert(e);
2249
2250         p = prioq_peek(e->pending);
2251         if (!p)
2252                 return NULL;
2253
2254         if (p->enabled == SD_EVENT_OFF)
2255                 return NULL;
2256
2257         return p;
2258 }
2259
2260 static int arm_watchdog(sd_event *e) {
2261         struct itimerspec its = {};
2262         usec_t t;
2263         int r;
2264
2265         assert(e);
2266         assert(e->watchdog_fd >= 0);
2267
2268         t = sleep_between(e,
2269                           e->watchdog_last + (e->watchdog_period / 2),
2270                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2271
2272         timespec_store(&its.it_value, t);
2273
2274         /* Make sure we never set the watchdog to 0, which tells the
2275          * kernel to disable it. */
2276         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2277                 its.it_value.tv_nsec = 1;
2278
2279         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2280         if (r < 0)
2281                 return -errno;
2282
2283         return 0;
2284 }
2285
2286 static int process_watchdog(sd_event *e) {
2287         assert(e);
2288
2289         if (!e->watchdog)
2290                 return 0;
2291
2292         /* Don't notify watchdog too often */
2293         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2294                 return 0;
2295
2296         sd_notify(false, "WATCHDOG=1");
2297         e->watchdog_last = e->timestamp.monotonic;
2298
2299         return arm_watchdog(e);
2300 }
2301
/* First stage of one loop iteration: run prepare callbacks and arm
 * the per-clock timerfds. Transitions PASSIVE -> PREPARED, or — when
 * something is already dispatchable — via sd_event_wait(e, 0) into
 * PENDING. Returns > 0 if there is something to dispatch right away,
 * 0 if the caller should sd_event_wait(), negative on error. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Exit requested: skip straight to the pending path so that
         * dispatch runs the exit sources. */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Rearm every clock's timerfd for its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is dispatchable already: do a zero-timeout wait
         * to collect it. sd_event_wait() requires PREPARED state and
         * resets to PASSIVE when it returns 0, so restore PREPARED in
         * that case for consistency. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2354
/* Second stage of an iteration: block in epoll_wait() for up to
 * 'timeout' usec ((uint64_t) -1 = forever), then flush the fds that
 * fired and mark the corresponding sources pending. Must be called in
 * SD_EVENT_PREPARED state. Returns 1 (state PENDING) if there is
 * something to dispatch, 0 (state PASSIVE) if not, negative
 * errno-style error otherwise. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        /* Exit requested: report "pending" immediately so that
         * sd_event_dispatch() can run the exit sources. */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocate the epoll output array, capped at
         * EPOLL_QUEUE_MAX entries. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* epoll_wait() takes milliseconds; round the usec value up. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat EINTR like a wake-up: the caller will just
                 * find nothing pending and come back. */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wake-up timestamps; the process_timer() calls
         * below and sd_event_now() read them. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Flush every fd that fired. Internal fds carry a SOURCE_*
         * tag in data.ptr; anything else is an IO source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue every timer source that elapsed, per clock. Note the
         * alarm clocks share the realtime/boottime timestamps. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2455
2456 _public_ int sd_event_dispatch(sd_event *e) {
2457         sd_event_source *p;
2458         int r;
2459
2460         assert_return(e, -EINVAL);
2461         assert_return(!event_pid_changed(e), -ECHILD);
2462         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2463         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2464
2465         if (e->exit_requested)
2466                 return dispatch_exit(e);
2467
2468         p = event_next_pending(e);
2469         if (p) {
2470                 sd_event_ref(e);
2471
2472                 e->state = SD_EVENT_RUNNING;
2473                 r = source_dispatch(p);
2474                 e->state = SD_EVENT_PASSIVE;
2475
2476                 sd_event_unref(e);
2477
2478                 return r;
2479         }
2480
2481         e->state = SD_EVENT_PASSIVE;
2482
2483         return 1;
2484 }
2485
2486 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2487         int r;
2488
2489         assert_return(e, -EINVAL);
2490         assert_return(!event_pid_changed(e), -ECHILD);
2491         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2492         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2493
2494         r = sd_event_prepare(e);
2495         if (r > 0)
2496                 return sd_event_dispatch(e);
2497         else if (r < 0)
2498                 return r;
2499
2500         r = sd_event_wait(e, timeout);
2501         if (r > 0)
2502                 return sd_event_dispatch(e);
2503         else
2504                 return r;
2505 }
2506
2507 _public_ int sd_event_loop(sd_event *e) {
2508         int r;
2509
2510         assert_return(e, -EINVAL);
2511         assert_return(!event_pid_changed(e), -ECHILD);
2512         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2513
2514         sd_event_ref(e);
2515
2516         while (e->state != SD_EVENT_FINISHED) {
2517                 r = sd_event_run(e, (uint64_t) -1);
2518                 if (r < 0)
2519                         goto finish;
2520         }
2521
2522         r = e->exit_code;
2523
2524 finish:
2525         sd_event_unref(e);
2526         return r;
2527 }
2528
2529 _public_ int sd_event_get_fd(sd_event *e) {
2530
2531         assert_return(e, -EINVAL);
2532         assert_return(!event_pid_changed(e), -ECHILD);
2533
2534         return e->epoll_fd;
2535 }
2536
2537 _public_ int sd_event_get_state(sd_event *e) {
2538         assert_return(e, -EINVAL);
2539         assert_return(!event_pid_changed(e), -ECHILD);
2540
2541         return e->state;
2542 }
2543
2544 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2545         assert_return(e, -EINVAL);
2546         assert_return(code, -EINVAL);
2547         assert_return(!event_pid_changed(e), -ECHILD);
2548
2549         if (!e->exit_requested)
2550                 return -ENODATA;
2551
2552         *code = e->exit_code;
2553         return 0;
2554 }
2555
2556 _public_ int sd_event_exit(sd_event *e, int code) {
2557         assert_return(e, -EINVAL);
2558         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2559         assert_return(!event_pid_changed(e), -ECHILD);
2560
2561         e->exit_requested = true;
2562         e->exit_code = code;
2563
2564         return 0;
2565 }
2566
2567 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2568         assert_return(e, -EINVAL);
2569         assert_return(usec, -EINVAL);
2570         assert_return(!event_pid_changed(e), -ECHILD);
2571
2572         /* If we haven't run yet, just get the actual time */
2573         if (!dual_timestamp_is_set(&e->timestamp))
2574                 return -ENODATA;
2575
2576         switch (clock) {
2577
2578         case CLOCK_REALTIME:
2579         case CLOCK_REALTIME_ALARM:
2580                 *usec = e->timestamp.realtime;
2581                 break;
2582
2583         case CLOCK_MONOTONIC:
2584                 *usec = e->timestamp.monotonic;
2585                 break;
2586
2587         case CLOCK_BOOTTIME:
2588         case CLOCK_BOOTTIME_ALARM:
2589                 *usec = e->timestamp_boottime;
2590                 break;
2591         }
2592
2593         return 0;
2594 }
2595
2596 _public_ int sd_event_default(sd_event **ret) {
2597
2598         static thread_local sd_event *default_event = NULL;
2599         sd_event *e = NULL;
2600         int r;
2601
2602         if (!ret)
2603                 return !!default_event;
2604
2605         if (default_event) {
2606                 *ret = sd_event_ref(default_event);
2607                 return 0;
2608         }
2609
2610         r = sd_event_new(&e);
2611         if (r < 0)
2612                 return r;
2613
2614         e->default_event_ptr = &default_event;
2615         e->tid = gettid();
2616         default_event = e;
2617
2618         *ret = e;
2619         return 1;
2620 }
2621
2622 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2623         assert_return(e, -EINVAL);
2624         assert_return(tid, -EINVAL);
2625         assert_return(!event_pid_changed(e), -ECHILD);
2626
2627         if (e->tid != 0) {
2628                 *tid = e->tid;
2629                 return 0;
2630         }
2631
2632         return -ENXIO;
2633 }
2634
/* Enable or disable automatic watchdog notification. When enabled
 * (and the service manager requested it via the environment, see
 * sd_watchdog_enabled()), the loop pings "WATCHDOG=1" from
 * process_watchdog(), paced by a CLOCK_MONOTONIC timerfd registered
 * in our epoll set. Returns the resulting watchdog state (0/1), the
 * non-positive result of sd_watchdog_enabled() if watchdog support
 * isn't configured, or a negative errno-style error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state: nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means the watchdog logic is disabled in the
                 * environment; pass that through to the caller. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd so sd_event_wait() recognizes it. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: unregister and close the timerfd. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Roll back the half-initialized timerfd. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2686
2687 _public_ int sd_event_get_watchdog(sd_event *e) {
2688         assert_return(e, -EINVAL);
2689         assert_return(!event_pid_changed(e), -ECHILD);
2690
2691         return e->watchdog;
2692 }