src/libsystemd/sd-event/sd-event.c (elogind.git)
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
43 typedef enum EventSourceType {
44         SOURCE_IO,
45         SOURCE_TIME_REALTIME,
46         SOURCE_TIME_BOOTTIME,
47         SOURCE_TIME_MONOTONIC,
48         SOURCE_TIME_REALTIME_ALARM,
49         SOURCE_TIME_BOOTTIME_ALARM,
50         SOURCE_SIGNAL,
51         SOURCE_CHILD,
52         SOURCE_DEFER,
53         SOURCE_POST,
54         SOURCE_EXIT,
55         SOURCE_WATCHDOG,
56         _SOURCE_EVENT_SOURCE_TYPE_MAX,
57         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
58 } EventSourceType;
59
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
62 struct sd_event_source {
63         unsigned n_ref;
64
65         sd_event *event;
66         void *userdata;
67         sd_event_handler_t prepare;
68
69         EventSourceType type:5;
70         int enabled:3;
71         bool pending:1;
72         bool dispatching:1;
73         bool floating:1;
74
75         int64_t priority;
76         unsigned pending_index;
77         unsigned prepare_index;
78         unsigned pending_iteration;
79         unsigned prepare_iteration;
80
81         LIST_FIELDS(sd_event_source, sources);
82
83         union {
84                 struct {
85                         sd_event_io_handler_t callback;
86                         int fd;
87                         uint32_t events;
88                         uint32_t revents;
89                         bool registered:1;
90                 } io;
91                 struct {
92                         sd_event_time_handler_t callback;
93                         usec_t next, accuracy;
94                         unsigned earliest_index;
95                         unsigned latest_index;
96                 } time;
97                 struct {
98                         sd_event_signal_handler_t callback;
99                         struct signalfd_siginfo siginfo;
100                         int sig;
101                 } signal;
102                 struct {
103                         sd_event_child_handler_t callback;
104                         siginfo_t siginfo;
105                         pid_t pid;
106                         int options;
107                 } child;
108                 struct {
109                         sd_event_handler_t callback;
110                 } defer;
111                 struct {
112                         sd_event_handler_t callback;
113                 } post;
114                 struct {
115                         sd_event_handler_t callback;
116                         unsigned prioq_index;
117                 } exit;
118         };
119 };
120
121 struct clock_data {
122         int fd;
123
124         /* For each clock we maintain two priority queues: one
125          * ordered by the earliest time at which events may be
126          * dispatched, and one ordered by the latest time by which
127          * they must have been dispatched. The range between the top
128          * entries of the two prioqs is the window within which we
129          * may freely schedule the wakeup. */
130
131         Prioq *earliest;
132         Prioq *latest;
133         usec_t next;
134 };
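/* Editor's illustration (not part of the original source; the two timer
 * sources are hypothetical): a source with next=T and accuracy=A is keyed by
 * T in "earliest" and by T+A in "latest". With source 1 at (T=10.0s,
 * A=250ms) and source 2 at (T=10.1s, A=1s), event_arm_timer() may arm the
 * timerfd anywhere in [10.0s, 10.25s]; any wakeup at or after 10.1s inside
 * that window dispatches both sources in one go. */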
135
136 struct sd_event {
137         unsigned n_ref;
138
139         int epoll_fd;
140         int signal_fd;
141         int watchdog_fd;
142
143         Prioq *pending;
144         Prioq *prepare;
145
146         /* timerfd_create() only supports these five clocks so far. We
147          * can add support for more clocks when the kernel learns to
148          * deal with them, too. */
149         struct clock_data realtime;
150         struct clock_data boottime;
151         struct clock_data monotonic;
152         struct clock_data realtime_alarm;
153         struct clock_data boottime_alarm;
154
155         usec_t perturb;
156
157         sigset_t sigset;
158         sd_event_source **signal_sources;
159
160         Hashmap *child_sources;
161         unsigned n_enabled_child_sources;
162
163         Set *post_sources;
164
165         Prioq *exit;
166
167         pid_t original_pid;
168
169         unsigned iteration;
170         dual_timestamp timestamp;
171         usec_t timestamp_boottime;
172         int state;
173
174         bool exit_requested:1;
175         bool need_process_child:1;
176         bool watchdog:1;
177
178         int exit_code;
179
180         pid_t tid;
181         sd_event **default_event_ptr;
182
183         usec_t watchdog_last, watchdog_period;
184
185         unsigned n_sources;
186
187         LIST_HEAD(sd_event_source, sources);
188 };
189
190 static void source_disconnect(sd_event_source *s);
191
192 static int pending_prioq_compare(const void *a, const void *b) {
193         const sd_event_source *x = a, *y = b;
194
195         assert(x->pending);
196         assert(y->pending);
197
198         /* Enabled ones first */
199         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
200                 return -1;
201         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
202                 return 1;
203
204         /* Lower priority values first */
205         if (x->priority < y->priority)
206                 return -1;
207         if (x->priority > y->priority)
208                 return 1;
209
210         /* Older entries first */
211         if (x->pending_iteration < y->pending_iteration)
212                 return -1;
213         if (x->pending_iteration > y->pending_iteration)
214                 return 1;
215
216         /* Stability for the rest */
217         if (x < y)
218                 return -1;
219         if (x > y)
220                 return 1;
221
222         return 0;
223 }
224
225 static int prepare_prioq_compare(const void *a, const void *b) {
226         const sd_event_source *x = a, *y = b;
227
228         assert(x->prepare);
229         assert(y->prepare);
230
231         /* Move most recently prepared ones last, so that we can stop
232          * preparing as soon as we hit one that has already been
233          * prepared in the current iteration */
234         if (x->prepare_iteration < y->prepare_iteration)
235                 return -1;
236         if (x->prepare_iteration > y->prepare_iteration)
237                 return 1;
238
239         /* Enabled ones first */
240         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
241                 return -1;
242         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
243                 return 1;
244
245         /* Lower priority values first */
246         if (x->priority < y->priority)
247                 return -1;
248         if (x->priority > y->priority)
249                 return 1;
250
251         /* Stability for the rest */
252         if (x < y)
253                 return -1;
254         if (x > y)
255                 return 1;
256
257         return 0;
258 }
259
260 static int earliest_time_prioq_compare(const void *a, const void *b) {
261         const sd_event_source *x = a, *y = b;
262
263         assert(EVENT_SOURCE_IS_TIME(x->type));
264         assert(x->type == y->type);
265
266         /* Enabled ones first */
267         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
268                 return -1;
269         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
270                 return 1;
271
272         /* Move the pending ones to the end */
273         if (!x->pending && y->pending)
274                 return -1;
275         if (x->pending && !y->pending)
276                 return 1;
277
278         /* Order by time */
279         if (x->time.next < y->time.next)
280                 return -1;
281         if (x->time.next > y->time.next)
282                 return 1;
283
284         /* Stability for the rest */
285         if (x < y)
286                 return -1;
287         if (x > y)
288                 return 1;
289
290         return 0;
291 }
292
293 static int latest_time_prioq_compare(const void *a, const void *b) {
294         const sd_event_source *x = a, *y = b;
295
296         assert(EVENT_SOURCE_IS_TIME(x->type));
297         assert(x->type == y->type);
298
299         /* Enabled ones first */
300         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
301                 return -1;
302         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
303                 return 1;
304
305         /* Move the pending ones to the end */
306         if (!x->pending && y->pending)
307                 return -1;
308         if (x->pending && !y->pending)
309                 return 1;
310
311         /* Order by time */
312         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
313                 return -1;
314         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
315                 return 1;
316
317         /* Stability for the rest */
318         if (x < y)
319                 return -1;
320         if (x > y)
321                 return 1;
322
323         return 0;
324 }
325
326 static int exit_prioq_compare(const void *a, const void *b) {
327         const sd_event_source *x = a, *y = b;
328
329         assert(x->type == SOURCE_EXIT);
330         assert(y->type == SOURCE_EXIT);
331
332         /* Enabled ones first */
333         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
334                 return -1;
335         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
336                 return 1;
337
338         /* Lower priority values first */
339         if (x->priority < y->priority)
340                 return -1;
341         if (x->priority > y->priority)
342                 return 1;
343
344         /* Stability for the rest */
345         if (x < y)
346                 return -1;
347         if (x > y)
348                 return 1;
349
350         return 0;
351 }
352
353 static void free_clock_data(struct clock_data *d) {
354         assert(d);
355
356         safe_close(d->fd);
357         prioq_free(d->earliest);
358         prioq_free(d->latest);
359 }
360
361 static void event_free(sd_event *e) {
362         sd_event_source *s;
363
364         assert(e);
365
366         while ((s = e->sources)) {
367                 assert(s->floating);
368                 source_disconnect(s);
369                 sd_event_source_unref(s);
370         }
371
372         assert(e->n_sources == 0);
373
374         if (e->default_event_ptr)
375                 *(e->default_event_ptr) = NULL;
376
377         safe_close(e->epoll_fd);
378         safe_close(e->signal_fd);
379         safe_close(e->watchdog_fd);
380
381         free_clock_data(&e->realtime);
382         free_clock_data(&e->boottime);
383         free_clock_data(&e->monotonic);
384         free_clock_data(&e->realtime_alarm);
385         free_clock_data(&e->boottime_alarm);
386
387         prioq_free(e->pending);
388         prioq_free(e->prepare);
389         prioq_free(e->exit);
390
391         free(e->signal_sources);
392
393         hashmap_free(e->child_sources);
394         set_free(e->post_sources);
395         free(e);
396 }
397
398 _public_ int sd_event_new(sd_event** ret) {
399         sd_event *e;
400         int r;
401
402         assert_return(ret, -EINVAL);
403
404         e = new0(sd_event, 1);
405         if (!e)
406                 return -ENOMEM;
407
408         e->n_ref = 1;
409         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
410         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
411         e->original_pid = getpid();
412         e->perturb = USEC_INFINITY;
413
414         assert_se(sigemptyset(&e->sigset) == 0);
415
416         e->pending = prioq_new(pending_prioq_compare);
417         if (!e->pending) {
418                 r = -ENOMEM;
419                 goto fail;
420         }
421
422         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
423         if (e->epoll_fd < 0) {
424                 r = -errno;
425                 goto fail;
426         }
427
428         *ret = e;
429         return 0;
430
431 fail:
432         event_free(e);
433         return r;
434 }
435
436 _public_ sd_event* sd_event_ref(sd_event *e) {
437         assert_return(e, NULL);
438
439         assert(e->n_ref >= 1);
440         e->n_ref++;
441
442         return e;
443 }
444
445 _public_ sd_event* sd_event_unref(sd_event *e) {
446
447         if (!e)
448                 return NULL;
449
450         assert(e->n_ref >= 1);
451         e->n_ref--;
452
453         if (e->n_ref <= 0)
454                 event_free(e);
455
456         return NULL;
457 }
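/* Editor's usage sketch (illustrative, not part of the original source):
 * typical lifecycle of a loop object.
 *
 *         sd_event *e = NULL;
 *         int r;
 *
 *         r = sd_event_new(&e);
 *         if (r < 0)
 *                 return r;
 *
 *         ... attach sources and run the loop ...
 *
 *         e = sd_event_unref(e);
 *
 * sd_event_unref() always returns NULL, which makes the assignment a
 * convenient way to clear the pointer. */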
458
459 static bool event_pid_changed(sd_event *e) {
460         assert(e);
461
462         /* We don't support people creating an event loop and keeping
463          * it around over a fork(). Let's complain. */
464
465         return e->original_pid != getpid();
466 }
467
468 static int source_io_unregister(sd_event_source *s) {
469         int r;
470
471         assert(s);
472         assert(s->type == SOURCE_IO);
473
474         if (!s->io.registered)
475                 return 0;
476
477         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
478         if (r < 0)
479                 return -errno;
480
481         s->io.registered = false;
482         return 0;
483 }
484
485 static int source_io_register(
486                 sd_event_source *s,
487                 int enabled,
488                 uint32_t events) {
489
490         struct epoll_event ev = {};
491         int r;
492
493         assert(s);
494         assert(s->type == SOURCE_IO);
495         assert(enabled != SD_EVENT_OFF);
496
497         ev.events = events;
498         ev.data.ptr = s;
499
500         if (enabled == SD_EVENT_ONESHOT)
501                 ev.events |= EPOLLONESHOT;
502
503         if (s->io.registered)
504                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
505         else
506                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
507
508         if (r < 0)
509                 return -errno;
510
511         s->io.registered = true;
512
513         return 0;
514 }
515
516 static clockid_t event_source_type_to_clock(EventSourceType t) {
517
518         switch (t) {
519
520         case SOURCE_TIME_REALTIME:
521                 return CLOCK_REALTIME;
522
523         case SOURCE_TIME_BOOTTIME:
524                 return CLOCK_BOOTTIME;
525
526         case SOURCE_TIME_MONOTONIC:
527                 return CLOCK_MONOTONIC;
528
529         case SOURCE_TIME_REALTIME_ALARM:
530                 return CLOCK_REALTIME_ALARM;
531
532         case SOURCE_TIME_BOOTTIME_ALARM:
533                 return CLOCK_BOOTTIME_ALARM;
534
535         default:
536                 return (clockid_t) -1;
537         }
538 }
539
540 static EventSourceType clock_to_event_source_type(clockid_t clock) {
541
542         switch (clock) {
543
544         case CLOCK_REALTIME:
545                 return SOURCE_TIME_REALTIME;
546
547         case CLOCK_BOOTTIME:
548                 return SOURCE_TIME_BOOTTIME;
549
550         case CLOCK_MONOTONIC:
551                 return SOURCE_TIME_MONOTONIC;
552
553         case CLOCK_REALTIME_ALARM:
554                 return SOURCE_TIME_REALTIME_ALARM;
555
556         case CLOCK_BOOTTIME_ALARM:
557                 return SOURCE_TIME_BOOTTIME_ALARM;
558
559         default:
560                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
561         }
562 }
563
564 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
565         assert(e);
566
567         switch (t) {
568
569         case SOURCE_TIME_REALTIME:
570                 return &e->realtime;
571
572         case SOURCE_TIME_BOOTTIME:
573                 return &e->boottime;
574
575         case SOURCE_TIME_MONOTONIC:
576                 return &e->monotonic;
577
578         case SOURCE_TIME_REALTIME_ALARM:
579                 return &e->realtime_alarm;
580
581         case SOURCE_TIME_BOOTTIME_ALARM:
582                 return &e->boottime_alarm;
583
584         default:
585                 return NULL;
586         }
587 }
588
589 static void source_disconnect(sd_event_source *s) {
590         sd_event *event;
591
592         assert(s);
593
594         if (!s->event)
595                 return;
596
597         assert(s->event->n_sources > 0);
598
599         switch (s->type) {
600
601         case SOURCE_IO:
602                 if (s->io.fd >= 0)
603                         source_io_unregister(s);
604
605                 break;
606
607         case SOURCE_TIME_REALTIME:
608         case SOURCE_TIME_BOOTTIME:
609         case SOURCE_TIME_MONOTONIC:
610         case SOURCE_TIME_REALTIME_ALARM:
611         case SOURCE_TIME_BOOTTIME_ALARM: {
612                 struct clock_data *d;
613
614                 d = event_get_clock_data(s->event, s->type);
615                 assert(d);
616
617                 prioq_remove(d->earliest, s, &s->time.earliest_index);
618                 prioq_remove(d->latest, s, &s->time.latest_index);
619                 break;
620         }
621
622         case SOURCE_SIGNAL:
623                 if (s->signal.sig > 0) {
624                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
625                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
626
627                         if (s->event->signal_sources)
628                                 s->event->signal_sources[s->signal.sig] = NULL;
629                 }
630
631                 break;
632
633         case SOURCE_CHILD:
634                 if (s->child.pid > 0) {
635                         if (s->enabled != SD_EVENT_OFF) {
636                                 assert(s->event->n_enabled_child_sources > 0);
637                                 s->event->n_enabled_child_sources--;
638                         }
639
640                         if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
641                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
642
643                         hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
644                 }
645
646                 break;
647
648         case SOURCE_DEFER:
649                 /* nothing */
650                 break;
651
652         case SOURCE_POST:
653                 set_remove(s->event->post_sources, s);
654                 break;
655
656         case SOURCE_EXIT:
657                 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
658                 break;
659
660         default:
661                 assert_not_reached("Wut? I shouldn't exist.");
662         }
663
664         if (s->pending)
665                 prioq_remove(s->event->pending, s, &s->pending_index);
666
667         if (s->prepare)
668                 prioq_remove(s->event->prepare, s, &s->prepare_index);
669
670         event = s->event;
671
672         s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
673         s->event = NULL;
674         LIST_REMOVE(sources, event->sources, s);
675         event->n_sources--;
676
677         if (!s->floating)
678                 sd_event_unref(event);
679 }
680
681 static void source_free(sd_event_source *s) {
682         assert(s);
683
684         source_disconnect(s);
685         free(s);
686 }
687
688 static int source_set_pending(sd_event_source *s, bool b) {
689         int r;
690
691         assert(s);
692         assert(s->type != SOURCE_EXIT);
693
694         if (s->pending == b)
695                 return 0;
696
697         s->pending = b;
698
699         if (b) {
700                 s->pending_iteration = s->event->iteration;
701
702                 r = prioq_put(s->event->pending, s, &s->pending_index);
703                 if (r < 0) {
704                         s->pending = false;
705                         return r;
706                 }
707         } else
708                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
709
710         if (EVENT_SOURCE_IS_TIME(s->type)) {
711                 struct clock_data *d;
712
713                 d = event_get_clock_data(s->event, s->type);
714                 assert(d);
715
716                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
717                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
718         }
719
720         return 0;
721 }
722
723 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
724         sd_event_source *s;
725
726         assert(e);
727
728         s = new0(sd_event_source, 1);
729         if (!s)
730                 return NULL;
731
732         s->n_ref = 1;
733         s->event = e;
734         s->floating = floating;
735         s->type = type;
736         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
737
738         if (!floating)
739                 sd_event_ref(e);
740
741         LIST_PREPEND(sources, e->sources, s);
742         e->n_sources ++;
743
744         return s;
745 }
746
747 _public_ int sd_event_add_io(
748                 sd_event *e,
749                 sd_event_source **ret,
750                 int fd,
751                 uint32_t events,
752                 sd_event_io_handler_t callback,
753                 void *userdata) {
754
755         sd_event_source *s;
756         int r;
757
758         assert_return(e, -EINVAL);
759         assert_return(fd >= 0, -EINVAL);
760         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
761         assert_return(callback, -EINVAL);
762         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
763         assert_return(!event_pid_changed(e), -ECHILD);
764
765         s = source_new(e, !ret, SOURCE_IO);
766         if (!s)
767                 return -ENOMEM;
768
769         s->io.fd = fd;
770         s->io.events = events;
771         s->io.callback = callback;
772         s->userdata = userdata;
773         s->enabled = SD_EVENT_ON;
774
775         r = source_io_register(s, s->enabled, events);
776         if (r < 0) {
777                 source_free(s);
778                 return r;
779         }
780
781         if (ret)
782                 *ret = s;
783
784         return 0;
785 }
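/* Editor's usage sketch (illustrative; on_socket_ready and sock_fd are made
 * up for the example): watch a file descriptor for readability. The new
 * source starts out SD_EVENT_ON; passing ret=NULL instead creates a
 * "floating" source owned by the event loop itself.
 *
 *         static int on_socket_ready(sd_event_source *s, int fd,
 *                                    uint32_t revents, void *userdata) {
 *                 ... read from fd ...
 *                 return 0;
 *         }
 *
 *         sd_event_source *source = NULL;
 *         r = sd_event_add_io(e, &source, sock_fd, EPOLLIN,
 *                             on_socket_ready, NULL);
 *         if (r < 0)
 *                 return r;
 */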
786
787 static void initialize_perturb(sd_event *e) {
788         sd_id128_t bootid = {};
789
790         /* When we sleep for longer, we try to realign the wakeup to
791            the same time within each minute/second/250ms, so that
792            events all across the system can be coalesced into a single
793            CPU wakeup. However, let's take some system-specific
794            randomness for this value, so that in a network of systems
795            with synced clocks timer events are distributed a
796            bit. Here, we calculate a perturbation usec offset from the
797            boot ID. */
798
799         if (_likely_(e->perturb != USEC_INFINITY))
800                 return;
801
802         if (sd_id128_get_boot(&bootid) >= 0)
803                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
804 }
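/* Editor's note (worked example with hypothetical numbers): if the two
 * 64-bit halves of the boot ID XOR to 0x0123456789abcdef, then
 * perturb = 0x0123456789abcdef % USEC_PER_MINUTE, i.e. some fixed offset in
 * [0, 60s) that is stable for this boot but differs between machines, so
 * minute-aligned wakeups across a fleet do not all land on the same instant. */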
805
806 static int event_setup_timer_fd(
807                 sd_event *e,
808                 struct clock_data *d,
809                 clockid_t clock) {
810
811         struct epoll_event ev = {};
812         int r, fd;
813
814         assert(e);
815         assert(d);
816
817         if (_likely_(d->fd >= 0))
818                 return 0;
819
820         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
821         if (fd < 0)
822                 return -errno;
823
824         ev.events = EPOLLIN;
825         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
826
827         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
828         if (r < 0) {
829                 safe_close(fd);
830                 return -errno;
831         }
832
833         d->fd = fd;
834         return 0;
835 }
836
837 _public_ int sd_event_add_time(
838                 sd_event *e,
839                 sd_event_source **ret,
840                 clockid_t clock,
841                 uint64_t usec,
842                 uint64_t accuracy,
843                 sd_event_time_handler_t callback,
844                 void *userdata) {
845
846         EventSourceType type;
847         sd_event_source *s;
848         struct clock_data *d;
849         int r;
850
851         assert_return(e, -EINVAL);
852         assert_return(usec != (uint64_t) -1, -EINVAL);
853         assert_return(accuracy != (uint64_t) -1, -EINVAL);
854         assert_return(callback, -EINVAL);
855         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
856         assert_return(!event_pid_changed(e), -ECHILD);
857
858         type = clock_to_event_source_type(clock);
859         assert_return(type >= 0, -ENOTSUP);
860
861         d = event_get_clock_data(e, type);
862         assert(d);
863
864         if (!d->earliest) {
865                 d->earliest = prioq_new(earliest_time_prioq_compare);
866                 if (!d->earliest)
867                         return -ENOMEM;
868         }
869
870         if (!d->latest) {
871                 d->latest = prioq_new(latest_time_prioq_compare);
872                 if (!d->latest)
873                         return -ENOMEM;
874         }
875
876         if (d->fd < 0) {
877                 r = event_setup_timer_fd(e, d, clock);
878                 if (r < 0)
879                         return r;
880         }
881
882         s = source_new(e, !ret, type);
883         if (!s)
884                 return -ENOMEM;
885
886         s->time.next = usec;
887         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
888         s->time.callback = callback;
889         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
890         s->userdata = userdata;
891         s->enabled = SD_EVENT_ONESHOT;
892
893         r = prioq_put(d->earliest, s, &s->time.earliest_index);
894         if (r < 0)
895                 goto fail;
896
897         r = prioq_put(d->latest, s, &s->time.latest_index);
898         if (r < 0)
899                 goto fail;
900
901         if (ret)
902                 *ret = s;
903
904         return 0;
905
906 fail:
907         source_free(s);
908         return r;
909 }
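/* Editor's usage sketch (illustrative; on_timer is made up): arm a one-shot
 * timer 5s from now on CLOCK_MONOTONIC with 100ms of allowed slack. now() is
 * the internal helper from time-util.h; external callers would typically use
 * sd_event_now() or clock_gettime() to obtain the current time instead.
 *
 *         static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 ... the timer elapsed ...
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                               now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC,
 *                               100 * USEC_PER_MSEC, on_timer, NULL);
 */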
910
911 static int event_update_signal_fd(sd_event *e) {
912         struct epoll_event ev = {};
913         bool add_to_epoll;
914         int r;
915
916         assert(e);
917
918         add_to_epoll = e->signal_fd < 0;
919
920         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
921         if (r < 0)
922                 return -errno;
923
924         e->signal_fd = r;
925
926         if (!add_to_epoll)
927                 return 0;
928
929         ev.events = EPOLLIN;
930         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
931
932         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
933         if (r < 0) {
934                 e->signal_fd = safe_close(e->signal_fd);
935                 return -errno;
936         }
937
938         return 0;
939 }
940
941 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
942         assert(s);
943
944         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
945 }
946
947 _public_ int sd_event_add_signal(
948                 sd_event *e,
949                 sd_event_source **ret,
950                 int sig,
951                 sd_event_signal_handler_t callback,
952                 void *userdata) {
953
954         sd_event_source *s;
955         sigset_t ss;
956         int r;
957
958         assert_return(e, -EINVAL);
959         assert_return(sig > 0, -EINVAL);
960         assert_return(sig < _NSIG, -EINVAL);
961         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
962         assert_return(!event_pid_changed(e), -ECHILD);
963
964         if (!callback)
965                 callback = signal_exit_callback;
966
967         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
968         if (r != 0)
969                 return -r;
970
971         if (!sigismember(&ss, sig))
972                 return -EBUSY;
973
974         if (!e->signal_sources) {
975                 e->signal_sources = new0(sd_event_source*, _NSIG);
976                 if (!e->signal_sources)
977                         return -ENOMEM;
978         } else if (e->signal_sources[sig])
979                 return -EBUSY;
980
981         s = source_new(e, !ret, SOURCE_SIGNAL);
982         if (!s)
983                 return -ENOMEM;
984
985         s->signal.sig = sig;
986         s->signal.callback = callback;
987         s->userdata = userdata;
988         s->enabled = SD_EVENT_ON;
989
990         e->signal_sources[sig] = s;
991         assert_se(sigaddset(&e->sigset, sig) == 0);
992
993         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
994                 r = event_update_signal_fd(e);
995                 if (r < 0) {
996                         source_free(s);
997                         return r;
998                 }
999         }
1000
1001         if (ret)
1002                 *ret = s;
1003
1004         return 0;
1005 }
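/* Editor's usage sketch (illustrative): the signal must already be blocked
 * in the calling thread, otherwise -EBUSY is returned, so block it first.
 * Passing callback=NULL installs signal_exit_callback() above, which simply
 * requests loop exit with the userdata pointer as the exit code.
 *
 *         sigset_t mask;
 *         sigemptyset(&mask);
 *         sigaddset(&mask, SIGTERM);
 *         sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *         r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */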
1006
1007 _public_ int sd_event_add_child(
1008                 sd_event *e,
1009                 sd_event_source **ret,
1010                 pid_t pid,
1011                 int options,
1012                 sd_event_child_handler_t callback,
1013                 void *userdata) {
1014
1015         sd_event_source *s;
1016         int r;
1017
1018         assert_return(e, -EINVAL);
1019         assert_return(pid > 1, -EINVAL);
1020         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1021         assert_return(options != 0, -EINVAL);
1022         assert_return(callback, -EINVAL);
1023         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1024         assert_return(!event_pid_changed(e), -ECHILD);
1025
1026         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
1027         if (r < 0)
1028                 return r;
1029
1030         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1031                 return -EBUSY;
1032
1033         s = source_new(e, !ret, SOURCE_CHILD);
1034         if (!s)
1035                 return -ENOMEM;
1036
1037         s->child.pid = pid;
1038         s->child.options = options;
1039         s->child.callback = callback;
1040         s->userdata = userdata;
1041         s->enabled = SD_EVENT_ONESHOT;
1042
1043         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1044         if (r < 0) {
1045                 source_free(s);
1046                 return r;
1047         }
1048
1049         e->n_enabled_child_sources ++;
1050
1051         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1052
1053         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
1054                 r = event_update_signal_fd(e);
1055                 if (r < 0) {
1056                         source_free(s);
1057                         return r;
1058                 }
1059         }
1060
1061         e->need_process_child = true;
1062
1063         if (ret)
1064                 *ret = s;
1065
1066         return 0;
1067 }
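/* Editor's usage sketch (illustrative; child_pid and on_child_exit are made
 * up): watch a forked child for termination. SIGCHLD should be blocked in
 * the caller, since the loop learns about child state changes through its
 * signalfd.
 *
 *         static int on_child_exit(sd_event_source *s, const siginfo_t *si,
 *                                  void *userdata) {
 *                 ... inspect si->si_code / si->si_status ...
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_child(e, NULL, child_pid, WEXITED,
 *                                on_child_exit, NULL);
 */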
1068
1069 _public_ int sd_event_add_defer(
1070                 sd_event *e,
1071                 sd_event_source **ret,
1072                 sd_event_handler_t callback,
1073                 void *userdata) {
1074
1075         sd_event_source *s;
1076         int r;
1077
1078         assert_return(e, -EINVAL);
1079         assert_return(callback, -EINVAL);
1080         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1081         assert_return(!event_pid_changed(e), -ECHILD);
1082
1083         s = source_new(e, !ret, SOURCE_DEFER);
1084         if (!s)
1085                 return -ENOMEM;
1086
1087         s->defer.callback = callback;
1088         s->userdata = userdata;
1089         s->enabled = SD_EVENT_ONESHOT;
1090
1091         r = source_set_pending(s, true);
1092         if (r < 0) {
1093                 source_free(s);
1094                 return r;
1095         }
1096
1097         if (ret)
1098                 *ret = s;
1099
1100         return 0;
1101 }
1102
1103 _public_ int sd_event_add_post(
1104                 sd_event *e,
1105                 sd_event_source **ret,
1106                 sd_event_handler_t callback,
1107                 void *userdata) {
1108
1109         sd_event_source *s;
1110         int r;
1111
1112         assert_return(e, -EINVAL);
1113         assert_return(callback, -EINVAL);
1114         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1115         assert_return(!event_pid_changed(e), -ECHILD);
1116
1117         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1118         if (r < 0)
1119                 return r;
1120
1121         s = source_new(e, !ret, SOURCE_POST);
1122         if (!s)
1123                 return -ENOMEM;
1124
1125         s->post.callback = callback;
1126         s->userdata = userdata;
1127         s->enabled = SD_EVENT_ON;
1128
1129         r = set_put(e->post_sources, s);
1130         if (r < 0) {
1131                 source_free(s);
1132                 return r;
1133         }
1134
1135         if (ret)
1136                 *ret = s;
1137
1138         return 0;
1139 }
1140
1141 _public_ int sd_event_add_exit(
1142                 sd_event *e,
1143                 sd_event_source **ret,
1144                 sd_event_handler_t callback,
1145                 void *userdata) {
1146
1147         sd_event_source *s;
1148         int r;
1149
1150         assert_return(e, -EINVAL);
1151         assert_return(callback, -EINVAL);
1152         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1153         assert_return(!event_pid_changed(e), -ECHILD);
1154
1155         if (!e->exit) {
1156                 e->exit = prioq_new(exit_prioq_compare);
1157                 if (!e->exit)
1158                         return -ENOMEM;
1159         }
1160
1161         s = source_new(e, !ret, SOURCE_EXIT);
1162         if (!s)
1163                 return -ENOMEM;
1164
1165         s->exit.callback = callback;
1166         s->userdata = userdata;
1167         s->exit.prioq_index = PRIOQ_IDX_NULL;
1168         s->enabled = SD_EVENT_ONESHOT;
1169
1170         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1171         if (r < 0) {
1172                 source_free(s);
1173                 return r;
1174         }
1175
1176         if (ret)
1177                 *ret = s;
1178
1179         return 0;
1180 }
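/* Editor's usage sketch (illustrative; on_exit_cleanup is made up): exit
 * sources are dispatched only once sd_event_exit() has been called, ordered
 * by priority, which makes them a natural place for shutdown work.
 *
 *         static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *                 ... flush caches, close connections ...
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 */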
1181
1182 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1183         assert_return(s, NULL);
1184
1185         assert(s->n_ref >= 1);
1186         s->n_ref++;
1187
1188         return s;
1189 }
1190
1191 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1192
1193         if (!s)
1194                 return NULL;
1195
1196         assert(s->n_ref >= 1);
1197         s->n_ref--;
1198
1199         if (s->n_ref <= 0) {
1200                 /* Here's a special hack: when we are called from a
1201                  * dispatch handler we won't free the event source
1202                  * immediately, but we will detach the fd from the
1203                  * epoll. This way it is safe for the caller to unref
1204                  * the event source and immediately close the fd, but
1205                  * we still retain a valid event source object after
1206                  * the callback. */
1207
1208                 if (s->dispatching) {
1209                         if (s->type == SOURCE_IO)
1210                                 source_io_unregister(s);
1211
1212                         source_disconnect(s);
1213                 } else
1214                         source_free(s);
1215         }
1216
1217         return NULL;
1218 }
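/* Editor's illustration of the pattern described above (hypothetical
 * callback): inside a dispatch handler it is safe to drop the last reference
 * to the source and close the watched fd right away, since the object is
 * only detached here and freed after the callback returns.
 *
 *         static int on_io(sd_event_source *s, int fd, uint32_t revents,
 *                          void *userdata) {
 *                 sd_event_source_unref(s);
 *                 safe_close(fd);
 *                 return 0;
 *         }
 */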
1219
1220 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1221         assert_return(s, NULL);
1222
1223         return s->event;
1224 }
1225
1226 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1227         assert_return(s, -EINVAL);
1228         assert_return(s->type != SOURCE_EXIT, -EDOM);
1229         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1230         assert_return(!event_pid_changed(s->event), -ECHILD);
1231
1232         return s->pending;
1233 }
1234
1235 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1236         assert_return(s, -EINVAL);
1237         assert_return(s->type == SOURCE_IO, -EDOM);
1238         assert_return(!event_pid_changed(s->event), -ECHILD);
1239
1240         return s->io.fd;
1241 }
1242
1243 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1244         int r;
1245
1246         assert_return(s, -EINVAL);
1247         assert_return(fd >= 0, -EINVAL);
1248         assert_return(s->type == SOURCE_IO, -EDOM);
1249         assert_return(!event_pid_changed(s->event), -ECHILD);
1250
1251         if (s->io.fd == fd)
1252                 return 0;
1253
1254         if (s->enabled == SD_EVENT_OFF) {
1255                 s->io.fd = fd;
1256                 s->io.registered = false;
1257         } else {
1258                 int saved_fd;
1259
1260                 saved_fd = s->io.fd;
1261                 assert(s->io.registered);
1262
1263                 s->io.fd = fd;
1264                 s->io.registered = false;
1265
1266                 r = source_io_register(s, s->enabled, s->io.events);
1267                 if (r < 0) {
1268                         s->io.fd = saved_fd;
1269                         s->io.registered = true;
1270                         return r;
1271                 }
1272
1273                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1274         }
1275
1276         return 0;
1277 }
1278
1279 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1280         assert_return(s, -EINVAL);
1281         assert_return(events, -EINVAL);
1282         assert_return(s->type == SOURCE_IO, -EDOM);
1283         assert_return(!event_pid_changed(s->event), -ECHILD);
1284
1285         *events = s->io.events;
1286         return 0;
1287 }
1288
1289 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1290         int r;
1291
1292         assert_return(s, -EINVAL);
1293         assert_return(s->type == SOURCE_IO, -EDOM);
1294         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1295         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1296         assert_return(!event_pid_changed(s->event), -ECHILD);
1297
1298         /* edge-triggered updates are never skipped, so we can reset edges */
1299         if (s->io.events == events && !(events & EPOLLET))
1300                 return 0;
1301
1302         if (s->enabled != SD_EVENT_OFF) {
1303                 r = source_io_register(s, s->enabled, events);
1304                 if (r < 0)
1305                         return r;
1306         }
1307
1308         s->io.events = events;
1309         source_set_pending(s, false);
1310
1311         return 0;
1312 }
1313
1314 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1315         assert_return(s, -EINVAL);
1316         assert_return(revents, -EINVAL);
1317         assert_return(s->type == SOURCE_IO, -EDOM);
1318         assert_return(s->pending, -ENODATA);
1319         assert_return(!event_pid_changed(s->event), -ECHILD);
1320
1321         *revents = s->io.revents;
1322         return 0;
1323 }
1324
1325 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1326         assert_return(s, -EINVAL);
1327         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1328         assert_return(!event_pid_changed(s->event), -ECHILD);
1329
1330         return s->signal.sig;
1331 }
1332
1333 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1334         assert_return(s, -EINVAL);
1335         assert_return(!event_pid_changed(s->event), -ECHILD);
1336         *priority = s->priority;
1337         return 0;
1338 }
1339
1340 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1341         assert_return(s, -EINVAL);
1342         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1343         assert_return(!event_pid_changed(s->event), -ECHILD);
1344
1345         if (s->priority == priority)
1346                 return 0;
1347
1348         s->priority = priority;
1349
1350         if (s->pending)
1351                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1352
1353         if (s->prepare)
1354                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1355
1356         if (s->type == SOURCE_EXIT)
1357                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1358
1359         return 0;
1360 }
1361
1362 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1363         assert_return(s, -EINVAL);
1364         assert_return(m, -EINVAL);
1365         assert_return(!event_pid_changed(s->event), -ECHILD);
1366
1367         *m = s->enabled;
1368         return 0;
1369 }
1370
1371 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1372         int r;
1373
1374         assert_return(s, -EINVAL);
1375         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1376         assert_return(!event_pid_changed(s->event), -ECHILD);
1377
1378         /* If we are dead anyway, we are fine with turning off
1379          * sources, but everything else needs to fail. */
1380         if (s->event->state == SD_EVENT_FINISHED)
1381                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1382
1383         if (s->enabled == m)
1384                 return 0;
1385
1386         if (m == SD_EVENT_OFF) {
1387
1388                 switch (s->type) {
1389
1390                 case SOURCE_IO:
1391                         r = source_io_unregister(s);
1392                         if (r < 0)
1393                                 return r;
1394
1395                         s->enabled = m;
1396                         break;
1397
1398                 case SOURCE_TIME_REALTIME:
1399                 case SOURCE_TIME_BOOTTIME:
1400                 case SOURCE_TIME_MONOTONIC:
1401                 case SOURCE_TIME_REALTIME_ALARM:
1402                 case SOURCE_TIME_BOOTTIME_ALARM: {
1403                         struct clock_data *d;
1404
1405                         s->enabled = m;
1406                         d = event_get_clock_data(s->event, s->type);
1407                         assert(d);
1408
1409                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1410                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1411                         break;
1412                 }
1413
1414                 case SOURCE_SIGNAL:
1415                         s->enabled = m;
1416                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1417                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1418                                 event_update_signal_fd(s->event);
1419                         }
1420
1421                         break;
1422
1423                 case SOURCE_CHILD:
1424                         s->enabled = m;
1425
1426                         assert(s->event->n_enabled_child_sources > 0);
1427                         s->event->n_enabled_child_sources--;
1428
1429                         if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1430                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1431                                 event_update_signal_fd(s->event);
1432                         }
1433
1434                         break;
1435
1436                 case SOURCE_EXIT:
1437                         s->enabled = m;
1438                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1439                         break;
1440
1441                 case SOURCE_DEFER:
1442                 case SOURCE_POST:
1443                         s->enabled = m;
1444                         break;
1445
1446                 default:
1447                         assert_not_reached("Wut? I shouldn't exist.");
1448                 }
1449
1450         } else {
1451                 switch (s->type) {
1452
1453                 case SOURCE_IO:
1454                         r = source_io_register(s, m, s->io.events);
1455                         if (r < 0)
1456                                 return r;
1457
1458                         s->enabled = m;
1459                         break;
1460
1461                 case SOURCE_TIME_REALTIME:
1462                 case SOURCE_TIME_BOOTTIME:
1463                 case SOURCE_TIME_MONOTONIC:
1464                 case SOURCE_TIME_REALTIME_ALARM:
1465                 case SOURCE_TIME_BOOTTIME_ALARM: {
1466                         struct clock_data *d;
1467
1468                         s->enabled = m;
1469                         d = event_get_clock_data(s->event, s->type);
1470                         assert(d);
1471
1472                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1473                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1474                         break;
1475                 }
1476
1477                 case SOURCE_SIGNAL:
1478                         s->enabled = m;
1479
1480                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
1481                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1482                                 event_update_signal_fd(s->event);
1483                         }
1484                         break;
1485
1486                 case SOURCE_CHILD:
1487                         if (s->enabled == SD_EVENT_OFF) {
1488                                 s->event->n_enabled_child_sources++;
1489
1490                                 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1491                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1492                                         event_update_signal_fd(s->event);
1493                                 }
1494                         }
1495
1496                         s->enabled = m;
1497                         break;
1498
1499                 case SOURCE_EXIT:
1500                         s->enabled = m;
1501                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1502                         break;
1503
1504                 case SOURCE_DEFER:
1505                 case SOURCE_POST:
1506                         s->enabled = m;
1507                         break;
1508
1509                 default:
1510                         assert_not_reached("Wut? I shouldn't exist.");
1511                 }
1512         }
1513
1514         if (s->pending)
1515                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1516
1517         if (s->prepare)
1518                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1519
1520         return 0;
1521 }
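/* Editor's usage sketch (illustrative): pausing and later re-arming a
 * source. A source enabled with SD_EVENT_ONESHOT is switched off again by
 * the loop after it has been dispatched once.
 *
 *         sd_event_source_set_enabled(src, SD_EVENT_OFF);
 *         ...
 *         sd_event_source_set_enabled(src, SD_EVENT_ONESHOT);
 */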
1522
1523 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1524         assert_return(s, -EINVAL);
1525         assert_return(usec, -EINVAL);
1526         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1527         assert_return(!event_pid_changed(s->event), -ECHILD);
1528
1529         *usec = s->time.next;
1530         return 0;
1531 }
1532
1533 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1534         struct clock_data *d;
1535
1536         assert_return(s, -EINVAL);
1537         assert_return(usec != (uint64_t) -1, -EINVAL);
1538         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1539         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1540         assert_return(!event_pid_changed(s->event), -ECHILD);
1541
1542         s->time.next = usec;
1543
1544         source_set_pending(s, false);
1545
1546         d = event_get_clock_data(s->event, s->type);
1547         assert(d);
1548
1549         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1550         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1551
1552         return 0;
1553 }
1554
1555 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1556         assert_return(s, -EINVAL);
1557         assert_return(usec, -EINVAL);
1558         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1559         assert_return(!event_pid_changed(s->event), -ECHILD);
1560
1561         *usec = s->time.accuracy;
1562         return 0;
1563 }
1564
1565 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1566         struct clock_data *d;
1567
1568         assert_return(s, -EINVAL);
1569         assert_return(usec != (uint64_t) -1, -EINVAL);
1570         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1571         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1572         assert_return(!event_pid_changed(s->event), -ECHILD);
1573
1574         if (usec == 0)
1575                 usec = DEFAULT_ACCURACY_USEC;
1576
1577         s->time.accuracy = usec;
1578
1579         source_set_pending(s, false);
1580
1581         d = event_get_clock_data(s->event, s->type);
1582         assert(d);
1583
1584         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1585
1586         return 0;
1587 }
1588
1589 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1590         assert_return(s, -EINVAL);
1591         assert_return(clock, -EINVAL);
1592         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1593         assert_return(!event_pid_changed(s->event), -ECHILD);
1594
1595         *clock = event_source_type_to_clock(s->type);
1596         return 0;
1597 }
1598
1599 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1600         assert_return(s, -EINVAL);
1601         assert_return(pid, -EINVAL);
1602         assert_return(s->type == SOURCE_CHILD, -EDOM);
1603         assert_return(!event_pid_changed(s->event), -ECHILD);
1604
1605         *pid = s->child.pid;
1606         return 0;
1607 }
1608
1609 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1610         int r;
1611
1612         assert_return(s, -EINVAL);
1613         assert_return(s->type != SOURCE_EXIT, -EDOM);
1614         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1615         assert_return(!event_pid_changed(s->event), -ECHILD);
1616
1617         if (s->prepare == callback)
1618                 return 0;
1619
1620         if (callback && s->prepare) {
1621                 s->prepare = callback;
1622                 return 0;
1623         }
1624
1625         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1626         if (r < 0)
1627                 return r;
1628
1629         s->prepare = callback;
1630
1631         if (callback) {
1632                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1633                 if (r < 0)
1634                         return r;
1635         } else
1636                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1637
1638         return 0;
1639 }
1640
1641 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1642         assert_return(s, NULL);
1643
1644         return s->userdata;
1645 }
1646
1647 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1648         void *ret;
1649
1650         assert_return(s, NULL);
1651
1652         ret = s->userdata;
1653         s->userdata = userdata;
1654
1655         return ret;
1656 }
1657
1658 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1659         usec_t c;
1660         assert(e);
1661         assert(a <= b);
1662
1663         if (a <= 0)
1664                 return 0;
1665
1666         if (b <= a + 1)
1667                 return a;
1668
1669         initialize_perturb(e);
1670
1671         /*
1672           Find a good time to wake up again between times a and b. We
1673           have two goals here:
1674
1675           a) We want to wake up as seldom as possible, hence prefer
1676              later times over earlier times.
1677
1678           b) But if we have to wake up, then let's make sure to
1679              dispatch as much as possible on the entire system.
1680
1681           We implement this by waking up everywhere at the same time
1682           within any given minute if we can, synchronised via the
1683           perturbation value determined from the boot ID. If we can't,
1684           then we try to find the same spot within every 10s, then
1685           every 1s, and finally every 250ms interval. Otherwise, we
1686           pick the last possible time to wake up.
1687         */
1688
1689         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1690         if (c >= b) {
1691                 if (_unlikely_(c < USEC_PER_MINUTE))
1692                         return b;
1693
1694                 c -= USEC_PER_MINUTE;
1695         }
1696
1697         if (c >= a)
1698                 return c;
1699
1700         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1701         if (c >= b) {
1702                 if (_unlikely_(c < USEC_PER_SEC*10))
1703                         return b;
1704
1705                 c -= USEC_PER_SEC*10;
1706         }
1707
1708         if (c >= a)
1709                 return c;
1710
1711         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1712         if (c >= b) {
1713                 if (_unlikely_(c < USEC_PER_SEC))
1714                         return b;
1715
1716                 c -= USEC_PER_SEC;
1717         }
1718
1719         if (c >= a)
1720                 return c;
1721
1722         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1723         if (c >= b) {
1724                 if (_unlikely_(c < USEC_PER_MSEC*250))
1725                         return b;
1726
1727                 c -= USEC_PER_MSEC*250;
1728         }
1729
1730         if (c >= a)
1731                 return c;
1732
1733         return b;
1734 }
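/* Editor's worked example (hypothetical numbers): assume perturb = 7.5s,
 * a = 12:00:03 and b = 12:01:20 on the same clock. The first candidate is b
 * rounded down to the minute plus the perturbation, c = 12:01:07.5; that is
 * below b and at or after a, so it is returned. With a narrower window, say
 * b = 12:00:05, the minute- and 10s-aligned candidates land before a, and
 * the 1s-aligned candidate is returned instead: c = 12:00:05.5 is not below
 * b, so one second is subtracted, giving 12:00:04.5. */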
1735
1736 static int event_arm_timer(
1737                 sd_event *e,
1738                 struct clock_data *d) {
1739
1740         struct itimerspec its = {};
1741         sd_event_source *a, *b;
1742         usec_t t;
1743         int r;
1744
1745         assert(e);
1746         assert(d);
1747
1748         a = prioq_peek(d->earliest);
1749         if (!a || a->enabled == SD_EVENT_OFF) {
1750
1751                 if (d->fd < 0)
1752                         return 0;
1753
1754                 if (d->next == USEC_INFINITY)
1755                         return 0;
1756
1757                 /* disarm */
1758                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1759                 if (r < 0)
1760                         return -errno;
1761
1762                 d->next = USEC_INFINITY;
1763                 return 0;
1764         }
1765
1766         b = prioq_peek(d->latest);
1767         assert_se(b && b->enabled != SD_EVENT_OFF);
1768
1769         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1770         if (d->next == t)
1771                 return 0;
1772
1773         assert_se(d->fd >= 0);
1774
1775         if (t == 0) {
1776                 /* We don't want to disarm here, so just arm it to some time long ago. */
1777                 its.it_value.tv_sec = 0;
1778                 its.it_value.tv_nsec = 1;
1779         } else
1780                 timespec_store(&its.it_value, t);
1781
1782         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1783         if (r < 0)
1784                 return -errno;
1785
1786         d->next = t;
1787         return 0;
1788 }
1789
1790 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1791         assert(e);
1792         assert(s);
1793         assert(s->type == SOURCE_IO);
1794
1795         /* If the event source was already pending, we just OR in the
1796          * new revents, otherwise we reset the value. The ORing is
1797          * necessary to handle EPOLLONESHOT events properly where
1798          * readability might happen independently of writability, and
1799          * we need to keep track of both */
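        /* Illustrative scenario (not part of the original comment): with
         * EPOLLONESHOT, EPOLLIN may be reported in one epoll_wait() wake-up and
         * EPOLLOUT in a later one before the source is actually dispatched;
         * ORing means the callback eventually sees EPOLLIN|EPOLLOUT instead of
         * losing the earlier condition. */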
1800
1801         if (s->pending)
1802                 s->io.revents |= revents;
1803         else
1804                 s->io.revents = revents;
1805
1806         return source_set_pending(s, true);
1807 }
1808
1809 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1810         uint64_t x;
1811         ssize_t ss;
1812
1813         assert(e);
1814         assert(fd >= 0);
1815
1816         assert_return(events == EPOLLIN, -EIO);
1817
1818         ss = read(fd, &x, sizeof(x));
1819         if (ss < 0) {
1820                 if (errno == EAGAIN || errno == EINTR)
1821                         return 0;
1822
1823                 return -errno;
1824         }
1825
1826         if (_unlikely_(ss != sizeof(x)))
1827                 return -EIO;
1828
1829         if (next)
1830                 *next = USEC_INFINITY;
1831
1832         return 0;
1833 }
1834
1835 static int process_timer(
1836                 sd_event *e,
1837                 usec_t n,
1838                 struct clock_data *d) {
1839
1840         sd_event_source *s;
1841         int r;
1842
1843         assert(e);
1844         assert(d);
1845
1846         for (;;) {
1847                 s = prioq_peek(d->earliest);
1848                 if (!s ||
1849                     s->time.next > n ||
1850                     s->enabled == SD_EVENT_OFF ||
1851                     s->pending)
1852                         break;
1853
1854                 r = source_set_pending(s, true);
1855                 if (r < 0)
1856                         return r;
1857
1858                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1859                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1860         }
1861
1862         return 0;
1863 }
1864
1865 static int process_child(sd_event *e) {
1866         sd_event_source *s;
1867         Iterator i;
1868         int r;
1869
1870         assert(e);
1871
1872         e->need_process_child = false;
1873
1874         /*
1875            So, this is ugly. We iteratively invoke waitid() with P_PID
1876            + WNOHANG for each PID we wait for, instead of using
1877            P_ALL. This is because we only want to get child
1878            information of very specific child processes, and not all
1879            of them. We might not have processed the SIGCHLD event of a
1880            previous invocation and we don't want to maintain an
1881            unbounded *per-child* event queue, hence we really don't
1882            want anything flushed out of the kernel's queue that we
1883            don't care about. Since this is O(n), if you have a lot of
1884            processes you probably want to handle SIGCHLD yourself (see
1885            the illustrative sketch below).
1886
1887            We do not reap the children here (hence WNOWAIT); that is
1888            only done after the event source is dispatched, so that
1889            the callback still sees the process as a zombie.
1890         */
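        /*
           Hedged sketch (not part of this file; on_sigchld() and handle_exit()
           are hypothetical names) of what "handling SIGCHLD yourself" could
           look like on the application side: block SIGCHLD, register it once
           via

               sd_event_add_signal(e, NULL, SIGCHLD, on_sigchld, NULL);

           and inside on_sigchld() drain the kernel queue in a single pass:

               siginfo_t si;
               for (;;) {
                       si.si_pid = 0;
                       if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG) < 0 || si.si_pid == 0)
                               break;
                       handle_exit(si.si_pid, &si);
               }

           That reaps every exited child per wake-up instead of walking the
           per-PID O(n) loop below.
        */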
1891
1892         HASHMAP_FOREACH(s, e->child_sources, i) {
1893                 assert(s->type == SOURCE_CHILD);
1894
1895                 if (s->pending)
1896                         continue;
1897
1898                 if (s->enabled == SD_EVENT_OFF)
1899                         continue;
1900
1901                 zero(s->child.siginfo);
1902                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1903                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1904                 if (r < 0)
1905                         return -errno;
1906
1907                 if (s->child.siginfo.si_pid != 0) {
1908                         bool zombie =
1909                                 s->child.siginfo.si_code == CLD_EXITED ||
1910                                 s->child.siginfo.si_code == CLD_KILLED ||
1911                                 s->child.siginfo.si_code == CLD_DUMPED;
1912
1913                         if (!zombie && (s->child.options & WEXITED)) {
1914                                 /* If the child isn't dead then let's
1915                                  * immediately remove the state change
1916                                  * from the queue, since there's no
1917                                  * benefit in leaving it queued */
1918
1919                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
1920                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
1921                         }
1922
1923                         r = source_set_pending(s, true);
1924                         if (r < 0)
1925                                 return r;
1926                 }
1927         }
1928
1929         return 0;
1930 }
1931
1932 static int process_signal(sd_event *e, uint32_t events) {
1933         bool read_one = false;
1934         int r;
1935
1936         assert(e);
1937
1938         assert_return(events == EPOLLIN, -EIO);
1939
1940         for (;;) {
1941                 struct signalfd_siginfo si;
1942                 ssize_t ss;
1943                 sd_event_source *s = NULL;
1944
1945                 ss = read(e->signal_fd, &si, sizeof(si));
1946                 if (ss < 0) {
1947                         if (errno == EAGAIN || errno == EINTR)
1948                                 return read_one;
1949
1950                         return -errno;
1951                 }
1952
1953                 if (_unlikely_(ss != sizeof(si)))
1954                         return -EIO;
1955
1956                 read_one = true;
1957
1958                 if (si.ssi_signo == SIGCHLD) {
1959                         r = process_child(e);
1960                         if (r < 0)
1961                                 return r;
1962                         if (r > 0)
1963                                 continue;
1964                 }
1965
1966                 if (e->signal_sources)
1967                         s = e->signal_sources[si.ssi_signo];
1968
1969                 if (!s)
1970                         continue;
1971
1972                 s->signal.siginfo = si;
1973                 r = source_set_pending(s, true);
1974                 if (r < 0)
1975                         return r;
1976         }
1977 }
1978
1979 static int source_dispatch(sd_event_source *s) {
1980         int r = 0;
1981
1982         assert(s);
1983         assert(s->pending || s->type == SOURCE_EXIT);
1984
1985         if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
1986                 r = source_set_pending(s, false);
1987                 if (r < 0)
1988                         return r;
1989         }
1990
1991         if (s->type != SOURCE_POST) {
1992                 sd_event_source *z;
1993                 Iterator i;
1994
1995                 /* If we execute a non-post source, let's mark all
1996                  * post sources as pending */
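                /* Illustrative note (not part of the original): a source
                 * registered with sd_event_add_post() gets marked pending here
                 * whenever any non-post source runs, so it can e.g. flush work
                 * that earlier callbacks queued up, without having to hook into
                 * each of those callbacks individually. */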
1997
1998                 SET_FOREACH(z, s->event->post_sources, i) {
1999                         if (z->enabled == SD_EVENT_OFF)
2000                                 continue;
2001
2002                         r = source_set_pending(z, true);
2003                         if (r < 0)
2004                                 return r;
2005                 }
2006         }
2007
2008         if (s->enabled == SD_EVENT_ONESHOT) {
2009                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2010                 if (r < 0)
2011                         return r;
2012         }
2013
2014         s->dispatching = true;
2015
2016         switch (s->type) {
2017
2018         case SOURCE_IO:
2019                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2020                 break;
2021
2022         case SOURCE_TIME_REALTIME:
2023         case SOURCE_TIME_BOOTTIME:
2024         case SOURCE_TIME_MONOTONIC:
2025         case SOURCE_TIME_REALTIME_ALARM:
2026         case SOURCE_TIME_BOOTTIME_ALARM:
2027                 r = s->time.callback(s, s->time.next, s->userdata);
2028                 break;
2029
2030         case SOURCE_SIGNAL:
2031                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2032                 break;
2033
2034         case SOURCE_CHILD: {
2035                 bool zombie;
2036
2037                 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2038                          s->child.siginfo.si_code == CLD_KILLED ||
2039                          s->child.siginfo.si_code == CLD_DUMPED;
2040
2041                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2042
2043                 /* Now, reap the PID for good. */
2044                 if (zombie)
2045                         waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2046
2047                 break;
2048         }
2049
2050         case SOURCE_DEFER:
2051                 r = s->defer.callback(s, s->userdata);
2052                 break;
2053
2054         case SOURCE_POST:
2055                 r = s->post.callback(s, s->userdata);
2056                 break;
2057
2058         case SOURCE_EXIT:
2059                 r = s->exit.callback(s, s->userdata);
2060                 break;
2061
2062         case SOURCE_WATCHDOG:
2063         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2064         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2065                 assert_not_reached("Wut? I shouldn't exist.");
2066         }
2067
2068         s->dispatching = false;
2069
2070         if (r < 0)
2071                 log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
2072
2073         if (s->n_ref == 0)
2074                 source_free(s);
2075         else if (r < 0)
2076                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2077
2078         return 1;
2079 }
2080
2081 static int event_prepare(sd_event *e) {
2082         int r;
2083
2084         assert(e);
2085
2086         for (;;) {
2087                 sd_event_source *s;
2088
2089                 s = prioq_peek(e->prepare);
2090                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2091                         break;
2092
2093                 s->prepare_iteration = e->iteration;
2094                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2095                 if (r < 0)
2096                         return r;
2097
2098                 assert(s->prepare);
2099
2100                 s->dispatching = true;
2101                 r = s->prepare(s, s->userdata);
2102                 s->dispatching = false;
2103
2104                 if (r < 0)
2105                         log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2106
2107                 if (s->n_ref == 0)
2108                         source_free(s);
2109                 else if (r < 0)
2110                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2111         }
2112
2113         return 0;
2114 }
2115
2116 static int dispatch_exit(sd_event *e) {
2117         sd_event_source *p;
2118         int r;
2119
2120         assert(e);
2121
2122         p = prioq_peek(e->exit);
2123         if (!p || p->enabled == SD_EVENT_OFF) {
2124                 e->state = SD_EVENT_FINISHED;
2125                 return 0;
2126         }
2127
2128         sd_event_ref(e);
2129         e->iteration++;
2130         e->state = SD_EVENT_EXITING;
2131
2132         r = source_dispatch(p);
2133
2134         e->state = SD_EVENT_PASSIVE;
2135         sd_event_unref(e);
2136
2137         return r;
2138 }
2139
2140 static sd_event_source* event_next_pending(sd_event *e) {
2141         sd_event_source *p;
2142
2143         assert(e);
2144
2145         p = prioq_peek(e->pending);
2146         if (!p)
2147                 return NULL;
2148
2149         if (p->enabled == SD_EVENT_OFF)
2150                 return NULL;
2151
2152         return p;
2153 }
2154
2155 static int arm_watchdog(sd_event *e) {
2156         struct itimerspec its = {};
2157         usec_t t;
2158         int r;
2159
2160         assert(e);
2161         assert(e->watchdog_fd >= 0);
2162
2163         t = sleep_between(e,
2164                           e->watchdog_last + (e->watchdog_period / 2),
2165                           e->watchdog_last + (e->watchdog_period * 3 / 4));
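        /* Illustrative numbers (hypothetical): with a 20s watchdog period and
         * the last ping at time T, this picks a coalesced wake-up somewhere in
         * [T+10s, T+15s], i.e. between half and three quarters of the period. */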
2166
2167         timespec_store(&its.it_value, t);
2168
2169         /* Make sure we never set the watchdog to 0, which tells the
2170          * kernel to disable it. */
2171         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2172                 its.it_value.tv_nsec = 1;
2173
2174         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2175         if (r < 0)
2176                 return -errno;
2177
2178         return 0;
2179 }
2180
2181 static int process_watchdog(sd_event *e) {
2182         assert(e);
2183
2184         if (!e->watchdog)
2185                 return 0;
2186
2187         /* Don't notify the watchdog too often */
2188         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2189                 return 0;
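        /* Illustrative numbers (hypothetical): with a 20s watchdog period the
         * check above throttles pings to at most one every 5s, comfortably
         * within what the service manager expects. */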
2190
2191         sd_notify(false, "WATCHDOG=1");
2192         e->watchdog_last = e->timestamp.monotonic;
2193
2194         return arm_watchdog(e);
2195 }
2196
2197 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2198         struct epoll_event *ev_queue;
2199         unsigned ev_queue_max;
2200         sd_event_source *p;
2201         int r, i, m;
2202         bool timedout;
2203
2204         assert_return(e, -EINVAL);
2205         assert_return(!event_pid_changed(e), -ECHILD);
2206         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2207         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2208
2209         if (e->exit_requested)
2210                 return dispatch_exit(e);
2211
2212         sd_event_ref(e);
2213         e->iteration++;
2214         e->state = SD_EVENT_RUNNING;
2215
2216         r = event_prepare(e);
2217         if (r < 0)
2218                 goto finish;
2219
2220         r = event_arm_timer(e, &e->realtime);
2221         if (r < 0)
2222                 goto finish;
2223
2224         r = event_arm_timer(e, &e->boottime);
2225         if (r < 0)
2226                 goto finish;
2227
2228         r = event_arm_timer(e, &e->monotonic);
2229         if (r < 0)
2230                 goto finish;
2231
2232         r = event_arm_timer(e, &e->realtime_alarm);
2233         if (r < 0)
2234                 goto finish;
2235
2236         r = event_arm_timer(e, &e->boottime_alarm);
2237         if (r < 0)
2238                 goto finish;
2239
2240         if (event_next_pending(e) || e->need_process_child)
2241                 timeout = 0;
2242
2243         ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
2244         ev_queue = newa(struct epoll_event, ev_queue_max);
2245
2246         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2247                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2248         if (m < 0) {
2249                 r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
2250                 goto finish;
2251         }
2252
2253         timedout = m == 0;
2254
2255         dual_timestamp_get(&e->timestamp);
2256         e->timestamp_boottime = now(CLOCK_BOOTTIME);
2257
2258         for (i = 0; i < m; i++) {
2259
2260                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
2261                         r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
2262                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
2263                         r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
2264                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
2265                         r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
2266                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
2267                         r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
2268                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
2269                         r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
2270                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2271                         r = process_signal(e, ev_queue[i].events);
2272                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2273                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2274                 else
2275                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2276
2277                 if (r < 0)
2278                         goto finish;
2279         }
2280
2281         r = process_watchdog(e);
2282         if (r < 0)
2283                 goto finish;
2284
2285         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2286         if (r < 0)
2287                 goto finish;
2288
2289         r = process_timer(e, e->timestamp_boottime, &e->boottime);
2290         if (r < 0)
2291                 goto finish;
2292
2293         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2294         if (r < 0)
2295                 goto finish;
2296
2297         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2298         if (r < 0)
2299                 goto finish;
2300
2301         r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2302         if (r < 0)
2303                 goto finish;
2304
2305         if (e->need_process_child) {
2306                 r = process_child(e);
2307                 if (r < 0)
2308                         goto finish;
2309         }
2310
2311         p = event_next_pending(e);
2312         if (!p) {
2313                 r = !timedout;
2314                 goto finish;
2315         }
2316
2317         r = source_dispatch(p);
2318
2319 finish:
2320         e->state = SD_EVENT_PASSIVE;
2321         sd_event_unref(e);
2322
2323         return r;
2324 }
2325
2326 _public_ int sd_event_loop(sd_event *e) {
2327         int r;
2328
2329         assert_return(e, -EINVAL);
2330         assert_return(!event_pid_changed(e), -ECHILD);
2331         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2332
2333         sd_event_ref(e);
2334
2335         while (e->state != SD_EVENT_FINISHED) {
2336                 r = sd_event_run(e, (uint64_t) -1);
2337                 if (r < 0)
2338                         goto finish;
2339         }
2340
2341         r = e->exit_code;
2342
2343 finish:
2344         sd_event_unref(e);
2345         return r;
2346 }
2347
2348 _public_ int sd_event_get_state(sd_event *e) {
2349         assert_return(e, -EINVAL);
2350         assert_return(!event_pid_changed(e), -ECHILD);
2351
2352         return e->state;
2353 }
2354
2355 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2356         assert_return(e, -EINVAL);
2357         assert_return(code, -EINVAL);
2358         assert_return(!event_pid_changed(e), -ECHILD);
2359
2360         if (!e->exit_requested)
2361                 return -ENODATA;
2362
2363         *code = e->exit_code;
2364         return 0;
2365 }
2366
2367 _public_ int sd_event_exit(sd_event *e, int code) {
2368         assert_return(e, -EINVAL);
2369         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2370         assert_return(!event_pid_changed(e), -ECHILD);
2371
2372         e->exit_requested = true;
2373         e->exit_code = code;
2374
2375         return 0;
2376 }
2377
2378 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2379         assert_return(e, -EINVAL);
2380         assert_return(usec, -EINVAL);
2381         assert_return(!event_pid_changed(e), -ECHILD);
2382
2383         /* If we haven't run yet, we have no cached timestamps to report */
2384         if (!dual_timestamp_is_set(&e->timestamp))
2385                 return -ENODATA;
2386
2387         switch (clock) {
2388
2389         case CLOCK_REALTIME:
2390         case CLOCK_REALTIME_ALARM:
2391                 *usec = e->timestamp.realtime;
2392                 break;
2393
2394         case CLOCK_MONOTONIC:
2395                 *usec = e->timestamp.monotonic;
2396                 break;
2397
2398         case CLOCK_BOOTTIME:
2399         case CLOCK_BOOTTIME_ALARM:
2400                 *usec = e->timestamp_boottime;
2401                 break;

        default:
                return -EOPNOTSUPP;
2402         }
2403
2404         return 0;
2405 }
2406
2407 _public_ int sd_event_default(sd_event **ret) {
2408
2409         static thread_local sd_event *default_event = NULL;
2410         sd_event *e = NULL;
2411         int r;
2412
2413         if (!ret)
2414                 return !!default_event;
2415
2416         if (default_event) {
2417                 *ret = sd_event_ref(default_event);
2418                 return 0;
2419         }
2420
2421         r = sd_event_new(&e);
2422         if (r < 0)
2423                 return r;
2424
2425         e->default_event_ptr = &default_event;
2426         e->tid = gettid();
2427         default_event = e;
2428
2429         *ret = e;
2430         return 1;
2431 }
2432
2433 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2434         assert_return(e, -EINVAL);
2435         assert_return(tid, -EINVAL);
2436         assert_return(!event_pid_changed(e), -ECHILD);
2437
2438         if (e->tid != 0) {
2439                 *tid = e->tid;
2440                 return 0;
2441         }
2442
2443         return -ENXIO;
2444 }
2445
2446 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2447         int r;
2448
2449         assert_return(e, -EINVAL);
2450         assert_return(!event_pid_changed(e), -ECHILD);
2451
2452         if (e->watchdog == !!b)
2453                 return e->watchdog;
2454
2455         if (b) {
2456                 struct epoll_event ev = {};
2457
2458                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2459                 if (r <= 0)
2460                         return r;
2461
2462                 /* Issue first ping immediately */
2463                 sd_notify(false, "WATCHDOG=1");
2464                 e->watchdog_last = now(CLOCK_MONOTONIC);
2465
2466                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2467                 if (e->watchdog_fd < 0)
2468                         return -errno;
2469
2470                 r = arm_watchdog(e);
2471                 if (r < 0)
2472                         goto fail;
2473
2474                 ev.events = EPOLLIN;
2475                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2476
2477                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2478                 if (r < 0) {
2479                         r = -errno;
2480                         goto fail;
2481                 }
2482
2483         } else {
2484                 if (e->watchdog_fd >= 0) {
2485                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2486                         e->watchdog_fd = safe_close(e->watchdog_fd);
2487                 }
2488         }
2489
2490         e->watchdog = !!b;
2491         return e->watchdog;
2492
2493 fail:
2494         e->watchdog_fd = safe_close(e->watchdog_fd);
2495         return r;
2496 }
2497
2498 _public_ int sd_event_get_watchdog(sd_event *e) {
2499         assert_return(e, -EINVAL);
2500         assert_return(!event_pid_changed(e), -ECHILD);
2501
2502         return e->watchdog;
2503 }