chiark / gitweb /
hashmap: introduce hash_ops to make struct Hashmap smaller
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* Discriminates the kind of an event source; stored in the 5-bit
 * sd_event_source.type bitfield, so keep _SOURCE_EVENT_SOURCE_TYPE_MAX
 * below 16. The five SOURCE_TIME_* values correspond to the clocks
 * supported by timerfd (see EVENT_SOURCE_IS_TIME below). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
59
/* True for the five timer-based source types, i.e. those that use the
 * per-clock earliest/latest prioqs and the "time" member of the union below. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* A single event source attached to an sd_event loop. The type field
 * discriminates which member of the trailing union is valid. */
struct sd_event_source {
        unsigned n_ref;                /* reference count */

        sd_event *event;               /* loop we are attached to; NULL after disconnect */
        void *userdata;
        sd_event_handler_t prepare;    /* optional callback run before each poll */

        char *name;                    /* free-form name, for debugging */

        EventSourceType type:5;
        int enabled:3;                 /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;                /* queued in event->pending? */
        bool dispatching:1;            /* currently inside its callback? */
        bool floating:1;               /* loop owns the source (no caller reference) */

        int64_t priority;
        unsigned pending_index;        /* index in event->pending prioq */
        unsigned prepare_index;        /* index in event->prepare prioq */
        unsigned pending_iteration;    /* loop iteration when the source became pending */
        unsigned prepare_iteration;    /* loop iteration when prepare() last ran */

        LIST_FIELDS(sd_event_source, sources);

        /* Type-specific state, selected by "type" above */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;      /* epoll events we subscribed to */
                        uint32_t revents;     /* epoll events last received */
                        bool registered:1;    /* fd currently added to the epoll? */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest dispatch time, plus allowed slack */
                        unsigned earliest_index; /* index in clock_data->earliest */
                        unsigned latest_index;   /* index in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* info from the last signalfd read */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;    /* info from the last waitid() */
                        pid_t pid;
                        int options;          /* WEXITED/WSTOPPED/WCONTINUED mask */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index in event->exit prioq */
                } exit;
        };
};
122
/* Per-clock state: one timerfd plus the two priority queues used to pick
 * the next wakeup time for all timer sources on that clock. */
struct clock_data {
        int fd;          /* timerfd for this clock, or -1 if not opened yet */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;     /* absolute time the timerfd is currently armed for */

        bool needs_rearm:1;  /* timerfd must be reprogrammed before the next poll */
};
139
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;                /* reference count */

        int epoll_fd;                  /* central epoll instance */
        int signal_fd;                 /* signalfd covering "sigset", or -1 */
        int watchdog_fd;               /* timerfd for sd_notify() watchdog pings, or -1 */

        Prioq *pending;                /* sources with unprocessed events */
        Prioq *prepare;                /* sources with a prepare() callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;                /* boot-id derived wakeup offset, see initialize_perturb() */

        sigset_t sigset;               /* signals currently covered by signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized table, at most one source per signal */

        Hashmap *child_sources;        /* pid_t (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;                   /* SOURCE_EXIT sources, by priority */

        pid_t original_pid;            /* to detect use across fork(), see event_pid_changed() */

        unsigned iteration;
        dual_timestamp timestamp;      /* realtime/monotonic of the current iteration */
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;     /* SIGCHLD seen, waitid() sweep still needed */
        bool watchdog:1;

        int exit_code;

        pid_t tid;                     /* thread that owns this loop, if it is a default loop */
        sd_event **default_event_ptr;  /* per-thread default-loop pointer to clear on free */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
/* Destroys an event loop. At this point only floating sources may
 * remain attached (callers holding a reference on a non-floating source
 * also hold one on the loop, so we couldn't get here); disconnect and
 * unref them first, then tear down the fds, queues and tables. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Clear the per-thread default-loop pointer, if we are it */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
/* Allocates a new event loop with a single reference. Only the pending
 * prioq and the epoll fd are created eagerly; everything else (timerfds,
 * signalfd, per-type queues) is set up lazily when the first source of
 * the respective type is added. Returns 0 on success, negative errno on
 * failure. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark all fds unopened so event_free() can safely close them */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
439
/* Takes an additional reference on the event loop. Returns the loop, or
 * NULL if e is NULL. */
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
/* Maps a timer-type event source to the clockid_t it runs on. Returns
 * (clockid_t) -1 for non-timer source types. */
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
543
/* Inverse of event_source_type_to_clock(): maps a clockid_t to the
 * corresponding timer source type, or _SOURCE_EVENT_SOURCE_TYPE_INVALID
 * for clocks timerfd does not support. */
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
567
/* Returns the per-clock state embedded in the loop for a timer source
 * type, or NULL for non-timer types. */
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
592
/* Detaches a source from its event loop: removes it from epoll, prioqs,
 * the signal mask and all per-type tables, then unlinks it from the
 * loop's source list and drops the loop reference it holds (floating
 * sources hold no such reference — the loop references them instead).
 * Safe to call on an already disconnected source. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        /* Keep SIGCHLD in the mask while child sources still rely on it */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        /* Keep SIGCHLD in the mask while a signal source watches it */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
685
/* Disconnects a source from its loop and frees its memory. Called when
 * the last reference on the source is dropped. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->name);
        free(s);
}
693
/* Marks a source as pending (queued for dispatch) or not, keeping the
 * loop's "pending" prioq and — for timer sources — the per-clock prioqs
 * in sync. EXIT sources have their own queue and must not go through
 * here. Returns 0 on success or if nothing changed, negative errno on
 * allocation failure (in which case the pending flag is rolled back). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* The pending flag feeds into the time prioq comparators,
                 * hence the position of the source may have changed */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
729
730 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
731         sd_event_source *s;
732
733         assert(e);
734
735         s = new0(sd_event_source, 1);
736         if (!s)
737                 return NULL;
738
739         s->n_ref = 1;
740         s->event = e;
741         s->floating = floating;
742         s->type = type;
743         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
744
745         if (!floating)
746                 sd_event_ref(e);
747
748         LIST_PREPEND(sources, e->sources, s);
749         e->n_sources ++;
750
751         return s;
752 }
753
/* Adds an I/O event source watching "fd" for the given epoll events.
 * The source starts enabled (SD_EVENT_ON). If ret is NULL the source is
 * created floating, i.e. owned by the loop. Returns 0 on success,
 * negative errno on failure. */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
793
/* Lazily computes the per-system wakeup perturbation offset from the
 * boot ID. On failure to read the boot ID, perturb stays USEC_INFINITY
 * and will be retried on the next call. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
812
813 static int event_setup_timer_fd(
814                 sd_event *e,
815                 struct clock_data *d,
816                 clockid_t clock) {
817
818         struct epoll_event ev = {};
819         int r, fd;
820
821         assert(e);
822         assert(d);
823
824         if (_likely_(d->fd >= 0))
825                 return 0;
826
827         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
828         if (fd < 0)
829                 return -errno;
830
831         ev.events = EPOLLIN;
832         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
833
834         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
835         if (r < 0) {
836                 safe_close(fd);
837                 return -errno;
838         }
839
840         d->fd = fd;
841         return 0;
842 }
843
/* Adds a timer event source on the given clock, firing once at "usec"
 * (absolute time) with the given accuracy window (0 selects the 250ms
 * default). The per-clock prioqs and timerfd are allocated lazily here.
 * The source starts as SD_EVENT_ONESHOT. Returns 0 on success, negative
 * errno on failure (-ENOTSUP for unsupported clocks). */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
919
/* (Re)configures the loop's signalfd to cover the current sigset. On
 * first use (signal_fd < 0) the new fd is also added to the epoll
 * instance, with the epoll data word set to SOURCE_SIGNAL so the
 * dispatcher recognizes it. Returns 0 on success, negative errno on
 * failure (in which case the signalfd is closed again). */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        /* signalfd() with an existing fd updates its mask in place */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
949
/* Default handler installed by sd_event_add_signal() when no callback is
 * given: requests loop exit with the userdata as exit code. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
955
956 _public_ int sd_event_add_signal(
957                 sd_event *e,
958                 sd_event_source **ret,
959                 int sig,
960                 sd_event_signal_handler_t callback,
961                 void *userdata) {
962
963         sd_event_source *s;
964         sigset_t ss;
965         int r;
966
967         assert_return(e, -EINVAL);
968         assert_return(sig > 0, -EINVAL);
969         assert_return(sig < _NSIG, -EINVAL);
970         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
971         assert_return(!event_pid_changed(e), -ECHILD);
972
973         if (!callback)
974                 callback = signal_exit_callback;
975
976         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
977         if (r < 0)
978                 return -errno;
979
980         if (!sigismember(&ss, sig))
981                 return -EBUSY;
982
983         if (!e->signal_sources) {
984                 e->signal_sources = new0(sd_event_source*, _NSIG);
985                 if (!e->signal_sources)
986                         return -ENOMEM;
987         } else if (e->signal_sources[sig])
988                 return -EBUSY;
989
990         s = source_new(e, !ret, SOURCE_SIGNAL);
991         if (!s)
992                 return -ENOMEM;
993
994         s->signal.sig = sig;
995         s->signal.callback = callback;
996         s->userdata = userdata;
997         s->enabled = SD_EVENT_ON;
998
999         e->signal_sources[sig] = s;
1000         assert_se(sigaddset(&e->sigset, sig) == 0);
1001
1002         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
1003                 r = event_update_signal_fd(e);
1004                 if (r < 0) {
1005                         source_free(s);
1006                         return r;
1007                 }
1008         }
1009
1010         if (ret)
1011                 *ret = s;
1012
1013         return 0;
1014 }
1015
/* Adds a child-process event source for "pid", dispatched via SIGCHLD
 * and waitid(). Only one source per pid is supported (-EBUSY otherwise).
 * The source starts as SD_EVENT_ONESHOT. "options" is the waitid() mask
 * (WEXITED/WSTOPPED/WCONTINUED) and must be non-zero. Returns 0 on
 * success, negative errno on failure. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        /* If a signal source already watches SIGCHLD the signalfd is set
         * up already; otherwise reconfigure it now */
        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Catch children that may already have exited before we got here */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1077
1078 _public_ int sd_event_add_defer(
1079                 sd_event *e,
1080                 sd_event_source **ret,
1081                 sd_event_handler_t callback,
1082                 void *userdata) {
1083
1084         sd_event_source *s;
1085         int r;
1086
1087         assert_return(e, -EINVAL);
1088         assert_return(callback, -EINVAL);
1089         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1090         assert_return(!event_pid_changed(e), -ECHILD);
1091
1092         s = source_new(e, !ret, SOURCE_DEFER);
1093         if (!s)
1094                 return -ENOMEM;
1095
1096         s->defer.callback = callback;
1097         s->userdata = userdata;
1098         s->enabled = SD_EVENT_ONESHOT;
1099
1100         r = source_set_pending(s, true);
1101         if (r < 0) {
1102                 source_free(s);
1103                 return r;
1104         }
1105
1106         if (ret)
1107                 *ret = s;
1108
1109         return 0;
1110 }
1111
1112 _public_ int sd_event_add_post(
1113                 sd_event *e,
1114                 sd_event_source **ret,
1115                 sd_event_handler_t callback,
1116                 void *userdata) {
1117
1118         sd_event_source *s;
1119         int r;
1120
1121         assert_return(e, -EINVAL);
1122         assert_return(callback, -EINVAL);
1123         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1124         assert_return(!event_pid_changed(e), -ECHILD);
1125
1126         r = set_ensure_allocated(&e->post_sources, NULL);
1127         if (r < 0)
1128                 return r;
1129
1130         s = source_new(e, !ret, SOURCE_POST);
1131         if (!s)
1132                 return -ENOMEM;
1133
1134         s->post.callback = callback;
1135         s->userdata = userdata;
1136         s->enabled = SD_EVENT_ON;
1137
1138         r = set_put(e->post_sources, s);
1139         if (r < 0) {
1140                 source_free(s);
1141                 return r;
1142         }
1143
1144         if (ret)
1145                 *ret = s;
1146
1147         return 0;
1148 }
1149
1150 _public_ int sd_event_add_exit(
1151                 sd_event *e,
1152                 sd_event_source **ret,
1153                 sd_event_handler_t callback,
1154                 void *userdata) {
1155
1156         sd_event_source *s;
1157         int r;
1158
1159         assert_return(e, -EINVAL);
1160         assert_return(callback, -EINVAL);
1161         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1162         assert_return(!event_pid_changed(e), -ECHILD);
1163
1164         if (!e->exit) {
1165                 e->exit = prioq_new(exit_prioq_compare);
1166                 if (!e->exit)
1167                         return -ENOMEM;
1168         }
1169
1170         s = source_new(e, !ret, SOURCE_EXIT);
1171         if (!s)
1172                 return -ENOMEM;
1173
1174         s->exit.callback = callback;
1175         s->userdata = userdata;
1176         s->exit.prioq_index = PRIOQ_IDX_NULL;
1177         s->enabled = SD_EVENT_ONESHOT;
1178
1179         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1180         if (r < 0) {
1181                 source_free(s);
1182                 return r;
1183         }
1184
1185         if (ret)
1186                 *ret = s;
1187
1188         return 0;
1189 }
1190
1191 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1192         assert_return(s, NULL);
1193
1194         assert(s->n_ref >= 1);
1195         s->n_ref++;
1196
1197         return s;
1198 }
1199
/* Drops one reference on @s; frees it when the counter reaches zero.
 * Always returns NULL, so callers can write "s = sd_event_source_unref(s)". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1228
/* Sets a descriptive name for @s (used for debugging/introspection).
 * Replaces any previous name; @name may be NULL to unset it. */
_public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
        assert_return(s, -EINVAL);

        return free_and_strdup(&s->name, name);
}
1234
1235 _public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
1236         assert_return(s, -EINVAL);
1237         assert_return(name, -EINVAL);
1238
1239         *name = s->name;
1240
1241         return 0;
1242 }
1243
1244 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1245         assert_return(s, NULL);
1246
1247         return s->event;
1248 }
1249
1250 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1251         assert_return(s, -EINVAL);
1252         assert_return(s->type != SOURCE_EXIT, -EDOM);
1253         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1254         assert_return(!event_pid_changed(s->event), -ECHILD);
1255
1256         return s->pending;
1257 }
1258
1259 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1260         assert_return(s, -EINVAL);
1261         assert_return(s->type == SOURCE_IO, -EDOM);
1262         assert_return(!event_pid_changed(s->event), -ECHILD);
1263
1264         return s->io.fd;
1265 }
1266
/* Replaces the file descriptor an I/O event source watches. If the
 * source is enabled, the new fd is registered with epoll first and the
 * old one removed only afterwards, so the source stays consistent on
 * failure (the old fd is kept registered). */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled source: nothing registered with epoll yet,
                 * just remember the new fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back: keep watching the old fd. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: the old fd might already be closed. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1302
1303 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1304         assert_return(s, -EINVAL);
1305         assert_return(events, -EINVAL);
1306         assert_return(s->type == SOURCE_IO, -EDOM);
1307         assert_return(!event_pid_changed(s->event), -ECHILD);
1308
1309         *events = s->io.events;
1310         return 0;
1311 }
1312
/* Changes the epoll event mask of an I/O source. Re-registers with
 * epoll when the source is enabled and clears any pending event, since
 * queued revents may no longer match the new mask. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
1337
1338 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1339         assert_return(s, -EINVAL);
1340         assert_return(revents, -EINVAL);
1341         assert_return(s->type == SOURCE_IO, -EDOM);
1342         assert_return(s->pending, -ENODATA);
1343         assert_return(!event_pid_changed(s->event), -ECHILD);
1344
1345         *revents = s->io.revents;
1346         return 0;
1347 }
1348
1349 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1350         assert_return(s, -EINVAL);
1351         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1352         assert_return(!event_pid_changed(s->event), -ECHILD);
1353
1354         return s->signal.sig;
1355 }
1356
1357 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1358         assert_return(s, -EINVAL);
1359         assert_return(!event_pid_changed(s->event), -ECHILD);
1360
1361         return s->priority;
1362 }
1363
1364 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1365         assert_return(s, -EINVAL);
1366         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1367         assert_return(!event_pid_changed(s->event), -ECHILD);
1368
1369         if (s->priority == priority)
1370                 return 0;
1371
1372         s->priority = priority;
1373
1374         if (s->pending)
1375                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1376
1377         if (s->prepare)
1378                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1379
1380         if (s->type == SOURCE_EXIT)
1381                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1382
1383         return 0;
1384 }
1385
1386 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1387         assert_return(s, -EINVAL);
1388         assert_return(m, -EINVAL);
1389         assert_return(!event_pid_changed(s->event), -ECHILD);
1390
1391         *m = s->enabled;
1392         return 0;
1393 }
1394
/* Enables or disables an event source. @m is one of SD_EVENT_OFF,
 * SD_EVENT_ON or SD_EVENT_ONESHOT. Changing the state also updates the
 * kernel facility backing the source (epoll registration, signalfd
 * mask, timer queues) and reshuffles the affected priority queues. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Drop the signal from the signalfd mask, unless it is
                         * SIGCHLD and still needed by enabled child sources. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Stop watching SIGCHLD unless an explicit signal
                         * source for it still exists. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                /* Enabling (SD_EVENT_ON or SD_EVENT_ONESHOT). */
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only account + watch SIGCHLD when transitioning from
                         * OFF; ON<->ONESHOT changes need no signalfd update. */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1548
1549 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1550         assert_return(s, -EINVAL);
1551         assert_return(usec, -EINVAL);
1552         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1553         assert_return(!event_pid_changed(s->event), -ECHILD);
1554
1555         *usec = s->time.next;
1556         return 0;
1557 }
1558
1559 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1560         struct clock_data *d;
1561
1562         assert_return(s, -EINVAL);
1563         assert_return(usec != (uint64_t) -1, -EINVAL);
1564         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1565         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1566         assert_return(!event_pid_changed(s->event), -ECHILD);
1567
1568         s->time.next = usec;
1569
1570         source_set_pending(s, false);
1571
1572         d = event_get_clock_data(s->event, s->type);
1573         assert(d);
1574
1575         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1576         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1577         d->needs_rearm = true;
1578
1579         return 0;
1580 }
1581
1582 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1583         assert_return(s, -EINVAL);
1584         assert_return(usec, -EINVAL);
1585         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1586         assert_return(!event_pid_changed(s->event), -ECHILD);
1587
1588         *usec = s->time.accuracy;
1589         return 0;
1590 }
1591
1592 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1593         struct clock_data *d;
1594
1595         assert_return(s, -EINVAL);
1596         assert_return(usec != (uint64_t) -1, -EINVAL);
1597         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1598         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1599         assert_return(!event_pid_changed(s->event), -ECHILD);
1600
1601         if (usec == 0)
1602                 usec = DEFAULT_ACCURACY_USEC;
1603
1604         s->time.accuracy = usec;
1605
1606         source_set_pending(s, false);
1607
1608         d = event_get_clock_data(s->event, s->type);
1609         assert(d);
1610
1611         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1612         d->needs_rearm = true;
1613
1614         return 0;
1615 }
1616
1617 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1618         assert_return(s, -EINVAL);
1619         assert_return(clock, -EINVAL);
1620         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1621         assert_return(!event_pid_changed(s->event), -ECHILD);
1622
1623         *clock = event_source_type_to_clock(s->type);
1624         return 0;
1625 }
1626
1627 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1628         assert_return(s, -EINVAL);
1629         assert_return(pid, -EINVAL);
1630         assert_return(s->type == SOURCE_CHILD, -EDOM);
1631         assert_return(!event_pid_changed(s->event), -ECHILD);
1632
1633         *pid = s->child.pid;
1634         return 0;
1635 }
1636
/* Installs (or removes, when @callback is NULL) a "prepare" callback
 * that is invoked right before the event loop goes to sleep. Sources
 * with a prepare callback are tracked in a dedicated priority queue. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Swapping one callback for another: the source is already in
         * the prepare queue, just update the pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1668
1669 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1670         assert_return(s, NULL);
1671
1672         return s->userdata;
1673 }
1674
1675 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1676         void *ret;
1677
1678         assert_return(s, NULL);
1679
1680         ret = s->userdata;
1681         s->userdata = userdata;
1682
1683         return ret;
1684 }
1685
/* Given the window [a, b] in which the next wakeup must happen, picks
 * the actual wakeup time: the latest spot inside the window that
 * coincides with the per-boot perturbation offset within a 1min, 10s,
 * 1s or 250ms period (tried in that order), so that wakeups coalesce
 * system-wide. Falls back to b if no period fits the window. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the perturbation spot within the minute containing b;
         * step back one minute if that lands past the deadline. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Same idea with a 10s period. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Same idea with a 1s period. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Same idea with a 250ms period. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronised spot fits; wake at the last possible time. */
        return b;
}
1763
1764 static int event_arm_timer(
1765                 sd_event *e,
1766                 struct clock_data *d) {
1767
1768         struct itimerspec its = {};
1769         sd_event_source *a, *b;
1770         usec_t t;
1771         int r;
1772
1773         assert(e);
1774         assert(d);
1775
1776         if (!d->needs_rearm)
1777                 return 0;
1778         else
1779                 d->needs_rearm = false;
1780
1781         a = prioq_peek(d->earliest);
1782         if (!a || a->enabled == SD_EVENT_OFF) {
1783
1784                 if (d->fd < 0)
1785                         return 0;
1786
1787                 if (d->next == USEC_INFINITY)
1788                         return 0;
1789
1790                 /* disarm */
1791                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1792                 if (r < 0)
1793                         return r;
1794
1795                 d->next = USEC_INFINITY;
1796                 return 0;
1797         }
1798
1799         b = prioq_peek(d->latest);
1800         assert_se(b && b->enabled != SD_EVENT_OFF);
1801
1802         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1803         if (d->next == t)
1804                 return 0;
1805
1806         assert_se(d->fd >= 0);
1807
1808         if (t == 0) {
1809                 /* We don' want to disarm here, just mean some time looooong ago. */
1810                 its.it_value.tv_sec = 0;
1811                 its.it_value.tv_nsec = 1;
1812         } else
1813                 timespec_store(&its.it_value, t);
1814
1815         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1816         if (r < 0)
1817                 return -errno;
1818
1819         d->next = t;
1820         return 0;
1821 }
1822
1823 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1824         assert(e);
1825         assert(s);
1826         assert(s->type == SOURCE_IO);
1827
1828         /* If the event source was already pending, we just OR in the
1829          * new revents, otherwise we reset the value. The ORing is
1830          * necessary to handle EPOLLONESHOT events properly where
1831          * readability might happen independently of writability, and
1832          * we need to keep track of both */
1833
1834         if (s->pending)
1835                 s->io.revents |= revents;
1836         else
1837                 s->io.revents = revents;
1838
1839         return source_set_pending(s, true);
1840 }
1841
1842 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1843         uint64_t x;
1844         ssize_t ss;
1845
1846         assert(e);
1847         assert(fd >= 0);
1848
1849         assert_return(events == EPOLLIN, -EIO);
1850
1851         ss = read(fd, &x, sizeof(x));
1852         if (ss < 0) {
1853                 if (errno == EAGAIN || errno == EINTR)
1854                         return 0;
1855
1856                 return -errno;
1857         }
1858
1859         if (_unlikely_(ss != sizeof(x)))
1860                 return -EIO;
1861
1862         if (next)
1863                 *next = USEC_INFINITY;
1864
1865         return 0;
1866 }
1867
/* Marks every timer source on clock queue @d whose elapse time is at
 * or before @n as pending, reshuffling the queues as it goes so the
 * next candidate bubbles up to the top. */
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                /* Stop at the first source that has not elapsed yet,
                 * is disabled, or is already queued as pending. */
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
1898
/* Polls each registered child source once with waitid(WNOHANG) and
 * marks those with a state change as pending. Called after SIGCHLD was
 * received (and once per loop iteration while need_process_child is
 * set). */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when there was no state change. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1965
1966 static int process_signal(sd_event *e, uint32_t events) {
1967         bool read_one = false;
1968         int r;
1969
1970         assert(e);
1971
1972         assert_return(events == EPOLLIN, -EIO);
1973
1974         for (;;) {
1975                 struct signalfd_siginfo si;
1976                 ssize_t ss;
1977                 sd_event_source *s = NULL;
1978
1979                 ss = read(e->signal_fd, &si, sizeof(si));
1980                 if (ss < 0) {
1981                         if (errno == EAGAIN || errno == EINTR)
1982                                 return read_one;
1983
1984                         return -errno;
1985                 }
1986
1987                 if (_unlikely_(ss != sizeof(si)))
1988                         return -EIO;
1989
1990                 read_one = true;
1991
1992                 if (si.ssi_signo == SIGCHLD) {
1993                         r = process_child(e);
1994                         if (r < 0)
1995                                 return r;
1996                         if (r > 0)
1997                                 continue;
1998                 }
1999
2000                 if (e->signal_sources)
2001                         s = e->signal_sources[si.ssi_signo];
2002
2003                 if (!s)
2004                         continue;
2005
2006                 s->signal.siginfo = si;
2007                 r = source_set_pending(s, true);
2008                 if (r < 0)
2009                         return r;
2010         }
2011 }
2012
/* Invoke the callback of one event source and do the bookkeeping
 * around it. Returns 1 on success, or a negative errno-style error if
 * updating internal state failed; an error from the user callback
 * itself does not propagate, it merely disables the source. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Clear the pending flag before running the callback, so that
         * the callback may re-arm the source. DEFER and EXIT sources
         * stay pending for as long as they are enabled, hence they are
         * skipped here. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback runs, not
         * after, so that the callback may turn them back on. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While the callback runs an unref of the source must not free
         * it; source_free() is deferred to the n_ref check below. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* The child was left unreaped (WNOWAIT) so the
                 * callback still sees it as a zombie; reap it only
                 * after the callback returned. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->name)
                        log_debug("Event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                else
                        log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
        }

        /* If the callback dropped the last reference, free the source
         * now that dispatching is over; otherwise a failing callback
         * disables the source. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2118
2119 static int event_prepare(sd_event *e) {
2120         int r;
2121
2122         assert(e);
2123
2124         for (;;) {
2125                 sd_event_source *s;
2126
2127                 s = prioq_peek(e->prepare);
2128                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2129                         break;
2130
2131                 s->prepare_iteration = e->iteration;
2132                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2133                 if (r < 0)
2134                         return r;
2135
2136                 assert(s->prepare);
2137
2138                 s->dispatching = true;
2139                 r = s->prepare(s, s->userdata);
2140                 s->dispatching = false;
2141
2142                 if (r < 0) {
2143                         if (s->name)
2144                                 log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->name, strerror(-r));
2145                         else
2146                                 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2147                 }
2148
2149                 if (s->n_ref == 0)
2150                         source_free(s);
2151                 else if (r < 0)
2152                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2153         }
2154
2155         return 0;
2156 }
2157
2158 static int dispatch_exit(sd_event *e) {
2159         sd_event_source *p;
2160         int r;
2161
2162         assert(e);
2163
2164         p = prioq_peek(e->exit);
2165         if (!p || p->enabled == SD_EVENT_OFF) {
2166                 e->state = SD_EVENT_FINISHED;
2167                 return 0;
2168         }
2169
2170         sd_event_ref(e);
2171         e->iteration++;
2172         e->state = SD_EVENT_EXITING;
2173
2174         r = source_dispatch(p);
2175
2176         e->state = SD_EVENT_PASSIVE;
2177         sd_event_unref(e);
2178
2179         return r;
2180 }
2181
2182 static sd_event_source* event_next_pending(sd_event *e) {
2183         sd_event_source *p;
2184
2185         assert(e);
2186
2187         p = prioq_peek(e->pending);
2188         if (!p)
2189                 return NULL;
2190
2191         if (p->enabled == SD_EVENT_OFF)
2192                 return NULL;
2193
2194         return p;
2195 }
2196
2197 static int arm_watchdog(sd_event *e) {
2198         struct itimerspec its = {};
2199         usec_t t;
2200         int r;
2201
2202         assert(e);
2203         assert(e->watchdog_fd >= 0);
2204
2205         t = sleep_between(e,
2206                           e->watchdog_last + (e->watchdog_period / 2),
2207                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2208
2209         timespec_store(&its.it_value, t);
2210
2211         /* Make sure we never set the watchdog to 0, which tells the
2212          * kernel to disable it. */
2213         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2214                 its.it_value.tv_nsec = 1;
2215
2216         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2217         if (r < 0)
2218                 return -errno;
2219
2220         return 0;
2221 }
2222
2223 static int process_watchdog(sd_event *e) {
2224         assert(e);
2225
2226         if (!e->watchdog)
2227                 return 0;
2228
2229         /* Don't notify watchdog too often */
2230         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2231                 return 0;
2232
2233         sd_notify(false, "WATCHDOG=1");
2234         e->watchdog_last = e->timestamp.monotonic;
2235
2236         return arm_watchdog(e);
2237 }
2238
/* Transition the loop from PASSIVE to PREPARED: run prepare callbacks
 * and arm all timerfds. Returns > 0 if something is already pending
 * (the loop then went straight to PENDING), 0 if the caller should
 * poll, negative errno on failure. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* On exit, skip normal preparation; the pending path below
         * will pick up the exit handling via sd_event_wait(). */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Arm the timerfd of each clock for its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is (or might be) pending already: do a zero-timeout
         * wait to collect it. sd_event_wait() requires the PREPARED
         * state, and resets to PASSIVE when it returns 0, so restore
         * PREPARED in that case to keep our state contract. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2291
/* Wait for events on the epoll fd and mark the triggered sources
 * pending. timeout is in usec; (uint64_t) -1 means wait forever, and
 * is otherwise rounded up to whole milliseconds for epoll_wait().
 * Returns 1 if something is pending afterwards (state PENDING), 0 if
 * not (state back to PASSIVE), negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        /* When an exit was requested there is nothing to wait for,
         * claim "pending" so the caller proceeds to dispatch_exit(). */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocate the event buffer, capped so that a loop with
         * many sources does not blow the stack; excess events are
         * simply picked up on the next iteration. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* Treat EINTR like a spurious wake-up: report
                 * "pending" and let the dispatch find nothing. */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wake-up timestamps; they are handed out via
         * sd_event_now() and used for timer processing below. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* Internal fds are tagged with small SOURCE_* integers in
         * data.ptr; anything else is a real IO source pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark all elapsed timer sources pending, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2392
2393 _public_ int sd_event_dispatch(sd_event *e) {
2394         sd_event_source *p;
2395         int r;
2396
2397         assert_return(e, -EINVAL);
2398         assert_return(!event_pid_changed(e), -ECHILD);
2399         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2400         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2401
2402         if (e->exit_requested)
2403                 return dispatch_exit(e);
2404
2405         p = event_next_pending(e);
2406         if (p) {
2407                 sd_event_ref(e);
2408
2409                 e->state = SD_EVENT_RUNNING;
2410                 r = source_dispatch(p);
2411                 e->state = SD_EVENT_PASSIVE;
2412
2413                 sd_event_unref(e);
2414
2415                 return r;
2416         }
2417
2418         e->state = SD_EVENT_PASSIVE;
2419
2420         return 1;
2421 }
2422
2423 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2424         int r;
2425
2426         assert_return(e, -EINVAL);
2427         assert_return(!event_pid_changed(e), -ECHILD);
2428         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2429         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2430
2431         r = sd_event_prepare(e);
2432         if (r > 0)
2433                 return sd_event_dispatch(e);
2434         else if (r < 0)
2435                 return r;
2436
2437         r = sd_event_wait(e, timeout);
2438         if (r > 0)
2439                 return sd_event_dispatch(e);
2440         else
2441                 return r;
2442 }
2443
2444 _public_ int sd_event_loop(sd_event *e) {
2445         int r;
2446
2447         assert_return(e, -EINVAL);
2448         assert_return(!event_pid_changed(e), -ECHILD);
2449         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2450
2451         sd_event_ref(e);
2452
2453         while (e->state != SD_EVENT_FINISHED) {
2454                 r = sd_event_run(e, (uint64_t) -1);
2455                 if (r < 0)
2456                         goto finish;
2457         }
2458
2459         r = e->exit_code;
2460
2461 finish:
2462         sd_event_unref(e);
2463         return r;
2464 }
2465
/* Return the epoll fd of the loop, so that it can be embedded into an
 * external poll loop. The fd remains owned by the event loop. */
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
2473
/* Return the current state of the loop (one of the SD_EVENT_* state
 * constants). */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2480
2481 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2482         assert_return(e, -EINVAL);
2483         assert_return(code, -EINVAL);
2484         assert_return(!event_pid_changed(e), -ECHILD);
2485
2486         if (!e->exit_requested)
2487                 return -ENODATA;
2488
2489         *code = e->exit_code;
2490         return 0;
2491 }
2492
/* Request that the event loop exits: the next iteration will run the
 * exit sources (see dispatch_exit()) and then finish with the given
 * code. */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
2503
2504 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2505         assert_return(e, -EINVAL);
2506         assert_return(usec, -EINVAL);
2507         assert_return(!event_pid_changed(e), -ECHILD);
2508
2509         /* If we haven't run yet, just get the actual time */
2510         if (!dual_timestamp_is_set(&e->timestamp))
2511                 return -ENODATA;
2512
2513         switch (clock) {
2514
2515         case CLOCK_REALTIME:
2516         case CLOCK_REALTIME_ALARM:
2517                 *usec = e->timestamp.realtime;
2518                 break;
2519
2520         case CLOCK_MONOTONIC:
2521                 *usec = e->timestamp.monotonic;
2522                 break;
2523
2524         case CLOCK_BOOTTIME:
2525         case CLOCK_BOOTTIME_ALARM:
2526                 *usec = e->timestamp_boottime;
2527                 break;
2528         }
2529
2530         return 0;
2531 }
2532
/* Return the per-thread default event loop, allocating it on first
 * use. With a NULL argument this merely reports (as a boolean)
 * whether a default loop exists for the calling thread. Returns 1
 * when a new loop was created, 0 when an existing one was ref'ed. */
_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember where the thread-local pointer lives, presumably so
         * that destruction can clear it — confirm in sd_event_unref. */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2558
2559 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2560         assert_return(e, -EINVAL);
2561         assert_return(tid, -EINVAL);
2562         assert_return(!event_pid_changed(e), -ECHILD);
2563
2564         if (e->tid != 0) {
2565                 *tid = e->tid;
2566                 return 0;
2567         }
2568
2569         return -ENXIO;
2570 }
2571
/* Enable or disable the sd_notify() watchdog keep-alive logic for
 * this loop. Returns the new state (0 or 1), 0 if enabling was
 * requested but the service has no watchdog configured, or a negative
 * errno-style error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state, nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* Query WATCHDOG_USEC from the environment; r == 0
                 * means no watchdog was requested for this service. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd with the SOURCE_WATCHDOG marker so that
                 * sd_event_wait() recognizes its events. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Roll back the half-initialized timerfd before reporting. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2623
/* Report whether the watchdog keep-alive logic is currently enabled
 * for this loop (see sd_event_set_watchdog()). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}