sd-event: implicitly set signal event sources' descriptions to the signal name
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
43 typedef enum EventSourceType {
44         SOURCE_IO,
45         SOURCE_TIME_REALTIME,
46         SOURCE_TIME_BOOTTIME,
47         SOURCE_TIME_MONOTONIC,
48         SOURCE_TIME_REALTIME_ALARM,
49         SOURCE_TIME_BOOTTIME_ALARM,
50         SOURCE_SIGNAL,
51         SOURCE_CHILD,
52         SOURCE_DEFER,
53         SOURCE_POST,
54         SOURCE_EXIT,
55         SOURCE_WATCHDOG,
56         _SOURCE_EVENT_SOURCE_TYPE_MAX,
57         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
58 } EventSourceType;
59
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
62 struct sd_event_source {
63         unsigned n_ref;
64
65         sd_event *event;
66         void *userdata;
67         sd_event_handler_t prepare;
68
69         char *description;
70
71         EventSourceType type:5;
72         int enabled:3;
73         bool pending:1;
74         bool dispatching:1;
75         bool floating:1;
76
77         int64_t priority;
78         unsigned pending_index;
79         unsigned prepare_index;
80         unsigned pending_iteration;
81         unsigned prepare_iteration;
82
83         LIST_FIELDS(sd_event_source, sources);
84
85         union {
86                 struct {
87                         sd_event_io_handler_t callback;
88                         int fd;
89                         uint32_t events;
90                         uint32_t revents;
91                         bool registered:1;
92                 } io;
93                 struct {
94                         sd_event_time_handler_t callback;
95                         usec_t next, accuracy;
96                         unsigned earliest_index;
97                         unsigned latest_index;
98                 } time;
99                 struct {
100                         sd_event_signal_handler_t callback;
101                         struct signalfd_siginfo siginfo;
102                         int sig;
103                 } signal;
104                 struct {
105                         sd_event_child_handler_t callback;
106                         siginfo_t siginfo;
107                         pid_t pid;
108                         int options;
109                 } child;
110                 struct {
111                         sd_event_handler_t callback;
112                 } defer;
113                 struct {
114                         sd_event_handler_t callback;
115                 } post;
116                 struct {
117                         sd_event_handler_t callback;
118                         unsigned prioq_index;
119                 } exit;
120         };
121 };
122
123 struct clock_data {
124         int fd;
125
126         /* For all clocks we maintain two priority queues each: one
127          * ordered by the earliest time the events may be
128          * dispatched, and one ordered by the latest time they must
129          * have been dispatched by. The range between the top entries
130          * of the two prioqs is the time window within which we can
131          * freely schedule wakeups. */
132
133         Prioq *earliest;
134         Prioq *latest;
135         usec_t next;
136
137         bool needs_rearm:1;
138 };
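/* Illustrative example (added for clarity, not from the original sources): a
 * timer source armed with next=1000ms and accuracy=250ms may be dispatched
 * anywhere in [1000ms, 1250ms]. With several such sources, the top of
 * 'earliest' gives the lower bound and the top of 'latest' the upper bound of
 * the scheduling window, and the actual timerfd wakeup is placed somewhere
 * inside that window (see sleep_between() further down). */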
139
140 struct sd_event {
141         unsigned n_ref;
142
143         int epoll_fd;
144         int signal_fd;
145         int watchdog_fd;
146
147         Prioq *pending;
148         Prioq *prepare;
149
150         /* timerfd_create() only supports these five clocks so far. We
151          * can add support for more clocks when the kernel learns to
152          * deal with them, too. */
153         struct clock_data realtime;
154         struct clock_data boottime;
155         struct clock_data monotonic;
156         struct clock_data realtime_alarm;
157         struct clock_data boottime_alarm;
158
159         usec_t perturb;
160
161         sigset_t sigset;
162         sd_event_source **signal_sources;
163
164         Hashmap *child_sources;
165         unsigned n_enabled_child_sources;
166
167         Set *post_sources;
168
169         Prioq *exit;
170
171         pid_t original_pid;
172
173         unsigned iteration;
174         dual_timestamp timestamp;
175         usec_t timestamp_boottime;
176         int state;
177
178         bool exit_requested:1;
179         bool need_process_child:1;
180         bool watchdog:1;
181
182         int exit_code;
183
184         pid_t tid;
185         sd_event **default_event_ptr;
186
187         usec_t watchdog_last, watchdog_period;
188
189         unsigned n_sources;
190
191         LIST_HEAD(sd_event_source, sources);
192 };
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
365 static void event_free(sd_event *e) {
366         sd_event_source *s;
367
368         assert(e);
369
370         while ((s = e->sources)) {
371                 assert(s->floating);
372                 source_disconnect(s);
373                 sd_event_source_unref(s);
374         }
375
376         assert(e->n_sources == 0);
377
378         if (e->default_event_ptr)
379                 *(e->default_event_ptr) = NULL;
380
381         safe_close(e->epoll_fd);
382         safe_close(e->signal_fd);
383         safe_close(e->watchdog_fd);
384
385         free_clock_data(&e->realtime);
386         free_clock_data(&e->boottime);
387         free_clock_data(&e->monotonic);
388         free_clock_data(&e->realtime_alarm);
389         free_clock_data(&e->boottime_alarm);
390
391         prioq_free(e->pending);
392         prioq_free(e->prepare);
393         prioq_free(e->exit);
394
395         free(e->signal_sources);
396
397         hashmap_free(e->child_sources);
398         set_free(e->post_sources);
399         free(e);
400 }
401
402 _public_ int sd_event_new(sd_event** ret) {
403         sd_event *e;
404         int r;
405
406         assert_return(ret, -EINVAL);
407
408         e = new0(sd_event, 1);
409         if (!e)
410                 return -ENOMEM;
411
412         e->n_ref = 1;
413         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
414         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
415         e->original_pid = getpid();
416         e->perturb = USEC_INFINITY;
417
418         assert_se(sigemptyset(&e->sigset) == 0);
419
420         e->pending = prioq_new(pending_prioq_compare);
421         if (!e->pending) {
422                 r = -ENOMEM;
423                 goto fail;
424         }
425
426         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427         if (e->epoll_fd < 0) {
428                 r = -errno;
429                 goto fail;
430         }
431
432         *ret = e;
433         return 0;
434
435 fail:
436         event_free(e);
437         return r;
438 }
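/* Minimal usage sketch (illustrative only, not part of this file): create an
 * event loop, attach sources with the sd_event_add_*() calls below, run it
 * with sd_event_loop(), and drop the reference when done.
 *
 *     sd_event *e = NULL;
 *     int r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *     ... sd_event_add_io()/sd_event_add_time()/sd_event_add_signal() ...
 *     r = sd_event_loop(e);        // dispatches events until sd_event_exit()
 *     sd_event_unref(e);
 */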
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
463 static bool event_pid_changed(sd_event *e) {
464         assert(e);
465
466         /* We don't support people creating an event loop and keeping
467          * it around over a fork(). Let's complain. */
468
469         return e->original_pid != getpid();
470 }
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
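/* (Re)create or update the signalfd for the current signal mask. Note that
 * signalfd() with an fd of -1 allocates a new descriptor, while passing an
 * existing fd merely replaces its signal mask, so the fd only needs to be
 * added to the epoll set the first time around. */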
601 static int event_update_signal_fd(sd_event *e) {
602         struct epoll_event ev = {};
603         bool add_to_epoll;
604         int r;
605
606         assert(e);
607
608         add_to_epoll = e->signal_fd < 0;
609
610         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
611         if (r < 0)
612                 return -errno;
613
614         e->signal_fd = r;
615
616         if (!add_to_epoll)
617                 return 0;
618
619         ev.events = EPOLLIN;
620         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
621
622         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
623         if (r < 0) {
624                 e->signal_fd = safe_close(e->signal_fd);
625                 return -errno;
626         }
627
628         return 0;
629 }
630
631 static void source_disconnect(sd_event_source *s) {
632         sd_event *event;
633
634         assert(s);
635
636         if (!s->event)
637                 return;
638
639         assert(s->event->n_sources > 0);
640
641         switch (s->type) {
642
643         case SOURCE_IO:
644                 if (s->io.fd >= 0)
645                         source_io_unregister(s);
646
647                 break;
648
649         case SOURCE_TIME_REALTIME:
650         case SOURCE_TIME_BOOTTIME:
651         case SOURCE_TIME_MONOTONIC:
652         case SOURCE_TIME_REALTIME_ALARM:
653         case SOURCE_TIME_BOOTTIME_ALARM: {
654                 struct clock_data *d;
655
656                 d = event_get_clock_data(s->event, s->type);
657                 assert(d);
658
659                 prioq_remove(d->earliest, s, &s->time.earliest_index);
660                 prioq_remove(d->latest, s, &s->time.latest_index);
661                 d->needs_rearm = true;
662                 break;
663         }
664
665         case SOURCE_SIGNAL:
666                 if (s->signal.sig > 0) {
667                         if (s->event->signal_sources)
668                                 s->event->signal_sources[s->signal.sig] = NULL;
669
670                         /* If the signal was on and now it is off... */
671                         if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
672                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
673
674                                 (void) event_update_signal_fd(s->event);
675                                 /* If disabling failed, we might get a spurious event,
676                                  * but otherwise nothing bad should happen. */
677                         }
678                 }
679
680                 break;
681
682         case SOURCE_CHILD:
683                 if (s->child.pid > 0) {
684                         if (s->enabled != SD_EVENT_OFF) {
685                                 assert(s->event->n_enabled_child_sources > 0);
686                                 s->event->n_enabled_child_sources--;
687
688                                 /* We know the signal was on, if it is off now... */
689                                 if (!need_signal(s->event, SIGCHLD)) {
690                                         assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
691
692                                         (void) event_update_signal_fd(s->event);
693                                         /* If disabling failed, we might get a spurious event,
694                                          * but otherwise nothing bad should happen. */
695                                 }
696                         }
697
698                         hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
699                 }
700
701                 break;
702
703         case SOURCE_DEFER:
704                 /* nothing */
705                 break;
706
707         case SOURCE_POST:
708                 set_remove(s->event->post_sources, s);
709                 break;
710
711         case SOURCE_EXIT:
712                 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
713                 break;
714
715         default:
716                 assert_not_reached("Wut? I shouldn't exist.");
717         }
718
719         if (s->pending)
720                 prioq_remove(s->event->pending, s, &s->pending_index);
721
722         if (s->prepare)
723                 prioq_remove(s->event->prepare, s, &s->prepare_index);
724
725         event = s->event;
726
727         s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
728         s->event = NULL;
729         LIST_REMOVE(sources, event->sources, s);
730         event->n_sources--;
731
732         if (!s->floating)
733                 sd_event_unref(event);
734 }
735
736 static void source_free(sd_event_source *s) {
737         assert(s);
738
739         source_disconnect(s);
740         free(s->description);
741         free(s);
742 }
743
744 static int source_set_pending(sd_event_source *s, bool b) {
745         int r;
746
747         assert(s);
748         assert(s->type != SOURCE_EXIT);
749
750         if (s->pending == b)
751                 return 0;
752
753         s->pending = b;
754
755         if (b) {
756                 s->pending_iteration = s->event->iteration;
757
758                 r = prioq_put(s->event->pending, s, &s->pending_index);
759                 if (r < 0) {
760                         s->pending = false;
761                         return r;
762                 }
763         } else
764                 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
765
766         if (EVENT_SOURCE_IS_TIME(s->type)) {
767                 struct clock_data *d;
768
769                 d = event_get_clock_data(s->event, s->type);
770                 assert(d);
771
772                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
773                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
774                 d->needs_rearm = true;
775         }
776
777         return 0;
778 }
779
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
781         sd_event_source *s;
782
783         assert(e);
784
785         s = new0(sd_event_source, 1);
786         if (!s)
787                 return NULL;
788
789         s->n_ref = 1;
790         s->event = e;
791         s->floating = floating;
792         s->type = type;
793         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
794
795         if (!floating)
796                 sd_event_ref(e);
797
798         LIST_PREPEND(sources, e->sources, s);
799         e->n_sources ++;
800
801         return s;
802 }
803
804 _public_ int sd_event_add_io(
805                 sd_event *e,
806                 sd_event_source **ret,
807                 int fd,
808                 uint32_t events,
809                 sd_event_io_handler_t callback,
810                 void *userdata) {
811
812         sd_event_source *s;
813         int r;
814
815         assert_return(e, -EINVAL);
816         assert_return(fd >= 0, -EINVAL);
817         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
818         assert_return(callback, -EINVAL);
819         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
820         assert_return(!event_pid_changed(e), -ECHILD);
821
822         s = source_new(e, !ret, SOURCE_IO);
823         if (!s)
824                 return -ENOMEM;
825
826         s->io.fd = fd;
827         s->io.events = events;
828         s->io.callback = callback;
829         s->userdata = userdata;
830         s->enabled = SD_EVENT_ON;
831
832         r = source_io_register(s, s->enabled, events);
833         if (r < 0) {
834                 source_free(s);
835                 return r;
836         }
837
838         if (ret)
839                 *ret = s;
840
841         return 0;
842 }
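/* Usage sketch (illustrative, not part of this file): watch a file descriptor
 * for input; the handler signature matches sd_event_io_handler_t.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, on_io, NULL);
 */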
843
844 static void initialize_perturb(sd_event *e) {
845         sd_id128_t bootid = {};
846
847         /* When we sleep for longer, we try to realign the wakeup to
848            the same time within each minute/second/250ms, so that
849            events all across the system can be coalesced into a single
850            CPU wakeup. However, let's take some system-specific
851            randomness for this value, so that in a network of systems
852            with synced clocks timer events are distributed a
853            bit. Here, we calculate a perturbation usec offset from the
854            boot ID. */
855
856         if (_likely_(e->perturb != USEC_INFINITY))
857                 return;
858
859         if (sd_id128_get_boot(&bootid) >= 0)
860                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
861 }
862
863 static int event_setup_timer_fd(
864                 sd_event *e,
865                 struct clock_data *d,
866                 clockid_t clock) {
867
868         struct epoll_event ev = {};
869         int r, fd;
870
871         assert(e);
872         assert(d);
873
874         if (_likely_(d->fd >= 0))
875                 return 0;
876
877         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
878         if (fd < 0)
879                 return -errno;
880
881         ev.events = EPOLLIN;
882         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
883
884         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
885         if (r < 0) {
886                 safe_close(fd);
887                 return -errno;
888         }
889
890         d->fd = fd;
891         return 0;
892 }
893
894 _public_ int sd_event_add_time(
895                 sd_event *e,
896                 sd_event_source **ret,
897                 clockid_t clock,
898                 uint64_t usec,
899                 uint64_t accuracy,
900                 sd_event_time_handler_t callback,
901                 void *userdata) {
902
903         EventSourceType type;
904         sd_event_source *s;
905         struct clock_data *d;
906         int r;
907
908         assert_return(e, -EINVAL);
909         assert_return(usec != (uint64_t) -1, -EINVAL);
910         assert_return(accuracy != (uint64_t) -1, -EINVAL);
911         assert_return(callback, -EINVAL);
912         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
913         assert_return(!event_pid_changed(e), -ECHILD);
914
915         type = clock_to_event_source_type(clock);
916         assert_return(type >= 0, -ENOTSUP);
917
918         d = event_get_clock_data(e, type);
919         assert(d);
920
921         if (!d->earliest) {
922                 d->earliest = prioq_new(earliest_time_prioq_compare);
923                 if (!d->earliest)
924                         return -ENOMEM;
925         }
926
927         if (!d->latest) {
928                 d->latest = prioq_new(latest_time_prioq_compare);
929                 if (!d->latest)
930                         return -ENOMEM;
931         }
932
933         if (d->fd < 0) {
934                 r = event_setup_timer_fd(e, d, clock);
935                 if (r < 0)
936                         return r;
937         }
938
939         s = source_new(e, !ret, type);
940         if (!s)
941                 return -ENOMEM;
942
943         s->time.next = usec;
944         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
945         s->time.callback = callback;
946         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
947         s->userdata = userdata;
948         s->enabled = SD_EVENT_ONESHOT;
949
950         d->needs_rearm = true;
951
952         r = prioq_put(d->earliest, s, &s->time.earliest_index);
953         if (r < 0)
954                 goto fail;
955
956         r = prioq_put(d->latest, s, &s->time.latest_index);
957         if (r < 0)
958                 goto fail;
959
960         if (ret)
961                 *ret = s;
962
963         return 0;
964
965 fail:
966         source_free(s);
967         return r;
968 }
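/* Usage sketch (illustrative, not part of this file): fire once roughly five
 * seconds from now on the monotonic clock, with the default accuracy; now()
 * is the time-util.h helper used elsewhere in this file.
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC, 0,
 *                           on_time, NULL);
 */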
969
970 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
971         assert(s);
972
973         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
974 }
975
976 _public_ int sd_event_add_signal(
977                 sd_event *e,
978                 sd_event_source **ret,
979                 int sig,
980                 sd_event_signal_handler_t callback,
981                 void *userdata) {
982
983         sd_event_source *s;
984         sigset_t ss;
985         int r;
986         bool previous;
987
988         assert_return(e, -EINVAL);
989         assert_return(sig > 0, -EINVAL);
990         assert_return(sig < _NSIG, -EINVAL);
991         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992         assert_return(!event_pid_changed(e), -ECHILD);
993
994         if (!callback)
995                 callback = signal_exit_callback;
996
997         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
998         if (r != 0)
999                 return -r; /* pthread_sigmask() returns a positive errno value directly */
1000
1001         if (!sigismember(&ss, sig))
1002                 return -EBUSY;
1003
1004         if (!e->signal_sources) {
1005                 e->signal_sources = new0(sd_event_source*, _NSIG);
1006                 if (!e->signal_sources)
1007                         return -ENOMEM;
1008         } else if (e->signal_sources[sig])
1009                 return -EBUSY;
1010
1011         previous = need_signal(e, sig);
1012
1013         s = source_new(e, !ret, SOURCE_SIGNAL);
1014         if (!s)
1015                 return -ENOMEM;
1016
1017         s->signal.sig = sig;
1018         s->signal.callback = callback;
1019         s->userdata = userdata;
1020         s->enabled = SD_EVENT_ON;
1021
1022         e->signal_sources[sig] = s;
1023
1024         if (!previous) {
1025                 assert_se(sigaddset(&e->sigset, sig) == 0);
1026
1027                 r = event_update_signal_fd(e);
1028                 if (r < 0) {
1029                         source_free(s);
1030                         return r;
1031                 }
1032         }
1033
1034         /* Use the signal name as description for the event source by default */
1035         (void) sd_event_source_set_description(s, signal_to_string(sig));
1036
1037         if (ret)
1038                 *ret = s;
1039
1040         return 0;
1041 }
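/* Usage sketch (illustrative, not part of this file): the signal must already
 * be blocked in the calling thread, otherwise -EBUSY is returned above. With a
 * NULL callback the default signal_exit_callback() terminates the loop, and as
 * of this change the source's description is implicitly initialized from
 * signal_to_string(sig).
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */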
1042
1043 _public_ int sd_event_add_child(
1044                 sd_event *e,
1045                 sd_event_source **ret,
1046                 pid_t pid,
1047                 int options,
1048                 sd_event_child_handler_t callback,
1049                 void *userdata) {
1050
1051         sd_event_source *s;
1052         int r;
1053         bool previous;
1054
1055         assert_return(e, -EINVAL);
1056         assert_return(pid > 1, -EINVAL);
1057         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1058         assert_return(options != 0, -EINVAL);
1059         assert_return(callback, -EINVAL);
1060         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1061         assert_return(!event_pid_changed(e), -ECHILD);
1062
1063         r = hashmap_ensure_allocated(&e->child_sources, NULL);
1064         if (r < 0)
1065                 return r;
1066
1067         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1068                 return -EBUSY;
1069
1070         previous = need_signal(e, SIGCHLD);
1071
1072         s = source_new(e, !ret, SOURCE_CHILD);
1073         if (!s)
1074                 return -ENOMEM;
1075
1076         s->child.pid = pid;
1077         s->child.options = options;
1078         s->child.callback = callback;
1079         s->userdata = userdata;
1080         s->enabled = SD_EVENT_ONESHOT;
1081
1082         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1083         if (r < 0) {
1084                 source_free(s);
1085                 return r;
1086         }
1087
1088         e->n_enabled_child_sources ++;
1089
1090         if (!previous) {
1091                 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1092
1093                 r = event_update_signal_fd(e);
1094                 if (r < 0) {
1095                         source_free(s);
1096                         return r;
1097                 }
1098         }
1099
1100         e->need_process_child = true;
1101
1102         if (ret)
1103                 *ret = s;
1104
1105         return 0;
1106 }
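/* Usage sketch (illustrative, not part of this file): SIGCHLD should be
 * blocked in the calling thread so that it is delivered through the loop's
 * signalfd rather than discarded; the handler signature matches
 * sd_event_child_handler_t.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid, si->si_code and si->si_status describe the state change
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     r = sd_event_add_child(e, &source, child_pid, WEXITED, on_child, NULL);
 */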
1107
1108 _public_ int sd_event_add_defer(
1109                 sd_event *e,
1110                 sd_event_source **ret,
1111                 sd_event_handler_t callback,
1112                 void *userdata) {
1113
1114         sd_event_source *s;
1115         int r;
1116
1117         assert_return(e, -EINVAL);
1118         assert_return(callback, -EINVAL);
1119         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1120         assert_return(!event_pid_changed(e), -ECHILD);
1121
1122         s = source_new(e, !ret, SOURCE_DEFER);
1123         if (!s)
1124                 return -ENOMEM;
1125
1126         s->defer.callback = callback;
1127         s->userdata = userdata;
1128         s->enabled = SD_EVENT_ONESHOT;
1129
1130         r = source_set_pending(s, true);
1131         if (r < 0) {
1132                 source_free(s);
1133                 return r;
1134         }
1135
1136         if (ret)
1137                 *ret = s;
1138
1139         return 0;
1140 }
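/* Note (added for clarity): defer sources are created already pending and
 * SD_EVENT_ONESHOT, so the callback runs once on the next event loop
 * iteration; re-enable the source with sd_event_source_set_enabled() to run
 * it again. */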
1141
1142 _public_ int sd_event_add_post(
1143                 sd_event *e,
1144                 sd_event_source **ret,
1145                 sd_event_handler_t callback,
1146                 void *userdata) {
1147
1148         sd_event_source *s;
1149         int r;
1150
1151         assert_return(e, -EINVAL);
1152         assert_return(callback, -EINVAL);
1153         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1154         assert_return(!event_pid_changed(e), -ECHILD);
1155
1156         r = set_ensure_allocated(&e->post_sources, NULL);
1157         if (r < 0)
1158                 return r;
1159
1160         s = source_new(e, !ret, SOURCE_POST);
1161         if (!s)
1162                 return -ENOMEM;
1163
1164         s->post.callback = callback;
1165         s->userdata = userdata;
1166         s->enabled = SD_EVENT_ON;
1167
1168         r = set_put(e->post_sources, s);
1169         if (r < 0) {
1170                 source_free(s);
1171                 return r;
1172         }
1173
1174         if (ret)
1175                 *ret = s;
1176
1177         return 0;
1178 }
1179
1180 _public_ int sd_event_add_exit(
1181                 sd_event *e,
1182                 sd_event_source **ret,
1183                 sd_event_handler_t callback,
1184                 void *userdata) {
1185
1186         sd_event_source *s;
1187         int r;
1188
1189         assert_return(e, -EINVAL);
1190         assert_return(callback, -EINVAL);
1191         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1192         assert_return(!event_pid_changed(e), -ECHILD);
1193
1194         if (!e->exit) {
1195                 e->exit = prioq_new(exit_prioq_compare);
1196                 if (!e->exit)
1197                         return -ENOMEM;
1198         }
1199
1200         s = source_new(e, !ret, SOURCE_EXIT);
1201         if (!s)
1202                 return -ENOMEM;
1203
1204         s->exit.callback = callback;
1205         s->userdata = userdata;
1206         s->exit.prioq_index = PRIOQ_IDX_NULL;
1207         s->enabled = SD_EVENT_ONESHOT;
1208
1209         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1210         if (r < 0) {
1211                 source_free(s);
1212                 return r;
1213         }
1214
1215         if (ret)
1216                 *ret = s;
1217
1218         return 0;
1219 }
1220
1221 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1222         assert_return(s, NULL);
1223
1224         assert(s->n_ref >= 1);
1225         s->n_ref++;
1226
1227         return s;
1228 }
1229
1230 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1231
1232         if (!s)
1233                 return NULL;
1234
1235         assert(s->n_ref >= 1);
1236         s->n_ref--;
1237
1238         if (s->n_ref <= 0) {
1239                 /* Here's a special hack: when we are called from a
1240                  * dispatch handler we won't free the event source
1241                  * immediately, but we will detach the fd from the
1242                  * epoll. This way it is safe for the caller to unref
1243                  * the event source and immediately close the fd, but
1244                  * we still retain a valid event source object after
1245                  * the callback. */
1246
1247                 if (s->dispatching) {
1248                         if (s->type == SOURCE_IO)
1249                                 source_io_unregister(s);
1250
1251                         source_disconnect(s);
1252                 } else
1253                         source_free(s);
1254         }
1255
1256         return NULL;
1257 }
1258
1259 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1260         assert_return(s, -EINVAL);
1261
1262         return free_and_strdup(&s->description, description);
1263 }
1264
1265 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1266         assert_return(s, -EINVAL);
1267         assert_return(description, -EINVAL);
1268
1269         *description = s->description;
1270         return 0;
1271 }
1272
1273 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1274         assert_return(s, NULL);
1275
1276         return s->event;
1277 }
1278
1279 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1280         assert_return(s, -EINVAL);
1281         assert_return(s->type != SOURCE_EXIT, -EDOM);
1282         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1283         assert_return(!event_pid_changed(s->event), -ECHILD);
1284
1285         return s->pending;
1286 }
1287
1288 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1289         assert_return(s, -EINVAL);
1290         assert_return(s->type == SOURCE_IO, -EDOM);
1291         assert_return(!event_pid_changed(s->event), -ECHILD);
1292
1293         return s->io.fd;
1294 }
1295
1296 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1297         int r;
1298
1299         assert_return(s, -EINVAL);
1300         assert_return(fd >= 0, -EINVAL);
1301         assert_return(s->type == SOURCE_IO, -EDOM);
1302         assert_return(!event_pid_changed(s->event), -ECHILD);
1303
1304         if (s->io.fd == fd)
1305                 return 0;
1306
1307         if (s->enabled == SD_EVENT_OFF) {
1308                 s->io.fd = fd;
1309                 s->io.registered = false;
1310         } else {
1311                 int saved_fd;
1312
1313                 saved_fd = s->io.fd;
1314                 assert(s->io.registered);
1315
1316                 s->io.fd = fd;
1317                 s->io.registered = false;
1318
1319                 r = source_io_register(s, s->enabled, s->io.events);
1320                 if (r < 0) {
1321                         s->io.fd = saved_fd;
1322                         s->io.registered = true;
1323                         return r;
1324                 }
1325
1326                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1327         }
1328
1329         return 0;
1330 }
1331
1332 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1333         assert_return(s, -EINVAL);
1334         assert_return(events, -EINVAL);
1335         assert_return(s->type == SOURCE_IO, -EDOM);
1336         assert_return(!event_pid_changed(s->event), -ECHILD);
1337
1338         *events = s->io.events;
1339         return 0;
1340 }
1341
1342 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1343         int r;
1344
1345         assert_return(s, -EINVAL);
1346         assert_return(s->type == SOURCE_IO, -EDOM);
1347         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1348         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1349         assert_return(!event_pid_changed(s->event), -ECHILD);
1350
1351         /* edge-triggered updates are never skipped, so we can reset edges */
1352         if (s->io.events == events && !(events & EPOLLET))
1353                 return 0;
1354
1355         if (s->enabled != SD_EVENT_OFF) {
1356                 r = source_io_register(s, s->enabled, events);
1357                 if (r < 0)
1358                         return r;
1359         }
1360
1361         s->io.events = events;
1362         source_set_pending(s, false);
1363
1364         return 0;
1365 }
1366
1367 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1368         assert_return(s, -EINVAL);
1369         assert_return(revents, -EINVAL);
1370         assert_return(s->type == SOURCE_IO, -EDOM);
1371         assert_return(s->pending, -ENODATA);
1372         assert_return(!event_pid_changed(s->event), -ECHILD);
1373
1374         *revents = s->io.revents;
1375         return 0;
1376 }
1377
1378 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1379         assert_return(s, -EINVAL);
1380         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1381         assert_return(!event_pid_changed(s->event), -ECHILD);
1382
1383         return s->signal.sig;
1384 }
1385
1386 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1387         assert_return(s, -EINVAL);
1388         assert_return(!event_pid_changed(s->event), -ECHILD);
1389
1390         *priority = s->priority;
             return 0;
1391 }
1392
1393 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1394         assert_return(s, -EINVAL);
1395         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1396         assert_return(!event_pid_changed(s->event), -ECHILD);
1397
1398         if (s->priority == priority)
1399                 return 0;
1400
1401         s->priority = priority;
1402
1403         if (s->pending)
1404                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1405
1406         if (s->prepare)
1407                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1408
1409         if (s->type == SOURCE_EXIT)
1410                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1411
1412         return 0;
1413 }
1414
1415 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1416         assert_return(s, -EINVAL);
1417         assert_return(m, -EINVAL);
1418         assert_return(!event_pid_changed(s->event), -ECHILD);
1419
1420         *m = s->enabled;
1421         return 0;
1422 }
1423
1424 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1425         int r;
1426
1427         assert_return(s, -EINVAL);
1428         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1429         assert_return(!event_pid_changed(s->event), -ECHILD);
1430
1431         /* If we are dead anyway, we are fine with turning off
1432          * sources, but everything else needs to fail. */
1433         if (s->event->state == SD_EVENT_FINISHED)
1434                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1435
1436         if (s->enabled == m)
1437                 return 0;
1438
1439         if (m == SD_EVENT_OFF) {
1440
1441                 switch (s->type) {
1442
1443                 case SOURCE_IO:
1444                         r = source_io_unregister(s);
1445                         if (r < 0)
1446                                 return r;
1447
1448                         s->enabled = m;
1449                         break;
1450
1451                 case SOURCE_TIME_REALTIME:
1452                 case SOURCE_TIME_BOOTTIME:
1453                 case SOURCE_TIME_MONOTONIC:
1454                 case SOURCE_TIME_REALTIME_ALARM:
1455                 case SOURCE_TIME_BOOTTIME_ALARM: {
1456                         struct clock_data *d;
1457
1458                         s->enabled = m;
1459                         d = event_get_clock_data(s->event, s->type);
1460                         assert(d);
1461
1462                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1463                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1464                         d->needs_rearm = true;
1465                         break;
1466                 }
1467
1468                 case SOURCE_SIGNAL:
1469                         assert(need_signal(s->event, s->signal.sig));
1470
1471                         s->enabled = m;
1472
1473                         if (!need_signal(s->event, s->signal.sig)) {
1474                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1475
1476                                 (void) event_update_signal_fd(s->event);
1477                                 /* If disabling failed, we might get a spurious event,
1478                                  * but otherwise nothing bad should happen. */
1479                         }
1480
1481                         break;
1482
1483                 case SOURCE_CHILD:
1484                         assert(need_signal(s->event, SIGCHLD));
1485
1486                         s->enabled = m;
1487
1488                         assert(s->event->n_enabled_child_sources > 0);
1489                         s->event->n_enabled_child_sources--;
1490
1491                         if (!need_signal(s->event, SIGCHLD)) {
1492                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1493
1494                                 (void) event_update_signal_fd(s->event);
1495                         }
1496
1497                         break;
1498
1499                 case SOURCE_EXIT:
1500                         s->enabled = m;
1501                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1502                         break;
1503
1504                 case SOURCE_DEFER:
1505                 case SOURCE_POST:
1506                         s->enabled = m;
1507                         break;
1508
1509                 default:
1510                         assert_not_reached("Wut? I shouldn't exist.");
1511                 }
1512
1513         } else {
1514                 switch (s->type) {
1515
1516                 case SOURCE_IO:
1517                         r = source_io_register(s, m, s->io.events);
1518                         if (r < 0)
1519                                 return r;
1520
1521                         s->enabled = m;
1522                         break;
1523
1524                 case SOURCE_TIME_REALTIME:
1525                 case SOURCE_TIME_BOOTTIME:
1526                 case SOURCE_TIME_MONOTONIC:
1527                 case SOURCE_TIME_REALTIME_ALARM:
1528                 case SOURCE_TIME_BOOTTIME_ALARM: {
1529                         struct clock_data *d;
1530
1531                         s->enabled = m;
1532                         d = event_get_clock_data(s->event, s->type);
1533                         assert(d);
1534
1535                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1536                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1537                         d->needs_rearm = true;
1538                         break;
1539                 }
1540
1541                 case SOURCE_SIGNAL:
1542                         /* Check status before enabling. */
1543                         if (!need_signal(s->event, s->signal.sig)) {
1544                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1545
1546                                 r = event_update_signal_fd(s->event);
1547                                 if (r < 0) {
1548                                         s->enabled = SD_EVENT_OFF;
1549                                         return r;
1550                                 }
1551                         }
1552
1553                         s->enabled = m;
1554                         break;
1555
1556                 case SOURCE_CHILD:
1557                         /* Check status before enabling. */
1558                         if (s->enabled == SD_EVENT_OFF) {
1559                                 if (!need_signal(s->event, SIGCHLD)) {
1560                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1561
1562                                         r = event_update_signal_fd(s->event);
1563                                         if (r < 0) {
1564                                                 s->enabled = SD_EVENT_OFF;
1565                                                 return r;
1566                                         }
1567                                 }
1568
1569                                 s->event->n_enabled_child_sources++;
1570                         }
1571
1572                         s->enabled = m;
1573                         break;
1574
1575                 case SOURCE_EXIT:
1576                         s->enabled = m;
1577                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1578                         break;
1579
1580                 case SOURCE_DEFER:
1581                 case SOURCE_POST:
1582                         s->enabled = m;
1583                         break;
1584
1585                 default:
1586                         assert_not_reached("Wut? I shouldn't exist.");
1587                 }
1588         }
1589
1590         if (s->pending)
1591                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1592
1593         if (s->prepare)
1594                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1595
1596         return 0;
1597 }
1598
1599 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1600         assert_return(s, -EINVAL);
1601         assert_return(usec, -EINVAL);
1602         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1603         assert_return(!event_pid_changed(s->event), -ECHILD);
1604
1605         *usec = s->time.next;
1606         return 0;
1607 }
1608
1609 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1610         struct clock_data *d;
1611
1612         assert_return(s, -EINVAL);
1613         assert_return(usec != (uint64_t) -1, -EINVAL);
1614         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1615         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1616         assert_return(!event_pid_changed(s->event), -ECHILD);
1617
1618         s->time.next = usec;
1619
1620         source_set_pending(s, false);
1621
1622         d = event_get_clock_data(s->event, s->type);
1623         assert(d);
1624
1625         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1626         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1627         d->needs_rearm = true;
1628
1629         return 0;
1630 }
1631
1632 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1633         assert_return(s, -EINVAL);
1634         assert_return(usec, -EINVAL);
1635         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1636         assert_return(!event_pid_changed(s->event), -ECHILD);
1637
1638         *usec = s->time.accuracy;
1639         return 0;
1640 }
1641
1642 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1643         struct clock_data *d;
1644
1645         assert_return(s, -EINVAL);
1646         assert_return(usec != (uint64_t) -1, -EINVAL);
1647         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1648         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1649         assert_return(!event_pid_changed(s->event), -ECHILD);
1650
1651         if (usec == 0)
1652                 usec = DEFAULT_ACCURACY_USEC;
1653
1654         s->time.accuracy = usec;
1655
1656         source_set_pending(s, false);
1657
1658         d = event_get_clock_data(s->event, s->type);
1659         assert(d);
1660
1661         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1662         d->needs_rearm = true;
1663
1664         return 0;
1665 }
1666
1667 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1668         assert_return(s, -EINVAL);
1669         assert_return(clock, -EINVAL);
1670         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1671         assert_return(!event_pid_changed(s->event), -ECHILD);
1672
1673         *clock = event_source_type_to_clock(s->type);
1674         return 0;
1675 }
1676
1677 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1678         assert_return(s, -EINVAL);
1679         assert_return(pid, -EINVAL);
1680         assert_return(s->type == SOURCE_CHILD, -EDOM);
1681         assert_return(!event_pid_changed(s->event), -ECHILD);
1682
1683         *pid = s->child.pid;
1684         return 0;
1685 }
1686
1687 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1688         int r;
1689
1690         assert_return(s, -EINVAL);
1691         assert_return(s->type != SOURCE_EXIT, -EDOM);
1692         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1693         assert_return(!event_pid_changed(s->event), -ECHILD);
1694
1695         if (s->prepare == callback)
1696                 return 0;
1697
1698         if (callback && s->prepare) {
1699                 s->prepare = callback;
1700                 return 0;
1701         }
1702
1703         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1704         if (r < 0)
1705                 return r;
1706
1707         s->prepare = callback;
1708
1709         if (callback) {
1710                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1711                 if (r < 0)
1712                         return r;
1713         } else
1714                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1715
1716         return 0;
1717 }
1718
1719 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1720         assert_return(s, NULL);
1721
1722         return s->userdata;
1723 }
1724
1725 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1726         void *ret;
1727
1728         assert_return(s, NULL);
1729
1730         ret = s->userdata;
1731         s->userdata = userdata;
1732
1733         return ret;
1734 }
1735
1736 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1737         usec_t c;
1738         assert(e);
1739         assert(a <= b);
1740
1741         if (a <= 0)
1742                 return 0;
1743
1744         if (b <= a + 1)
1745                 return a;
1746
1747         initialize_perturb(e);
1748
1749         /*
1750           Find a good time to wake up again between times a and b. We
1751           have two goals here:
1752
1753           a) We want to wake up as seldom as possible, hence prefer
1754              later times over earlier times.
1755
1756           b) But if we have to wake up, then let's make sure to
1757              dispatch as much as possible on the entire system.
1758
1759           We implement this by waking up everywhere at the same time
1760           within any given minute if we can, synchronised via the
1761           perturbation value determined from the boot ID. If we can't,
1762           then we try to find the same spot in every 10s, then in
1763           every 1s and finally in every 250ms step. Otherwise, we
1764           pick the last possible time to wake up.
1765         */
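
        /*
          Illustrative walk-through (all numbers assumed): suppose the
          boot-ID-derived perturbation is 7.5s and we are asked to sleep
          between a = X:00:03.0 and b = X:00:45.0 of some minute X. The
          first, per-minute attempt computes c = (b rounded down to the
          minute) + perturb = X:00:07.5; since a <= c < b, that value is
          returned, so every loop on this machine whose window covers the
          :07.5 spot wakes at the very same instant.
        */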
1766
1767         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1768         if (c >= b) {
1769                 if (_unlikely_(c < USEC_PER_MINUTE))
1770                         return b;
1771
1772                 c -= USEC_PER_MINUTE;
1773         }
1774
1775         if (c >= a)
1776                 return c;
1777
1778         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1779         if (c >= b) {
1780                 if (_unlikely_(c < USEC_PER_SEC*10))
1781                         return b;
1782
1783                 c -= USEC_PER_SEC*10;
1784         }
1785
1786         if (c >= a)
1787                 return c;
1788
1789         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1790         if (c >= b) {
1791                 if (_unlikely_(c < USEC_PER_SEC))
1792                         return b;
1793
1794                 c -= USEC_PER_SEC;
1795         }
1796
1797         if (c >= a)
1798                 return c;
1799
1800         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1801         if (c >= b) {
1802                 if (_unlikely_(c < USEC_PER_MSEC*250))
1803                         return b;
1804
1805                 c -= USEC_PER_MSEC*250;
1806         }
1807
1808         if (c >= a)
1809                 return c;
1810
1811         return b;
1812 }
1813
1814 static int event_arm_timer(
1815                 sd_event *e,
1816                 struct clock_data *d) {
1817
1818         struct itimerspec its = {};
1819         sd_event_source *a, *b;
1820         usec_t t;
1821         int r;
1822
1823         assert(e);
1824         assert(d);
1825
1826         if (!d->needs_rearm)
1827                 return 0;
1828         else
1829                 d->needs_rearm = false;
1830
1831         a = prioq_peek(d->earliest);
1832         if (!a || a->enabled == SD_EVENT_OFF) {
1833
1834                 if (d->fd < 0)
1835                         return 0;
1836
1837                 if (d->next == USEC_INFINITY)
1838                         return 0;
1839
1840                 /* disarm */
1841                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1842                 if (r < 0)
1843                         return r;
1844
1845                 d->next = USEC_INFINITY;
1846                 return 0;
1847         }
1848
1849         b = prioq_peek(d->latest);
1850         assert_se(b && b->enabled != SD_EVENT_OFF);
1851
1852         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1853         if (d->next == t)
1854                 return 0;
1855
1856         assert_se(d->fd >= 0);
1857
1858         if (t == 0) {
1859                 /* We don't want to disarm here, just indicate some time looooong ago. */
1860                 its.it_value.tv_sec = 0;
1861                 its.it_value.tv_nsec = 1;
1862         } else
1863                 timespec_store(&its.it_value, t);
1864
1865         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1866         if (r < 0)
1867                 return -errno;
1868
1869         d->next = t;
1870         return 0;
1871 }
1872
1873 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1874         assert(e);
1875         assert(s);
1876         assert(s->type == SOURCE_IO);
1877
1878         /* If the event source was already pending, we just OR in the
1879          * new revents, otherwise we reset the value. The ORing is
1880          * necessary to handle EPOLLONESHOT events properly where
1881          * readability might happen independently of writability, and
1882          * we need to keep track of both */
1883
1884         if (s->pending)
1885                 s->io.revents |= revents;
1886         else
1887                 s->io.revents = revents;
1888
1889         return source_set_pending(s, true);
1890 }
1891
1892 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1893         uint64_t x;
1894         ssize_t ss;
1895
1896         assert(e);
1897         assert(fd >= 0);
1898
1899         assert_return(events == EPOLLIN, -EIO);
1900
1901         ss = read(fd, &x, sizeof(x));
1902         if (ss < 0) {
1903                 if (errno == EAGAIN || errno == EINTR)
1904                         return 0;
1905
1906                 return -errno;
1907         }
1908
1909         if (_unlikely_(ss != sizeof(x)))
1910                 return -EIO;
1911
1912         if (next)
1913                 *next = USEC_INFINITY;
1914
1915         return 0;
1916 }
1917
1918 static int process_timer(
1919                 sd_event *e,
1920                 usec_t n,
1921                 struct clock_data *d) {
1922
1923         sd_event_source *s;
1924         int r;
1925
1926         assert(e);
1927         assert(d);
1928
1929         for (;;) {
1930                 s = prioq_peek(d->earliest);
1931                 if (!s ||
1932                     s->time.next > n ||
1933                     s->enabled == SD_EVENT_OFF ||
1934                     s->pending)
1935                         break;
1936
1937                 r = source_set_pending(s, true);
1938                 if (r < 0)
1939                         return r;
1940
1941                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1942                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1943                 d->needs_rearm = true;
1944         }
1945
1946         return 0;
1947 }
1948
1949 static int process_child(sd_event *e) {
1950         sd_event_source *s;
1951         Iterator i;
1952         int r;
1953
1954         assert(e);
1955
1956         e->need_process_child = false;
1957
1958         /*
1959            So, this is ugly. We iteratively invoke waitid() with P_PID
1960            + WNOHANG for each PID we wait for, instead of using
1961            P_ALL. This is because we only want to get child
1962            information of very specific child processes, and not all
1963            of them. We might not have processed the SIGCHLD event of a
1964            previous invocation and we don't want to maintain an
1965            unbounded *per-child* event queue, hence we really don't
1966            want anything flushed out of the kernel's queue that we
1967            don't care about. Since this is O(n), if you have a lot of
1968            processes you probably want to handle SIGCHLD yourself (see
1969            the sketch below).
1970
1971            We do not reap the children here (by using WNOWAIT);
1972            reaping is only done after the event source is dispatched,
1973            so that the callback still sees the process as a zombie.
1974         */
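
        /*
          A sketch of that "handle SIGCHLD yourself" alternative, using the
          public API only (illustrative; the handler body is an assumption):

              static int on_sigchld(sd_event_source *s,
                                    const struct signalfd_siginfo *si,
                                    void *userdata) {
                      for (;;) {
                              siginfo_t info = {};

                              // reap everything that exited, regardless of
                              // which child it was
                              if (waitid(P_ALL, 0, &info, WEXITED|WNOHANG) < 0 ||
                                  info.si_pid == 0)
                                      break;

                              // ... act on info.si_pid / info.si_status ...
                      }
                      return 0;
              }

              // SIGCHLD must be blocked before the signal source is added
              sigset_t ss;
              sigemptyset(&ss);
              sigaddset(&ss, SIGCHLD);
              sigprocmask(SIG_BLOCK, &ss, NULL);

              sd_event_add_signal(e, NULL, SIGCHLD, on_sigchld, NULL);
        */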
1975
1976         HASHMAP_FOREACH(s, e->child_sources, i) {
1977                 assert(s->type == SOURCE_CHILD);
1978
1979                 if (s->pending)
1980                         continue;
1981
1982                 if (s->enabled == SD_EVENT_OFF)
1983                         continue;
1984
1985                 zero(s->child.siginfo);
1986                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1987                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1988                 if (r < 0)
1989                         return -errno;
1990
1991                 if (s->child.siginfo.si_pid != 0) {
1992                         bool zombie =
1993                                 s->child.siginfo.si_code == CLD_EXITED ||
1994                                 s->child.siginfo.si_code == CLD_KILLED ||
1995                                 s->child.siginfo.si_code == CLD_DUMPED;
1996
1997                         if (!zombie && (s->child.options & WEXITED)) {
1998                                 /* If the child isn't dead then let's
1999                                  * immediately remove the state change
2000                                  * from the queue, since there's no
2001                                  * benefit in leaving it queued */
2002
2003                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2004                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2005                         }
2006
2007                         r = source_set_pending(s, true);
2008                         if (r < 0)
2009                                 return r;
2010                 }
2011         }
2012
2013         return 0;
2014 }
2015
2016 static int process_signal(sd_event *e, uint32_t events) {
2017         bool read_one = false;
2018         int r;
2019
2020         assert(e);
2021
2022         assert_return(events == EPOLLIN, -EIO);
2023
2024         for (;;) {
2025                 struct signalfd_siginfo si;
2026                 ssize_t n;
2027                 sd_event_source *s = NULL;
2028
2029                 n = read(e->signal_fd, &si, sizeof(si));
2030                 if (n < 0) {
2031                         if (errno == EAGAIN || errno == EINTR)
2032                                 return read_one;
2033
2034                         return -errno;
2035                 }
2036
2037                 if (_unlikely_(n != sizeof(si)))
2038                         return -EIO;
2039
2040                 assert(si.ssi_signo < _NSIG);
2041
2042                 read_one = true;
2043
2044                 if (si.ssi_signo == SIGCHLD) {
2045                         r = process_child(e);
2046                         if (r < 0)
2047                                 return r;
2048                         if (r > 0)
2049                                 continue;
2050                 }
2051
2052                 if (e->signal_sources)
2053                         s = e->signal_sources[si.ssi_signo];
2054
2055                 if (!s)
2056                         continue;
2057
2058                 s->signal.siginfo = si;
2059                 r = source_set_pending(s, true);
2060                 if (r < 0)
2061                         return r;
2062         }
2063 }
2064
2065 static int source_dispatch(sd_event_source *s) {
2066         int r = 0;
2067
2068         assert(s);
2069         assert(s->pending || s->type == SOURCE_EXIT);
2070
2071         if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2072                 r = source_set_pending(s, false);
2073                 if (r < 0)
2074                         return r;
2075         }
2076
2077         if (s->type != SOURCE_POST) {
2078                 sd_event_source *z;
2079                 Iterator i;
2080
2081                 /* If we execute a non-post source, let's mark all
2082                  * post sources as pending */
2083
2084                 SET_FOREACH(z, s->event->post_sources, i) {
2085                         if (z->enabled == SD_EVENT_OFF)
2086                                 continue;
2087
2088                         r = source_set_pending(z, true);
2089                         if (r < 0)
2090                                 return r;
2091                 }
2092         }
2093
2094         if (s->enabled == SD_EVENT_ONESHOT) {
2095                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2096                 if (r < 0)
2097                         return r;
2098         }
2099
2100         s->dispatching = true;
2101
2102         switch (s->type) {
2103
2104         case SOURCE_IO:
2105                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2106                 break;
2107
2108         case SOURCE_TIME_REALTIME:
2109         case SOURCE_TIME_BOOTTIME:
2110         case SOURCE_TIME_MONOTONIC:
2111         case SOURCE_TIME_REALTIME_ALARM:
2112         case SOURCE_TIME_BOOTTIME_ALARM:
2113                 r = s->time.callback(s, s->time.next, s->userdata);
2114                 break;
2115
2116         case SOURCE_SIGNAL:
2117                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2118                 break;
2119
2120         case SOURCE_CHILD: {
2121                 bool zombie;
2122
2123                 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2124                          s->child.siginfo.si_code == CLD_KILLED ||
2125                          s->child.siginfo.si_code == CLD_DUMPED;
2126
2127                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2128
2129                 /* Now, reap the PID for good. */
2130                 if (zombie)
2131                         waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2132
2133                 break;
2134         }
2135
2136         case SOURCE_DEFER:
2137                 r = s->defer.callback(s, s->userdata);
2138                 break;
2139
2140         case SOURCE_POST:
2141                 r = s->post.callback(s, s->userdata);
2142                 break;
2143
2144         case SOURCE_EXIT:
2145                 r = s->exit.callback(s, s->userdata);
2146                 break;
2147
2148         case SOURCE_WATCHDOG:
2149         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2150         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2151                 assert_not_reached("Wut? I shouldn't exist.");
2152         }
2153
2154         s->dispatching = false;
2155
2156         if (r < 0) {
2157                 if (s->description)
2158                         log_debug("Event source '%s' returned error, disabling: %s", s->description, strerror(-r));
2159                 else
2160                         log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
2161         }
2162
2163         if (s->n_ref == 0)
2164                 source_free(s);
2165         else if (r < 0)
2166                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2167
2168         return 1;
2169 }
2170
2171 static int event_prepare(sd_event *e) {
2172         int r;
2173
2174         assert(e);
2175
2176         for (;;) {
2177                 sd_event_source *s;
2178
2179                 s = prioq_peek(e->prepare);
2180                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2181                         break;
2182
2183                 s->prepare_iteration = e->iteration;
2184                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2185                 if (r < 0)
2186                         return r;
2187
2188                 assert(s->prepare);
2189
2190                 s->dispatching = true;
2191                 r = s->prepare(s, s->userdata);
2192                 s->dispatching = false;
2193
2194                 if (r < 0) {
2195                         if (s->description)
2196                                 log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->description, strerror(-r));
2197                         else
2198                                 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2199                 }
2200
2201                 if (s->n_ref == 0)
2202                         source_free(s);
2203                 else if (r < 0)
2204                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2205         }
2206
2207         return 0;
2208 }
2209
2210 static int dispatch_exit(sd_event *e) {
2211         sd_event_source *p;
2212         int r;
2213
2214         assert(e);
2215
2216         p = prioq_peek(e->exit);
2217         if (!p || p->enabled == SD_EVENT_OFF) {
2218                 e->state = SD_EVENT_FINISHED;
2219                 return 0;
2220         }
2221
2222         sd_event_ref(e);
2223         e->iteration++;
2224         e->state = SD_EVENT_EXITING;
2225
2226         r = source_dispatch(p);
2227
2228         e->state = SD_EVENT_PASSIVE;
2229         sd_event_unref(e);
2230
2231         return r;
2232 }
2233
2234 static sd_event_source* event_next_pending(sd_event *e) {
2235         sd_event_source *p;
2236
2237         assert(e);
2238
2239         p = prioq_peek(e->pending);
2240         if (!p)
2241                 return NULL;
2242
2243         if (p->enabled == SD_EVENT_OFF)
2244                 return NULL;
2245
2246         return p;
2247 }
2248
2249 static int arm_watchdog(sd_event *e) {
2250         struct itimerspec its = {};
2251         usec_t t;
2252         int r;
2253
2254         assert(e);
2255         assert(e->watchdog_fd >= 0);
2256
2257         t = sleep_between(e,
2258                           e->watchdog_last + (e->watchdog_period / 2),
2259                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2260
2261         timespec_store(&its.it_value, t);
2262
2263         /* Make sure we never set the watchdog to 0, which tells the
2264          * kernel to disable it. */
2265         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2266                 its.it_value.tv_nsec = 1;
2267
2268         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2269         if (r < 0)
2270                 return -errno;
2271
2272         return 0;
2273 }
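
/*
  Worked example (the 20s period is assumed): with WATCHDOG_USEC= set to 20s
  by the service manager, arm_watchdog() above schedules the next ping
  somewhere between watchdog_last + 10s and watchdog_last + 15s, letting
  sleep_between() coalesce it with other timers, while process_watchdog()
  below additionally refuses to send WATCHDOG=1 more often than every 5s
  (a quarter of the period).
*/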
2274
2275 static int process_watchdog(sd_event *e) {
2276         assert(e);
2277
2278         if (!e->watchdog)
2279                 return 0;
2280
2281         /* Don't notify watchdog too often */
2282         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2283                 return 0;
2284
2285         sd_notify(false, "WATCHDOG=1");
2286         e->watchdog_last = e->timestamp.monotonic;
2287
2288         return arm_watchdog(e);
2289 }
2290
2291 _public_ int sd_event_prepare(sd_event *e) {
2292         int r;
2293
2294         assert_return(e, -EINVAL);
2295         assert_return(!event_pid_changed(e), -ECHILD);
2296         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2297         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2298
2299         if (e->exit_requested)
2300                 goto pending;
2301
2302         e->iteration++;
2303
2304         r = event_prepare(e);
2305         if (r < 0)
2306                 return r;
2307
2308         r = event_arm_timer(e, &e->realtime);
2309         if (r < 0)
2310                 return r;
2311
2312         r = event_arm_timer(e, &e->boottime);
2313         if (r < 0)
2314                 return r;
2315
2316         r = event_arm_timer(e, &e->monotonic);
2317         if (r < 0)
2318                 return r;
2319
2320         r = event_arm_timer(e, &e->realtime_alarm);
2321         if (r < 0)
2322                 return r;
2323
2324         r = event_arm_timer(e, &e->boottime_alarm);
2325         if (r < 0)
2326                 return r;
2327
2328         if (event_next_pending(e) || e->need_process_child)
2329                 goto pending;
2330
2331         e->state = SD_EVENT_PREPARED;
2332
2333         return 0;
2334
2335 pending:
2336         e->state = SD_EVENT_PREPARED;
2337         r = sd_event_wait(e, 0);
2338         if (r == 0)
2339                 e->state = SD_EVENT_PREPARED;
2340
2341         return r;
2342 }
2343
2344 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2345         struct epoll_event *ev_queue;
2346         unsigned ev_queue_max;
2347         int r, m, i;
2348
2349         assert_return(e, -EINVAL);
2350         assert_return(!event_pid_changed(e), -ECHILD);
2351         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2352         assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);
2353
2354         if (e->exit_requested) {
2355                 e->state = SD_EVENT_PENDING;
2356                 return 1;
2357         }
2358
2359         ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
2360         ev_queue = newa(struct epoll_event, ev_queue_max);
2361
2362         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2363                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2364         if (m < 0) {
2365                 if (errno == EINTR) {
2366                         e->state = SD_EVENT_PENDING;
2367                         return 1;
2368                 }
2369
2370                 r = -errno;
2371
2372                 goto finish;
2373         }
2374
2375         dual_timestamp_get(&e->timestamp);
2376         e->timestamp_boottime = now(CLOCK_BOOTTIME);
2377
2378         for (i = 0; i < m; i++) {
2379
2380                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
2381                         r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
2382                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
2383                         r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
2384                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
2385                         r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
2386                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
2387                         r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
2388                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
2389                         r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
2390                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2391                         r = process_signal(e, ev_queue[i].events);
2392                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2393                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2394                 else
2395                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2396
2397                 if (r < 0)
2398                         goto finish;
2399         }
2400
2401         r = process_watchdog(e);
2402         if (r < 0)
2403                 goto finish;
2404
2405         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2406         if (r < 0)
2407                 goto finish;
2408
2409         r = process_timer(e, e->timestamp_boottime, &e->boottime);
2410         if (r < 0)
2411                 goto finish;
2412
2413         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2414         if (r < 0)
2415                 goto finish;
2416
2417         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2418         if (r < 0)
2419                 goto finish;
2420
2421         r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2422         if (r < 0)
2423                 goto finish;
2424
2425         if (e->need_process_child) {
2426                 r = process_child(e);
2427                 if (r < 0)
2428                         goto finish;
2429         }
2430
2431         if (event_next_pending(e)) {
2432                 e->state = SD_EVENT_PENDING;
2433
2434                 return 1;
2435         }
2436
2437         r = 0;
2438
2439 finish:
2440         e->state = SD_EVENT_PASSIVE;
2441
2442         return r;
2443 }
2444
2445 _public_ int sd_event_dispatch(sd_event *e) {
2446         sd_event_source *p;
2447         int r;
2448
2449         assert_return(e, -EINVAL);
2450         assert_return(!event_pid_changed(e), -ECHILD);
2451         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2452         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2453
2454         if (e->exit_requested)
2455                 return dispatch_exit(e);
2456
2457         p = event_next_pending(e);
2458         if (p) {
2459                 sd_event_ref(e);
2460
2461                 e->state = SD_EVENT_RUNNING;
2462                 r = source_dispatch(p);
2463                 e->state = SD_EVENT_PASSIVE;
2464
2465                 sd_event_unref(e);
2466
2467                 return r;
2468         }
2469
2470         e->state = SD_EVENT_PASSIVE;
2471
2472         return 1;
2473 }
2474
2475 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2476         int r;
2477
2478         assert_return(e, -EINVAL);
2479         assert_return(!event_pid_changed(e), -ECHILD);
2480         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2481         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2482
2483         r = sd_event_prepare(e);
2484         if (r > 0)
2485                 return sd_event_dispatch(e);
2486         else if (r < 0)
2487                 return r;
2488
2489         r = sd_event_wait(e, timeout);
2490         if (r > 0)
2491                 return sd_event_dispatch(e);
2492         else
2493                 return r;
2494 }
2495
2496 _public_ int sd_event_loop(sd_event *e) {
2497         int r;
2498
2499         assert_return(e, -EINVAL);
2500         assert_return(!event_pid_changed(e), -ECHILD);
2501         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2502
2503         sd_event_ref(e);
2504
2505         while (e->state != SD_EVENT_FINISHED) {
2506                 r = sd_event_run(e, (uint64_t) -1);
2507                 if (r < 0)
2508                         goto finish;
2509         }
2510
2511         r = e->exit_code;
2512
2513 finish:
2514         sd_event_unref(e);
2515         return r;
2516 }
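
/*
  Minimal end-to-end usage sketch (illustrative only; error handling is
  omitted and the 5s delay is an assumption). Only public calls from this
  library plus clock_gettime() are used:

      static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
              return sd_event_exit(sd_event_source_get_event(s), 0);
      }

      int main(void) {
              sd_event *e = NULL;
              struct timespec ts;
              uint64_t next;

              sd_event_default(&e);

              clock_gettime(CLOCK_MONOTONIC, &ts);
              next = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000 + 5 * 1000000;

              sd_event_add_time(e, NULL, CLOCK_MONOTONIC, next, 0, on_time, NULL);
              sd_event_loop(e);

              sd_event_unref(e);
              return 0;
      }
*/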
2517
2518 _public_ int sd_event_get_fd(sd_event *e) {
2519
2520         assert_return(e, -EINVAL);
2521         assert_return(!event_pid_changed(e), -ECHILD);
2522
2523         return e->epoll_fd;
2524 }
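
/*
  Illustrative integration into a foreign poll loop (the surrounding loop and
  its exit conditions are assumptions): the epoll fd returned here is watched
  externally, and sd_event_run() is invoked with a zero timeout whenever it
  becomes readable, so that only already-pending work is dispatched:

      struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };

      for (;;) {
              if (poll(&p, 1, -1) < 0)
                      break;
              if (sd_event_run(e, 0) < 0)
                      break;
      }
*/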
2525
2526 _public_ int sd_event_get_state(sd_event *e) {
2527         assert_return(e, -EINVAL);
2528         assert_return(!event_pid_changed(e), -ECHILD);
2529
2530         return e->state;
2531 }
2532
2533 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2534         assert_return(e, -EINVAL);
2535         assert_return(code, -EINVAL);
2536         assert_return(!event_pid_changed(e), -ECHILD);
2537
2538         if (!e->exit_requested)
2539                 return -ENODATA;
2540
2541         *code = e->exit_code;
2542         return 0;
2543 }
2544
2545 _public_ int sd_event_exit(sd_event *e, int code) {
2546         assert_return(e, -EINVAL);
2547         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2548         assert_return(!event_pid_changed(e), -ECHILD);
2549
2550         e->exit_requested = true;
2551         e->exit_code = code;
2552
2553         return 0;
2554 }
2555
2556 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2557         assert_return(e, -EINVAL);
2558         assert_return(usec, -EINVAL);
2559         assert_return(!event_pid_changed(e), -ECHILD);
2560
2561         /* If we haven't run yet, there is no loop timestamp we could return */
2562         if (!dual_timestamp_is_set(&e->timestamp))
2563                 return -ENODATA;
2564
2565         switch (clock) {
2566
2567         case CLOCK_REALTIME:
2568         case CLOCK_REALTIME_ALARM:
2569                 *usec = e->timestamp.realtime;
2570                 break;
2571
2572         case CLOCK_MONOTONIC:
2573                 *usec = e->timestamp.monotonic;
2574                 break;
2575
2576         case CLOCK_BOOTTIME:
2577         case CLOCK_BOOTTIME_ALARM:
2578                 *usec = e->timestamp_boottime;
2579                 break;
        default:
                /* Unknown clock: refuse rather than return an uninitialized value */
                return -EOPNOTSUPP;
2580         }
2581
2582         return 0;
2583 }
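
/*
  Typical use (sketch; the 1s interval is an assumption): from within a
  handler, re-arm a timer source relative to the timestamp of the current
  loop iteration, so that all sources dispatched in this iteration agree on
  "now" and no extra clock_gettime() calls are needed:

      static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
              uint64_t t;

              sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &t);
              sd_event_source_set_time(s, t + 1000000);
              return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
      }
*/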
2584
2585 _public_ int sd_event_default(sd_event **ret) {
2586
2587         static thread_local sd_event *default_event = NULL;
2588         sd_event *e = NULL;
2589         int r;
2590
2591         if (!ret)
2592                 return !!default_event;
2593
2594         if (default_event) {
2595                 *ret = sd_event_ref(default_event);
2596                 return 0;
2597         }
2598
2599         r = sd_event_new(&e);
2600         if (r < 0)
2601                 return r;
2602
2603         e->default_event_ptr = &default_event;
2604         e->tid = gettid();
2605         default_event = e;
2606
2607         *ret = e;
2608         return 1;
2609 }
2610
2611 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2612         assert_return(e, -EINVAL);
2613         assert_return(tid, -EINVAL);
2614         assert_return(!event_pid_changed(e), -ECHILD);
2615
2616         if (e->tid != 0) {
2617                 *tid = e->tid;
2618                 return 0;
2619         }
2620
2621         return -ENXIO;
2622 }
2623
2624 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2625         int r;
2626
2627         assert_return(e, -EINVAL);
2628         assert_return(!event_pid_changed(e), -ECHILD);
2629
2630         if (e->watchdog == !!b)
2631                 return e->watchdog;
2632
2633         if (b) {
2634                 struct epoll_event ev = {};
2635
2636                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2637                 if (r <= 0)
2638                         return r;
2639
2640                 /* Issue first ping immediately */
2641                 sd_notify(false, "WATCHDOG=1");
2642                 e->watchdog_last = now(CLOCK_MONOTONIC);
2643
2644                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2645                 if (e->watchdog_fd < 0)
2646                         return -errno;
2647
2648                 r = arm_watchdog(e);
2649                 if (r < 0)
2650                         goto fail;
2651
2652                 ev.events = EPOLLIN;
2653                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2654
2655                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2656                 if (r < 0) {
2657                         r = -errno;
2658                         goto fail;
2659                 }
2660
2661         } else {
2662                 if (e->watchdog_fd >= 0) {
2663                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2664                         e->watchdog_fd = safe_close(e->watchdog_fd);
2665                 }
2666         }
2667
2668         e->watchdog = !!b;
2669         return e->watchdog;
2670
2671 fail:
2672         e->watchdog_fd = safe_close(e->watchdog_fd);
2673         return r;
2674 }
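
/*
  Usage note (illustrative): a daemon started with WatchdogSec= in its unit
  file only needs to opt in once; the loop then takes care of the periodic
  WATCHDOG=1 pings by itself. Without a configured watchdog,
  sd_watchdog_enabled() returns 0 above and the call is a no-op:

      sd_event_set_watchdog(e, true);
*/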
2675
2676 _public_ int sd_event_get_watchdog(sd_event *e) {
2677         assert_return(e, -EINVAL);
2678         assert_return(!event_pid_changed(e), -ECHILD);
2679
2680         return e->watchdog;
2681 }