chiark / gitweb /
80a2ae97e8e7ece88cfe87b487d1b5a470e0cf93
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36 #include "list.h"
37
38 #include "sd-event.h"
39
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42
/* The kinds of event sources the loop knows about. The five
 * SOURCE_TIME_* entries correspond to the five clocks supported by
 * timerfd (see event_source_type_to_clock()). SOURCE_WATCHDOG appears
 * to be loop-internal only — no sd_event_add_*() constructor for it is
 * visible in this file chunk. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
59
/* True for the five timer-driven source types listed above. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
61
/* One event source. 'type' selects which member of the trailing
 * anonymous union is valid. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;              /* loop we belong to; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;   /* optional prepare callback, tracked via event->prepare prioq */

        char *name;                   /* owned; released in source_free() */

        EventSourceType type:5;
        int enabled:3;                /* SD_EVENT_OFF, SD_EVENT_ON or SD_EVENT_ONESHOT */
        bool pending:1;               /* has an undelivered event; see source_set_pending() */
        bool dispatching:1;           /* NOTE(review): presumably set while the callback runs — dispatch code not in this chunk */
        bool floating:1;              /* loop-owned: takes no ref on the loop; event_free() disposes of leftovers */

        int64_t priority;             /* lower values dispatch first (see the prioq comparators) */
        unsigned pending_index;       /* our slot in event->pending */
        unsigned prepare_index;       /* our slot in event->prepare */
        unsigned pending_iteration;   /* loop iteration when we last became pending */
        unsigned prepare_iteration;   /* loop iteration when we were last prepared */

        LIST_FIELDS(sd_event_source, sources);    /* linkage in event->sources */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;      /* EPOLL* mask the caller asked for */
                        uint32_t revents;     /* EPOLL* mask received (filled in by poll handling, not in this chunk) */
                        bool registered:1;    /* currently registered with the epoll fd */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;    /* earliest dispatch time, and allowed slack beyond it */
                        unsigned earliest_index;  /* slots in the per-clock earliest/latest prioqs */
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;          /* WEXITED|WSTOPPED|WCONTINUED */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};
122
/* Per-clock bookkeeping, one instance per supported clock in sd_event. */
struct clock_data {
        int fd;    /* timerfd for this clock, or -1 until event_setup_timer_fd() opens it */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;    /* presumably the wakeup currently programmed into the timerfd; USEC_INFINITY if none — arming code not in this chunk */

        bool needs_rearm:1;   /* queues changed; the timerfd must be reprogrammed */
};
139
/* The event loop object proper. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;                 /* these three fds are -1 until opened */
        int signal_fd;
        int watchdog_fd;

        Prioq *pending;               /* sources with an undelivered event */
        Prioq *prepare;               /* sources with a prepare callback */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;               /* boot-id-derived wakeup offset, lazily set in initialize_perturb() */

        sigset_t sigset;              /* signals currently routed to signal_fd */
        sd_event_source **signal_sources;   /* _NSIG entries indexed by signal number; NULL until first use */

        Hashmap *child_sources;       /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;                  /* SOURCE_EXIT sources */

        pid_t original_pid;           /* PID at creation, for event_pid_changed() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;                    /* SD_EVENT_* loop state */

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* back pointer into the default-loop cache, cleared in event_free() */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;           /* number of attached sources */

        LIST_HEAD(sd_event_source, sources);
};
193
194 static void source_disconnect(sd_event_source *s);
195
196 static int pending_prioq_compare(const void *a, const void *b) {
197         const sd_event_source *x = a, *y = b;
198
199         assert(x->pending);
200         assert(y->pending);
201
202         /* Enabled ones first */
203         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204                 return -1;
205         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206                 return 1;
207
208         /* Lower priority values first */
209         if (x->priority < y->priority)
210                 return -1;
211         if (x->priority > y->priority)
212                 return 1;
213
214         /* Older entries first */
215         if (x->pending_iteration < y->pending_iteration)
216                 return -1;
217         if (x->pending_iteration > y->pending_iteration)
218                 return 1;
219
220         /* Stability for the rest */
221         if (x < y)
222                 return -1;
223         if (x > y)
224                 return 1;
225
226         return 0;
227 }
228
229 static int prepare_prioq_compare(const void *a, const void *b) {
230         const sd_event_source *x = a, *y = b;
231
232         assert(x->prepare);
233         assert(y->prepare);
234
235         /* Move most recently prepared ones last, so that we can stop
236          * preparing as soon as we hit one that has already been
237          * prepared in the current iteration */
238         if (x->prepare_iteration < y->prepare_iteration)
239                 return -1;
240         if (x->prepare_iteration > y->prepare_iteration)
241                 return 1;
242
243         /* Enabled ones first */
244         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
245                 return -1;
246         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
247                 return 1;
248
249         /* Lower priority values first */
250         if (x->priority < y->priority)
251                 return -1;
252         if (x->priority > y->priority)
253                 return 1;
254
255         /* Stability for the rest */
256         if (x < y)
257                 return -1;
258         if (x > y)
259                 return 1;
260
261         return 0;
262 }
263
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265         const sd_event_source *x = a, *y = b;
266
267         assert(EVENT_SOURCE_IS_TIME(x->type));
268         assert(x->type == y->type);
269
270         /* Enabled ones first */
271         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
272                 return -1;
273         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
274                 return 1;
275
276         /* Move the pending ones to the end */
277         if (!x->pending && y->pending)
278                 return -1;
279         if (x->pending && !y->pending)
280                 return 1;
281
282         /* Order by time */
283         if (x->time.next < y->time.next)
284                 return -1;
285         if (x->time.next > y->time.next)
286                 return 1;
287
288         /* Stability for the rest */
289         if (x < y)
290                 return -1;
291         if (x > y)
292                 return 1;
293
294         return 0;
295 }
296
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298         const sd_event_source *x = a, *y = b;
299
300         assert(EVENT_SOURCE_IS_TIME(x->type));
301         assert(x->type == y->type);
302
303         /* Enabled ones first */
304         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305                 return -1;
306         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307                 return 1;
308
309         /* Move the pending ones to the end */
310         if (!x->pending && y->pending)
311                 return -1;
312         if (x->pending && !y->pending)
313                 return 1;
314
315         /* Order by time */
316         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
317                 return -1;
318         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
319                 return 1;
320
321         /* Stability for the rest */
322         if (x < y)
323                 return -1;
324         if (x > y)
325                 return 1;
326
327         return 0;
328 }
329
330 static int exit_prioq_compare(const void *a, const void *b) {
331         const sd_event_source *x = a, *y = b;
332
333         assert(x->type == SOURCE_EXIT);
334         assert(y->type == SOURCE_EXIT);
335
336         /* Enabled ones first */
337         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
338                 return -1;
339         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
340                 return 1;
341
342         /* Lower priority values first */
343         if (x->priority < y->priority)
344                 return -1;
345         if (x->priority > y->priority)
346                 return 1;
347
348         /* Stability for the rest */
349         if (x < y)
350                 return -1;
351         if (x > y)
352                 return 1;
353
354         return 0;
355 }
356
357 static void free_clock_data(struct clock_data *d) {
358         assert(d);
359
360         safe_close(d->fd);
361         prioq_free(d->earliest);
362         prioq_free(d->latest);
363 }
364
/* Destroy the loop object and everything it owns. Only floating
 * sources may still be attached at this point: non-floating sources
 * each hold a reference on the loop (see source_new()), so the loop
 * could not be freed while any of them existed. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Disconnect and drop the remaining (floating) sources first,
         * while all the prioqs they may be queued in are still alive */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* Clear the cached default-loop back pointer, if we were it */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
401
/* Allocate a new event loop with a reference count of 1. On success
 * returns 0 and stores the loop in *ret; on failure returns a
 * negative errno-style code. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark all fds unopened so that event_free() below is safe on
         * a partially initialized object */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        /* USEC_INFINITY means "not calculated yet" — see initialize_perturb() */
        e->perturb = USEC_INFINITY;

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
439
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441         assert_return(e, NULL);
442
443         assert(e->n_ref >= 1);
444         e->n_ref++;
445
446         return e;
447 }
448
449 _public_ sd_event* sd_event_unref(sd_event *e) {
450
451         if (!e)
452                 return NULL;
453
454         assert(e->n_ref >= 1);
455         e->n_ref--;
456
457         if (e->n_ref <= 0)
458                 event_free(e);
459
460         return NULL;
461 }
462
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
471
472 static int source_io_unregister(sd_event_source *s) {
473         int r;
474
475         assert(s);
476         assert(s->type == SOURCE_IO);
477
478         if (!s->io.registered)
479                 return 0;
480
481         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
482         if (r < 0)
483                 return -errno;
484
485         s->io.registered = false;
486         return 0;
487 }
488
489 static int source_io_register(
490                 sd_event_source *s,
491                 int enabled,
492                 uint32_t events) {
493
494         struct epoll_event ev = {};
495         int r;
496
497         assert(s);
498         assert(s->type == SOURCE_IO);
499         assert(enabled != SD_EVENT_OFF);
500
501         ev.events = events;
502         ev.data.ptr = s;
503
504         if (enabled == SD_EVENT_ONESHOT)
505                 ev.events |= EPOLLONESHOT;
506
507         if (s->io.registered)
508                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
509         else
510                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
511
512         if (r < 0)
513                 return -errno;
514
515         s->io.registered = true;
516
517         return 0;
518 }
519
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
521
522         switch (t) {
523
524         case SOURCE_TIME_REALTIME:
525                 return CLOCK_REALTIME;
526
527         case SOURCE_TIME_BOOTTIME:
528                 return CLOCK_BOOTTIME;
529
530         case SOURCE_TIME_MONOTONIC:
531                 return CLOCK_MONOTONIC;
532
533         case SOURCE_TIME_REALTIME_ALARM:
534                 return CLOCK_REALTIME_ALARM;
535
536         case SOURCE_TIME_BOOTTIME_ALARM:
537                 return CLOCK_BOOTTIME_ALARM;
538
539         default:
540                 return (clockid_t) -1;
541         }
542 }
543
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
545
546         switch (clock) {
547
548         case CLOCK_REALTIME:
549                 return SOURCE_TIME_REALTIME;
550
551         case CLOCK_BOOTTIME:
552                 return SOURCE_TIME_BOOTTIME;
553
554         case CLOCK_MONOTONIC:
555                 return SOURCE_TIME_MONOTONIC;
556
557         case CLOCK_REALTIME_ALARM:
558                 return SOURCE_TIME_REALTIME_ALARM;
559
560         case CLOCK_BOOTTIME_ALARM:
561                 return SOURCE_TIME_BOOTTIME_ALARM;
562
563         default:
564                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
565         }
566 }
567
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
569         assert(e);
570
571         switch (t) {
572
573         case SOURCE_TIME_REALTIME:
574                 return &e->realtime;
575
576         case SOURCE_TIME_BOOTTIME:
577                 return &e->boottime;
578
579         case SOURCE_TIME_MONOTONIC:
580                 return &e->monotonic;
581
582         case SOURCE_TIME_REALTIME_ALARM:
583                 return &e->realtime_alarm;
584
585         case SOURCE_TIME_BOOTTIME_ALARM:
586                 return &e->boottime_alarm;
587
588         default:
589                 return NULL;
590         }
591 }
592
593 static bool need_signal(sd_event *e, int signal) {
594         return (e->signal_sources && e->signal_sources[signal] &&
595                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
596                 ||
597                (signal == SIGCHLD &&
598                 e->n_enabled_child_sources > 0);
599 }
600
601 static int event_update_signal_fd(sd_event *e) {
602         struct epoll_event ev = {};
603         bool add_to_epoll;
604         int r;
605
606         assert(e);
607
608         add_to_epoll = e->signal_fd < 0;
609
610         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
611         if (r < 0)
612                 return -errno;
613
614         e->signal_fd = r;
615
616         if (!add_to_epoll)
617                 return 0;
618
619         ev.events = EPOLLIN;
620         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
621
622         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
623         if (r < 0) {
624                 e->signal_fd = safe_close(e->signal_fd);
625                 return -errno;
626         }
627
628         return 0;
629 }
630
/* Detach a source from its event loop: deregister it from the
 * kernel-facing machinery for its type, pull it out of the pending
 * and prepare queues, and unlink it from the loop's source list.
 * Idempotent — returns immediately if already disconnected. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        /* Drop it from the common queues as well */
        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pinned the loop; release that ref last,
         * since it may destroy the loop */
        if (!s->floating)
                sd_event_unref(event);
}
735
/* Disconnect the source from its loop and release its memory. Note
 * that an I/O source's fd is not closed here. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->name);
        free(s);
}
743
/* Mark a source as pending (it has an event waiting to be dispatched)
 * or not pending, keeping the pending prioq — and, for timer sources,
 * the per-clock prioqs whose ordering depends on the pending bit —
 * in sync. Returns 0 on success or if nothing changed, negative errno
 * on allocation failure. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* Exit sources are dispatched from their own queue and never
         * go through the pending machinery */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back the flag on OOM */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                /* Pending timers sort to the back of both queues (see
                 * the comparators), so their position must be redone */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
779
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
781         sd_event_source *s;
782
783         assert(e);
784
785         s = new0(sd_event_source, 1);
786         if (!s)
787                 return NULL;
788
789         s->n_ref = 1;
790         s->event = e;
791         s->floating = floating;
792         s->type = type;
793         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
794
795         if (!floating)
796                 sd_event_ref(e);
797
798         LIST_PREPEND(sources, e->sources, s);
799         e->n_sources ++;
800
801         return s;
802 }
803
/* Add an I/O source watching 'fd' for the given EPOLL* event mask.
 * If 'ret' is NULL the source is created floating, i.e. owned by the
 * loop. Returns 0 on success, negative errno on failure; the fd is
 * left open on failure (this API never takes ownership of it). */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
843
/* Lazily compute e->perturb; a no-op once it is set. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        /* On failure perturb stays USEC_INFINITY and we will retry
         * on the next call */
        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
862
863 static int event_setup_timer_fd(
864                 sd_event *e,
865                 struct clock_data *d,
866                 clockid_t clock) {
867
868         struct epoll_event ev = {};
869         int r, fd;
870
871         assert(e);
872         assert(d);
873
874         if (_likely_(d->fd >= 0))
875                 return 0;
876
877         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
878         if (fd < 0)
879                 return -errno;
880
881         ev.events = EPOLLIN;
882         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
883
884         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
885         if (r < 0) {
886                 safe_close(fd);
887                 return -errno;
888         }
889
890         d->fd = fd;
891         return 0;
892 }
893
/* Add a timer source on the given clock, firing at time 'usec' on
 * that clock, with up to 'accuracy' microseconds of slack (0 selects
 * DEFAULT_ACCURACY_USEC). Per-clock state — the two prioqs and the
 * timerfd — is created lazily on first use. If 'ret' is NULL the
 * source is created floating (loop-owned). Returns 0 on success,
 * -ENOTSUP for unsupported clocks, other negative errno on failure. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
969
/* Default handler installed by sd_event_add_signal() when the caller
 * passes a NULL callback: request loop exit with the code that was
 * stashed in userdata. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
975
976 _public_ int sd_event_add_signal(
977                 sd_event *e,
978                 sd_event_source **ret,
979                 int sig,
980                 sd_event_signal_handler_t callback,
981                 void *userdata) {
982
983         sd_event_source *s;
984         sigset_t ss;
985         int r;
986         bool previous;
987
988         assert_return(e, -EINVAL);
989         assert_return(sig > 0, -EINVAL);
990         assert_return(sig < _NSIG, -EINVAL);
991         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992         assert_return(!event_pid_changed(e), -ECHILD);
993
994         if (!callback)
995                 callback = signal_exit_callback;
996
997         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
998         if (r < 0)
999                 return -errno;
1000
1001         if (!sigismember(&ss, sig))
1002                 return -EBUSY;
1003
1004         if (!e->signal_sources) {
1005                 e->signal_sources = new0(sd_event_source*, _NSIG);
1006                 if (!e->signal_sources)
1007                         return -ENOMEM;
1008         } else if (e->signal_sources[sig])
1009                 return -EBUSY;
1010
1011         previous = need_signal(e, sig);
1012
1013         s = source_new(e, !ret, SOURCE_SIGNAL);
1014         if (!s)
1015                 return -ENOMEM;
1016
1017         s->signal.sig = sig;
1018         s->signal.callback = callback;
1019         s->userdata = userdata;
1020         s->enabled = SD_EVENT_ON;
1021
1022         e->signal_sources[sig] = s;
1023
1024         if (!previous) {
1025                 assert_se(sigaddset(&e->sigset, sig) == 0);
1026
1027                 r = event_update_signal_fd(e);
1028                 if (r < 0) {
1029                         source_free(s);
1030                         return r;
1031                 }
1032         }
1033
1034         if (ret)
1035                 *ret = s;
1036
1037         return 0;
1038 }
1039
/* Adds an event source watching state changes of one child process,
 * dispatched via SIGCHLD and polled with waitid() in process_child().
 * 'options' is a waitid()-style mask (WEXITED/WSTOPPED/WCONTINUED) and
 * must be non-empty. The source is created enabled in ONESHOT mode.
 * Only one source per PID is permitted (-EBUSY otherwise).
 * NOTE(review): unlike sd_event_add_signal(), this does not verify
 * that SIGCHLD is blocked in the calling thread — presumably the
 * caller must ensure that; confirm against the library docs. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* At most one source per child PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Remember whether SIGCHLD was already watched, so we only
         * reprogram the signalfd when this is the first source that
         * needs it. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* The source starts out enabled, hence account for it; this
         * counter feeds need_signal() for SIGCHLD. */
        e->n_enabled_child_sources ++;

        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Force a waitid() sweep on the next loop iteration, in case
         * the child already changed state. */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1104
1105 _public_ int sd_event_add_defer(
1106                 sd_event *e,
1107                 sd_event_source **ret,
1108                 sd_event_handler_t callback,
1109                 void *userdata) {
1110
1111         sd_event_source *s;
1112         int r;
1113
1114         assert_return(e, -EINVAL);
1115         assert_return(callback, -EINVAL);
1116         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1117         assert_return(!event_pid_changed(e), -ECHILD);
1118
1119         s = source_new(e, !ret, SOURCE_DEFER);
1120         if (!s)
1121                 return -ENOMEM;
1122
1123         s->defer.callback = callback;
1124         s->userdata = userdata;
1125         s->enabled = SD_EVENT_ONESHOT;
1126
1127         r = source_set_pending(s, true);
1128         if (r < 0) {
1129                 source_free(s);
1130                 return r;
1131         }
1132
1133         if (ret)
1134                 *ret = s;
1135
1136         return 0;
1137 }
1138
1139 _public_ int sd_event_add_post(
1140                 sd_event *e,
1141                 sd_event_source **ret,
1142                 sd_event_handler_t callback,
1143                 void *userdata) {
1144
1145         sd_event_source *s;
1146         int r;
1147
1148         assert_return(e, -EINVAL);
1149         assert_return(callback, -EINVAL);
1150         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1151         assert_return(!event_pid_changed(e), -ECHILD);
1152
1153         r = set_ensure_allocated(&e->post_sources, NULL);
1154         if (r < 0)
1155                 return r;
1156
1157         s = source_new(e, !ret, SOURCE_POST);
1158         if (!s)
1159                 return -ENOMEM;
1160
1161         s->post.callback = callback;
1162         s->userdata = userdata;
1163         s->enabled = SD_EVENT_ON;
1164
1165         r = set_put(e->post_sources, s);
1166         if (r < 0) {
1167                 source_free(s);
1168                 return r;
1169         }
1170
1171         if (ret)
1172                 *ret = s;
1173
1174         return 0;
1175 }
1176
1177 _public_ int sd_event_add_exit(
1178                 sd_event *e,
1179                 sd_event_source **ret,
1180                 sd_event_handler_t callback,
1181                 void *userdata) {
1182
1183         sd_event_source *s;
1184         int r;
1185
1186         assert_return(e, -EINVAL);
1187         assert_return(callback, -EINVAL);
1188         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1189         assert_return(!event_pid_changed(e), -ECHILD);
1190
1191         if (!e->exit) {
1192                 e->exit = prioq_new(exit_prioq_compare);
1193                 if (!e->exit)
1194                         return -ENOMEM;
1195         }
1196
1197         s = source_new(e, !ret, SOURCE_EXIT);
1198         if (!s)
1199                 return -ENOMEM;
1200
1201         s->exit.callback = callback;
1202         s->userdata = userdata;
1203         s->exit.prioq_index = PRIOQ_IDX_NULL;
1204         s->enabled = SD_EVENT_ONESHOT;
1205
1206         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1207         if (r < 0) {
1208                 source_free(s);
1209                 return r;
1210         }
1211
1212         if (ret)
1213                 *ret = s;
1214
1215         return 0;
1216 }
1217
1218 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1219         assert_return(s, NULL);
1220
1221         assert(s->n_ref >= 1);
1222         s->n_ref++;
1223
1224         return s;
1225 }
1226
/* Drops one reference from the event source; frees it when the last
 * reference is gone. Always returns NULL so callers can write
 * "s = sd_event_source_unref(s);". NULL input is a no-op. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        /* Only IO sources hold an fd registered in the
                         * epoll, hence only they need the detach step;
                         * the actual free happens after dispatching. */
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1255
1256 _public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
1257         assert_return(s, -EINVAL);
1258
1259         return free_and_strdup(&s->name, name);
1260 }
1261
1262 _public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
1263         assert_return(s, -EINVAL);
1264         assert_return(name, -EINVAL);
1265
1266         *name = s->name;
1267
1268         return 0;
1269 }
1270
1271 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1272         assert_return(s, NULL);
1273
1274         return s->event;
1275 }
1276
1277 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1278         assert_return(s, -EINVAL);
1279         assert_return(s->type != SOURCE_EXIT, -EDOM);
1280         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1281         assert_return(!event_pid_changed(s->event), -ECHILD);
1282
1283         return s->pending;
1284 }
1285
1286 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1287         assert_return(s, -EINVAL);
1288         assert_return(s->type == SOURCE_IO, -EDOM);
1289         assert_return(!event_pid_changed(s->event), -ECHILD);
1290
1291         return s->io.fd;
1292 }
1293
/* Replaces the file descriptor an IO source watches. If the source is
 * enabled, the new fd is registered with epoll before the old one is
 * removed, so that a registration failure can be rolled back without
 * losing the old fd's registration. Does not close either fd. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered in the epoll right now, a plain swap
                 * suffices. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back: the old fd is still in the epoll. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd from the epoll; failure
                 * here is deliberately ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1329
1330 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1331         assert_return(s, -EINVAL);
1332         assert_return(events, -EINVAL);
1333         assert_return(s->type == SOURCE_IO, -EDOM);
1334         assert_return(!event_pid_changed(s->event), -ECHILD);
1335
1336         *events = s->io.events;
1337         return 0;
1338 }
1339
1340 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1341         int r;
1342
1343         assert_return(s, -EINVAL);
1344         assert_return(s->type == SOURCE_IO, -EDOM);
1345         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1346         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1347         assert_return(!event_pid_changed(s->event), -ECHILD);
1348
1349         /* edge-triggered updates are never skipped, so we can reset edges */
1350         if (s->io.events == events && !(events & EPOLLET))
1351                 return 0;
1352
1353         if (s->enabled != SD_EVENT_OFF) {
1354                 r = source_io_register(s, s->enabled, events);
1355                 if (r < 0)
1356                         return r;
1357         }
1358
1359         s->io.events = events;
1360         source_set_pending(s, false);
1361
1362         return 0;
1363 }
1364
1365 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1366         assert_return(s, -EINVAL);
1367         assert_return(revents, -EINVAL);
1368         assert_return(s->type == SOURCE_IO, -EDOM);
1369         assert_return(s->pending, -ENODATA);
1370         assert_return(!event_pid_changed(s->event), -ECHILD);
1371
1372         *revents = s->io.revents;
1373         return 0;
1374 }
1375
1376 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1377         assert_return(s, -EINVAL);
1378         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1379         assert_return(!event_pid_changed(s->event), -ECHILD);
1380
1381         return s->signal.sig;
1382 }
1383
1384 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1385         assert_return(s, -EINVAL);
1386         assert_return(!event_pid_changed(s->event), -ECHILD);
1387
1388         return s->priority;
1389 }
1390
1391 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1392         assert_return(s, -EINVAL);
1393         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1394         assert_return(!event_pid_changed(s->event), -ECHILD);
1395
1396         if (s->priority == priority)
1397                 return 0;
1398
1399         s->priority = priority;
1400
1401         if (s->pending)
1402                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1403
1404         if (s->prepare)
1405                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1406
1407         if (s->type == SOURCE_EXIT)
1408                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1409
1410         return 0;
1411 }
1412
1413 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1414         assert_return(s, -EINVAL);
1415         assert_return(m, -EINVAL);
1416         assert_return(!event_pid_changed(s->event), -ECHILD);
1417
1418         *m = s->enabled;
1419         return 0;
1420 }
1421
1422 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1423         int r;
1424
1425         assert_return(s, -EINVAL);
1426         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1427         assert_return(!event_pid_changed(s->event), -ECHILD);
1428
1429         /* If we are dead anyway, we are fine with turning off
1430          * sources, but everything else needs to fail. */
1431         if (s->event->state == SD_EVENT_FINISHED)
1432                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1433
1434         if (s->enabled == m)
1435                 return 0;
1436
1437         if (m == SD_EVENT_OFF) {
1438
1439                 switch (s->type) {
1440
1441                 case SOURCE_IO:
1442                         r = source_io_unregister(s);
1443                         if (r < 0)
1444                                 return r;
1445
1446                         s->enabled = m;
1447                         break;
1448
1449                 case SOURCE_TIME_REALTIME:
1450                 case SOURCE_TIME_BOOTTIME:
1451                 case SOURCE_TIME_MONOTONIC:
1452                 case SOURCE_TIME_REALTIME_ALARM:
1453                 case SOURCE_TIME_BOOTTIME_ALARM: {
1454                         struct clock_data *d;
1455
1456                         s->enabled = m;
1457                         d = event_get_clock_data(s->event, s->type);
1458                         assert(d);
1459
1460                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1461                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1462                         d->needs_rearm = true;
1463                         break;
1464                 }
1465
1466                 case SOURCE_SIGNAL:
1467                         assert(need_signal(s->event, s->signal.sig));
1468
1469                         s->enabled = m;
1470
1471                         if (!need_signal(s->event, s->signal.sig)) {
1472                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1473
1474                                 (void) event_update_signal_fd(s->event);
1475                                 /* If disabling failed, we might get a spurious event,
1476                                  * but otherwise nothing bad should happen. */
1477                         }
1478
1479                         break;
1480
1481                 case SOURCE_CHILD:
1482                         assert(need_signal(s->event, SIGCHLD));
1483
1484                         s->enabled = m;
1485
1486                         assert(s->event->n_enabled_child_sources > 0);
1487                         s->event->n_enabled_child_sources--;
1488
1489                         if (!need_signal(s->event, SIGCHLD)) {
1490                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1491
1492                                 (void) event_update_signal_fd(s->event);
1493                         }
1494
1495                         break;
1496
1497                 case SOURCE_EXIT:
1498                         s->enabled = m;
1499                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1500                         break;
1501
1502                 case SOURCE_DEFER:
1503                 case SOURCE_POST:
1504                         s->enabled = m;
1505                         break;
1506
1507                 default:
1508                         assert_not_reached("Wut? I shouldn't exist.");
1509                 }
1510
1511         } else {
1512                 switch (s->type) {
1513
1514                 case SOURCE_IO:
1515                         r = source_io_register(s, m, s->io.events);
1516                         if (r < 0)
1517                                 return r;
1518
1519                         s->enabled = m;
1520                         break;
1521
1522                 case SOURCE_TIME_REALTIME:
1523                 case SOURCE_TIME_BOOTTIME:
1524                 case SOURCE_TIME_MONOTONIC:
1525                 case SOURCE_TIME_REALTIME_ALARM:
1526                 case SOURCE_TIME_BOOTTIME_ALARM: {
1527                         struct clock_data *d;
1528
1529                         s->enabled = m;
1530                         d = event_get_clock_data(s->event, s->type);
1531                         assert(d);
1532
1533                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1534                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1535                         d->needs_rearm = true;
1536                         break;
1537                 }
1538
1539                 case SOURCE_SIGNAL:
1540                         /* Check status before enabling. */
1541                         if (!need_signal(s->event, s->signal.sig)) {
1542                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1543
1544                                 r = event_update_signal_fd(s->event);
1545                                 if (r < 0) {
1546                                         s->enabled = SD_EVENT_OFF;
1547                                         return r;
1548                                 }
1549                         }
1550
1551                         s->enabled = m;
1552                         break;
1553
1554                 case SOURCE_CHILD:
1555                         /* Check status before enabling. */
1556                         if (s->enabled == SD_EVENT_OFF) {
1557                                 if (!need_signal(s->event, SIGCHLD)) {
1558                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1559
1560                                         r = event_update_signal_fd(s->event);
1561                                         if (r < 0) {
1562                                                 s->enabled = SD_EVENT_OFF;
1563                                                 return r;
1564                                         }
1565                                 }
1566
1567                                 s->event->n_enabled_child_sources++;
1568                         }
1569
1570                         s->enabled = m;
1571                         break;
1572
1573                 case SOURCE_EXIT:
1574                         s->enabled = m;
1575                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1576                         break;
1577
1578                 case SOURCE_DEFER:
1579                 case SOURCE_POST:
1580                         s->enabled = m;
1581                         break;
1582
1583                 default:
1584                         assert_not_reached("Wut? I shouldn't exist.");
1585                 }
1586         }
1587
1588         if (s->pending)
1589                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1590
1591         if (s->prepare)
1592                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1593
1594         return 0;
1595 }
1596
1597 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1598         assert_return(s, -EINVAL);
1599         assert_return(usec, -EINVAL);
1600         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1601         assert_return(!event_pid_changed(s->event), -ECHILD);
1602
1603         *usec = s->time.next;
1604         return 0;
1605 }
1606
1607 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1608         struct clock_data *d;
1609
1610         assert_return(s, -EINVAL);
1611         assert_return(usec != (uint64_t) -1, -EINVAL);
1612         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1613         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1614         assert_return(!event_pid_changed(s->event), -ECHILD);
1615
1616         s->time.next = usec;
1617
1618         source_set_pending(s, false);
1619
1620         d = event_get_clock_data(s->event, s->type);
1621         assert(d);
1622
1623         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1624         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1625         d->needs_rearm = true;
1626
1627         return 0;
1628 }
1629
1630 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1631         assert_return(s, -EINVAL);
1632         assert_return(usec, -EINVAL);
1633         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1634         assert_return(!event_pid_changed(s->event), -ECHILD);
1635
1636         *usec = s->time.accuracy;
1637         return 0;
1638 }
1639
1640 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1641         struct clock_data *d;
1642
1643         assert_return(s, -EINVAL);
1644         assert_return(usec != (uint64_t) -1, -EINVAL);
1645         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1646         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1647         assert_return(!event_pid_changed(s->event), -ECHILD);
1648
1649         if (usec == 0)
1650                 usec = DEFAULT_ACCURACY_USEC;
1651
1652         s->time.accuracy = usec;
1653
1654         source_set_pending(s, false);
1655
1656         d = event_get_clock_data(s->event, s->type);
1657         assert(d);
1658
1659         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1660         d->needs_rearm = true;
1661
1662         return 0;
1663 }
1664
1665 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1666         assert_return(s, -EINVAL);
1667         assert_return(clock, -EINVAL);
1668         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1669         assert_return(!event_pid_changed(s->event), -ECHILD);
1670
1671         *clock = event_source_type_to_clock(s->type);
1672         return 0;
1673 }
1674
1675 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1676         assert_return(s, -EINVAL);
1677         assert_return(pid, -EINVAL);
1678         assert_return(s->type == SOURCE_CHILD, -EDOM);
1679         assert_return(!event_pid_changed(s->event), -ECHILD);
1680
1681         *pid = s->child.pid;
1682         return 0;
1683 }
1684
1685 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1686         int r;
1687
1688         assert_return(s, -EINVAL);
1689         assert_return(s->type != SOURCE_EXIT, -EDOM);
1690         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1691         assert_return(!event_pid_changed(s->event), -ECHILD);
1692
1693         if (s->prepare == callback)
1694                 return 0;
1695
1696         if (callback && s->prepare) {
1697                 s->prepare = callback;
1698                 return 0;
1699         }
1700
1701         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1702         if (r < 0)
1703                 return r;
1704
1705         s->prepare = callback;
1706
1707         if (callback) {
1708                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1709                 if (r < 0)
1710                         return r;
1711         } else
1712                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1713
1714         return 0;
1715 }
1716
1717 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1718         assert_return(s, NULL);
1719
1720         return s->userdata;
1721 }
1722
1723 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1724         void *ret;
1725
1726         assert_return(s, NULL);
1727
1728         ret = s->userdata;
1729         s->userdata = userdata;
1730
1731         return ret;
1732 }
1733
1734 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1735         usec_t c;
1736         assert(e);
1737         assert(a <= b);
1738
1739         if (a <= 0)
1740                 return 0;
1741
1742         if (b <= a + 1)
1743                 return a;
1744
1745         initialize_perturb(e);
1746
1747         /*
1748           Find a good time to wake up again between times a and b. We
1749           have two goals here:
1750
1751           a) We want to wake up as seldom as possible, hence prefer
1752              later times over earlier times.
1753
1754           b) But if we have to wake up, then let's make sure to
1755              dispatch as much as possible on the entire system.
1756
1757           We implement this by waking up everywhere at the same time
1758           within any given minute if we can, synchronised via the
1759           perturbation value determined from the boot ID. If we can't,
1760           then we try to find the same spot in every 10s, then 1s and
1761           then 250ms step. Otherwise, we pick the last possible time
1762           to wake up.
1763         */
1764
1765         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1766         if (c >= b) {
1767                 if (_unlikely_(c < USEC_PER_MINUTE))
1768                         return b;
1769
1770                 c -= USEC_PER_MINUTE;
1771         }
1772
1773         if (c >= a)
1774                 return c;
1775
1776         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1777         if (c >= b) {
1778                 if (_unlikely_(c < USEC_PER_SEC*10))
1779                         return b;
1780
1781                 c -= USEC_PER_SEC*10;
1782         }
1783
1784         if (c >= a)
1785                 return c;
1786
1787         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1788         if (c >= b) {
1789                 if (_unlikely_(c < USEC_PER_SEC))
1790                         return b;
1791
1792                 c -= USEC_PER_SEC;
1793         }
1794
1795         if (c >= a)
1796                 return c;
1797
1798         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1799         if (c >= b) {
1800                 if (_unlikely_(c < USEC_PER_MSEC*250))
1801                         return b;
1802
1803                 c -= USEC_PER_MSEC*250;
1804         }
1805
1806         if (c >= a)
1807                 return c;
1808
1809         return b;
1810 }
1811
1812 static int event_arm_timer(
1813                 sd_event *e,
1814                 struct clock_data *d) {
1815
1816         struct itimerspec its = {};
1817         sd_event_source *a, *b;
1818         usec_t t;
1819         int r;
1820
1821         assert(e);
1822         assert(d);
1823
1824         if (!d->needs_rearm)
1825                 return 0;
1826         else
1827                 d->needs_rearm = false;
1828
1829         a = prioq_peek(d->earliest);
1830         if (!a || a->enabled == SD_EVENT_OFF) {
1831
1832                 if (d->fd < 0)
1833                         return 0;
1834
1835                 if (d->next == USEC_INFINITY)
1836                         return 0;
1837
1838                 /* disarm */
1839                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1840                 if (r < 0)
1841                         return r;
1842
1843                 d->next = USEC_INFINITY;
1844                 return 0;
1845         }
1846
1847         b = prioq_peek(d->latest);
1848         assert_se(b && b->enabled != SD_EVENT_OFF);
1849
1850         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1851         if (d->next == t)
1852                 return 0;
1853
1854         assert_se(d->fd >= 0);
1855
1856         if (t == 0) {
1857                 /* We don' want to disarm here, just mean some time looooong ago. */
1858                 its.it_value.tv_sec = 0;
1859                 its.it_value.tv_nsec = 1;
1860         } else
1861                 timespec_store(&its.it_value, t);
1862
1863         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1864         if (r < 0)
1865                 return -errno;
1866
1867         d->next = t;
1868         return 0;
1869 }
1870
1871 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1872         assert(e);
1873         assert(s);
1874         assert(s->type == SOURCE_IO);
1875
1876         /* If the event source was already pending, we just OR in the
1877          * new revents, otherwise we reset the value. The ORing is
1878          * necessary to handle EPOLLONESHOT events properly where
1879          * readability might happen independently of writability, and
1880          * we need to keep track of both */
1881
1882         if (s->pending)
1883                 s->io.revents |= revents;
1884         else
1885                 s->io.revents = revents;
1886
1887         return source_set_pending(s, true);
1888 }
1889
1890 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1891         uint64_t x;
1892         ssize_t ss;
1893
1894         assert(e);
1895         assert(fd >= 0);
1896
1897         assert_return(events == EPOLLIN, -EIO);
1898
1899         ss = read(fd, &x, sizeof(x));
1900         if (ss < 0) {
1901                 if (errno == EAGAIN || errno == EINTR)
1902                         return 0;
1903
1904                 return -errno;
1905         }
1906
1907         if (_unlikely_(ss != sizeof(x)))
1908                 return -EIO;
1909
1910         if (next)
1911                 *next = USEC_INFINITY;
1912
1913         return 0;
1914 }
1915
1916 static int process_timer(
1917                 sd_event *e,
1918                 usec_t n,
1919                 struct clock_data *d) {
1920
1921         sd_event_source *s;
1922         int r;
1923
1924         assert(e);
1925         assert(d);
1926
1927         for (;;) {
1928                 s = prioq_peek(d->earliest);
1929                 if (!s ||
1930                     s->time.next > n ||
1931                     s->enabled == SD_EVENT_OFF ||
1932                     s->pending)
1933                         break;
1934
1935                 r = source_set_pending(s, true);
1936                 if (r < 0)
1937                         return r;
1938
1939                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1940                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1941                 d->needs_rearm = true;
1942         }
1943
1944         return 0;
1945 }
1946
/* Polls every watched child process for a state change (without
 * reaping it) and marks the corresponding child event sources pending.
 * Returns 0 on success, a negative errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued from an earlier round; don't overwrite
                 * the siginfo stored for dispatch. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                /* Use WNOWAIT only when the caller asked for WEXITED,
                 * so the exit status stays queued until dispatch. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid == 0 means no state change was available. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2013
/* Drains the signalfd and marks the matching signal sources pending.
 * SIGCHLD additionally triggers child processing. Returns > 0 if at
 * least one signal was read, 0 if none, negative errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        /* Loop until the (non-blocking) signalfd is drained. */
        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: fully drained; report whether we
                         * consumed anything this call. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                /* signalfd reads are always whole structures. */
                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                /* No source registered for this signal? Then it is
                 * simply discarded. */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                if (!s)
                        continue;

                /* Stash the siginfo for the dispatch callback. */
                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2062
/* Invokes the user callback of a single event source, handling the
 * surrounding bookkeeping: clearing the pending flag, queuing post
 * sources, honoring ONESHOT, reaping dispatched children, and
 * disabling (or freeing) the source if the callback failed or dropped
 * the last reference. Returns 1 on success, negative errno on
 * internal failure. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending across dispatches; all
         * others are unqueued before the callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are disabled before the callback, so the
         * callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->name)
                        log_debug("Event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                else
                        log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
        }

        /* The callback may have dropped the last user reference; in
         * that case the source was kept alive only for the dispatch
         * and is freed now. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2168
/* Runs the prepare callback of every enabled source that has one
 * registered, at most once per loop iteration. Sources with prepare
 * callbacks live in the e->prepare priority queue, ordered so that
 * not-yet-prepared sources come first. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                /* Stop once we reach a source already prepared this
                 * iteration (the queue orders those last) or a
                 * disabled one. */
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as prepared and reshuffle before invoking the
                 * callback, so the loop terminates even if the
                 * callback modifies the queue. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->name)
                                log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->name, strerror(-r));
                        else
                                log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
                }

                /* The callback may have dropped the last reference. */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2207
2208 static int dispatch_exit(sd_event *e) {
2209         sd_event_source *p;
2210         int r;
2211
2212         assert(e);
2213
2214         p = prioq_peek(e->exit);
2215         if (!p || p->enabled == SD_EVENT_OFF) {
2216                 e->state = SD_EVENT_FINISHED;
2217                 return 0;
2218         }
2219
2220         sd_event_ref(e);
2221         e->iteration++;
2222         e->state = SD_EVENT_EXITING;
2223
2224         r = source_dispatch(p);
2225
2226         e->state = SD_EVENT_PASSIVE;
2227         sd_event_unref(e);
2228
2229         return r;
2230 }
2231
2232 static sd_event_source* event_next_pending(sd_event *e) {
2233         sd_event_source *p;
2234
2235         assert(e);
2236
2237         p = prioq_peek(e->pending);
2238         if (!p)
2239                 return NULL;
2240
2241         if (p->enabled == SD_EVENT_OFF)
2242                 return NULL;
2243
2244         return p;
2245 }
2246
2247 static int arm_watchdog(sd_event *e) {
2248         struct itimerspec its = {};
2249         usec_t t;
2250         int r;
2251
2252         assert(e);
2253         assert(e->watchdog_fd >= 0);
2254
2255         t = sleep_between(e,
2256                           e->watchdog_last + (e->watchdog_period / 2),
2257                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2258
2259         timespec_store(&its.it_value, t);
2260
2261         /* Make sure we never set the watchdog to 0, which tells the
2262          * kernel to disable it. */
2263         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2264                 its.it_value.tv_nsec = 1;
2265
2266         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2267         if (r < 0)
2268                 return -errno;
2269
2270         return 0;
2271 }
2272
2273 static int process_watchdog(sd_event *e) {
2274         assert(e);
2275
2276         if (!e->watchdog)
2277                 return 0;
2278
2279         /* Don't notify watchdog too often */
2280         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2281                 return 0;
2282
2283         sd_notify(false, "WATCHDOG=1");
2284         e->watchdog_last = e->timestamp.monotonic;
2285
2286         return arm_watchdog(e);
2287 }
2288
/* First stage of a loop iteration: runs prepare callbacks and arms the
 * per-clock timerfds. Transitions PASSIVE -> PREPARED, or straight to
 * PENDING (returning > 0) if dispatchable work already exists. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* An exit was requested: skip straight to the pending path so
         * the exit sources get dispatched. */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Rearm each clock's timerfd for its earliest deadline. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is dispatchable already: do a zero-timeout wait to
         * collect events and move to PENDING. sd_event_wait() resets
         * the state on r == 0, hence restore PREPARED afterwards. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2341
/* Second stage of a loop iteration: waits (up to 'timeout' usec,
 * (uint64_t) -1 for infinity) for events on the epoll fd, flushes the
 * clock/signal/watchdog fds, and queues pending sources. Transitions
 * PREPARED -> PENDING (returns 1) if something is dispatchable,
 * otherwise back to PASSIVE (returns 0). */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Size the event buffer by source count, bounded to keep the
         * stack allocation sane. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Convert the usec timeout to ms, rounding up so we never wake
         * too early. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Treat an interrupted wait as "something may
                         * be pending" and let the caller retry. */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Take the iteration's timestamps; all timer processing below
         * and all callbacks see this one consistent point in time. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with their SOURCE_* type in
                 * data.ptr; anything else is an IO source pointer. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue all timer sources whose deadline has passed, per
         * clock. Note the alarm clocks share the realtime/boottime
         * timestamps. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2442
2443 _public_ int sd_event_dispatch(sd_event *e) {
2444         sd_event_source *p;
2445         int r;
2446
2447         assert_return(e, -EINVAL);
2448         assert_return(!event_pid_changed(e), -ECHILD);
2449         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2450         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2451
2452         if (e->exit_requested)
2453                 return dispatch_exit(e);
2454
2455         p = event_next_pending(e);
2456         if (p) {
2457                 sd_event_ref(e);
2458
2459                 e->state = SD_EVENT_RUNNING;
2460                 r = source_dispatch(p);
2461                 e->state = SD_EVENT_PASSIVE;
2462
2463                 sd_event_unref(e);
2464
2465                 return r;
2466         }
2467
2468         e->state = SD_EVENT_PASSIVE;
2469
2470         return 1;
2471 }
2472
2473 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2474         int r;
2475
2476         assert_return(e, -EINVAL);
2477         assert_return(!event_pid_changed(e), -ECHILD);
2478         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2479         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2480
2481         r = sd_event_prepare(e);
2482         if (r > 0)
2483                 return sd_event_dispatch(e);
2484         else if (r < 0)
2485                 return r;
2486
2487         r = sd_event_wait(e, timeout);
2488         if (r > 0)
2489                 return sd_event_dispatch(e);
2490         else
2491                 return r;
2492 }
2493
2494 _public_ int sd_event_loop(sd_event *e) {
2495         int r;
2496
2497         assert_return(e, -EINVAL);
2498         assert_return(!event_pid_changed(e), -ECHILD);
2499         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2500
2501         sd_event_ref(e);
2502
2503         while (e->state != SD_EVENT_FINISHED) {
2504                 r = sd_event_run(e, (uint64_t) -1);
2505                 if (r < 0)
2506                         goto finish;
2507         }
2508
2509         r = e->exit_code;
2510
2511 finish:
2512         sd_event_unref(e);
2513         return r;
2514 }
2515
2516 _public_ int sd_event_get_fd(sd_event *e) {
2517
2518         assert_return(e, -EINVAL);
2519         assert_return(!event_pid_changed(e), -ECHILD);
2520
2521         return e->epoll_fd;
2522 }
2523
2524 _public_ int sd_event_get_state(sd_event *e) {
2525         assert_return(e, -EINVAL);
2526         assert_return(!event_pid_changed(e), -ECHILD);
2527
2528         return e->state;
2529 }
2530
2531 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2532         assert_return(e, -EINVAL);
2533         assert_return(code, -EINVAL);
2534         assert_return(!event_pid_changed(e), -ECHILD);
2535
2536         if (!e->exit_requested)
2537                 return -ENODATA;
2538
2539         *code = e->exit_code;
2540         return 0;
2541 }
2542
2543 _public_ int sd_event_exit(sd_event *e, int code) {
2544         assert_return(e, -EINVAL);
2545         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2546         assert_return(!event_pid_changed(e), -ECHILD);
2547
2548         e->exit_requested = true;
2549         e->exit_code = code;
2550
2551         return 0;
2552 }
2553
2554 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2555         assert_return(e, -EINVAL);
2556         assert_return(usec, -EINVAL);
2557         assert_return(!event_pid_changed(e), -ECHILD);
2558
2559         /* If we haven't run yet, just get the actual time */
2560         if (!dual_timestamp_is_set(&e->timestamp))
2561                 return -ENODATA;
2562
2563         switch (clock) {
2564
2565         case CLOCK_REALTIME:
2566         case CLOCK_REALTIME_ALARM:
2567                 *usec = e->timestamp.realtime;
2568                 break;
2569
2570         case CLOCK_MONOTONIC:
2571                 *usec = e->timestamp.monotonic;
2572                 break;
2573
2574         case CLOCK_BOOTTIME:
2575         case CLOCK_BOOTTIME_ALARM:
2576                 *usec = e->timestamp_boottime;
2577                 break;
2578         }
2579
2580         return 0;
2581 }
2582
2583 _public_ int sd_event_default(sd_event **ret) {
2584
2585         static thread_local sd_event *default_event = NULL;
2586         sd_event *e = NULL;
2587         int r;
2588
2589         if (!ret)
2590                 return !!default_event;
2591
2592         if (default_event) {
2593                 *ret = sd_event_ref(default_event);
2594                 return 0;
2595         }
2596
2597         r = sd_event_new(&e);
2598         if (r < 0)
2599                 return r;
2600
2601         e->default_event_ptr = &default_event;
2602         e->tid = gettid();
2603         default_event = e;
2604
2605         *ret = e;
2606         return 1;
2607 }
2608
2609 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2610         assert_return(e, -EINVAL);
2611         assert_return(tid, -EINVAL);
2612         assert_return(!event_pid_changed(e), -ECHILD);
2613
2614         if (e->tid != 0) {
2615                 *tid = e->tid;
2616                 return 0;
2617         }
2618
2619         return -ENXIO;
2620 }
2621
/* Enables or disables sd_notify() watchdog support for this loop.
 * When enabled (and WATCHDOG_USEC is set in the environment), the
 * loop pings the service manager periodically via a dedicated
 * timerfd. Returns the resulting watchdog state (boolean) or a
 * negative error. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state, nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means the watchdog is not requested by the
                 * service manager; pass that through to the caller. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: detach and close the timerfd if one exists. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Partial setup: drop the timerfd again; e->watchdog stays
         * unchanged (off). */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2673
2674 _public_ int sd_event_get_watchdog(sd_event *e) {
2675         assert_return(e, -EINVAL);
2676         assert_return(!event_pid_changed(e), -ECHILD);
2677
2678         return e->watchdog;
2679 }