chiark / gitweb /
remove unused includes
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36
37 #include "sd-event.h"
38
39 #define EPOLL_QUEUE_MAX 512U
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
/* All event source types handled by this event loop. For time sources
 * there is one type per supported clock; which union member of
 * sd_event_source is valid is selected by this type. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the five timer-based source types listed above */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
60
/* One event source attached to an event loop. The trailing anonymous
 * union carries the per-type state; the valid member is selected by
 * the "type" field. */
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;             /* the loop we are attached to; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;  /* optional callback run before the loop polls */

        char *description;

        EventSourceType type:5;
        int enabled:3;               /* SD_EVENT_OFF, SD_EVENT_ON or SD_EVENT_ONESHOT */
        bool pending:1;              /* queued in the loop's "pending" prioq */
        bool dispatching:1;          /* currently inside its callback */
        bool floating:1;             /* owned by the loop; does not pin a loop reference */

        int64_t priority;
        unsigned pending_index;      /* index in event->pending, PRIOQ_IDX_NULL if absent */
        unsigned prepare_index;      /* index in event->prepare, PRIOQ_IDX_NULL if absent */
        unsigned pending_iteration;  /* loop iteration in which we became pending */
        unsigned prepare_iteration;  /* loop iteration in which prepare last ran */

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;     /* epoll events we subscribed to */
                        uint32_t revents;    /* epoll events last reported */
                        bool registered:1;   /* fd currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest dispatch time and slack */
                        unsigned earliest_index; /* index in the clock's "earliest" prioq */
                        unsigned latest_index;   /* index in the clock's "latest" prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo; /* filled in when the signal fires */
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;   /* filled in by waitid() when the child changes state */
                        pid_t pid;
                        int options;         /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index in event->exit */
                } exit;
        };
};
121
/* Per-clock bookkeeping: one timerfd plus the scheduling queues of all
 * timer sources driven by that clock. */
struct clock_data {
        int fd;   /* timerfd for this clock, -1 until first use */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;              /* absolute time the timerfd is currently armed for */

        bool needs_rearm:1;       /* set whenever the prioqs changed and the timerfd must be reprogrammed */
};
138
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;     /* central epoll instance everything is hooked into */
        int signal_fd;    /* signalfd covering all signals we watch, -1 until needed */
        int watchdog_fd;  /* timerfd driving sd_notify() watchdog pings, -1 until needed */

        Prioq *pending;   /* sources with an undispatched event, ordered for dispatch */
        Prioq *prepare;   /* sources with a prepare callback, ordered by last-prepared */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;   /* per-machine wakeup offset, USEC_INFINITY until initialized */

        sigset_t sigset;                  /* signals currently programmed into signal_fd */
        sd_event_source **signal_sources; /* _NSIG-sized array indexed by signal number, lazily allocated */

        Hashmap *child_sources;           /* pid -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* how many of those are not SD_EVENT_OFF */

        Set *post_sources;

        Prioq *exit;      /* SOURCE_EXIT sources, dispatched when the loop exits */

        pid_t original_pid; /* PID we were created in, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;      /* realtime/monotonic timestamps of the current wakeup */
        usec_t timestamp_boottime;     /* boottime timestamp of the current wakeup */
        int state;

        bool exit_requested:1;
        bool need_process_child:1;     /* SIGCHLD seen, child sources need a waitid() sweep */
        bool watchdog:1;

        int exit_code;

        pid_t tid;                     /* thread this loop is the default loop of, if any */
        sd_event **default_event_ptr;  /* where to unset ourselves when freed, if registered as default */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources); /* all sources attached to this loop */
};

static void source_disconnect(sd_event_source *s);
194
195 static int pending_prioq_compare(const void *a, const void *b) {
196         const sd_event_source *x = a, *y = b;
197
198         assert(x->pending);
199         assert(y->pending);
200
201         /* Enabled ones first */
202         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203                 return -1;
204         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205                 return 1;
206
207         /* Lower priority values first */
208         if (x->priority < y->priority)
209                 return -1;
210         if (x->priority > y->priority)
211                 return 1;
212
213         /* Older entries first */
214         if (x->pending_iteration < y->pending_iteration)
215                 return -1;
216         if (x->pending_iteration > y->pending_iteration)
217                 return 1;
218
219         /* Stability for the rest */
220         if (x < y)
221                 return -1;
222         if (x > y)
223                 return 1;
224
225         return 0;
226 }
227
228 static int prepare_prioq_compare(const void *a, const void *b) {
229         const sd_event_source *x = a, *y = b;
230
231         assert(x->prepare);
232         assert(y->prepare);
233
234         /* Move most recently prepared ones last, so that we can stop
235          * preparing as soon as we hit one that has already been
236          * prepared in the current iteration */
237         if (x->prepare_iteration < y->prepare_iteration)
238                 return -1;
239         if (x->prepare_iteration > y->prepare_iteration)
240                 return 1;
241
242         /* Enabled ones first */
243         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
244                 return -1;
245         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
246                 return 1;
247
248         /* Lower priority values first */
249         if (x->priority < y->priority)
250                 return -1;
251         if (x->priority > y->priority)
252                 return 1;
253
254         /* Stability for the rest */
255         if (x < y)
256                 return -1;
257         if (x > y)
258                 return 1;
259
260         return 0;
261 }
262
263 static int earliest_time_prioq_compare(const void *a, const void *b) {
264         const sd_event_source *x = a, *y = b;
265
266         assert(EVENT_SOURCE_IS_TIME(x->type));
267         assert(x->type == y->type);
268
269         /* Enabled ones first */
270         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
271                 return -1;
272         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
273                 return 1;
274
275         /* Move the pending ones to the end */
276         if (!x->pending && y->pending)
277                 return -1;
278         if (x->pending && !y->pending)
279                 return 1;
280
281         /* Order by time */
282         if (x->time.next < y->time.next)
283                 return -1;
284         if (x->time.next > y->time.next)
285                 return 1;
286
287         /* Stability for the rest */
288         if (x < y)
289                 return -1;
290         if (x > y)
291                 return 1;
292
293         return 0;
294 }
295
296 static int latest_time_prioq_compare(const void *a, const void *b) {
297         const sd_event_source *x = a, *y = b;
298
299         assert(EVENT_SOURCE_IS_TIME(x->type));
300         assert(x->type == y->type);
301
302         /* Enabled ones first */
303         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304                 return -1;
305         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
306                 return 1;
307
308         /* Move the pending ones to the end */
309         if (!x->pending && y->pending)
310                 return -1;
311         if (x->pending && !y->pending)
312                 return 1;
313
314         /* Order by time */
315         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
316                 return -1;
317         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
318                 return 1;
319
320         /* Stability for the rest */
321         if (x < y)
322                 return -1;
323         if (x > y)
324                 return 1;
325
326         return 0;
327 }
328
329 static int exit_prioq_compare(const void *a, const void *b) {
330         const sd_event_source *x = a, *y = b;
331
332         assert(x->type == SOURCE_EXIT);
333         assert(y->type == SOURCE_EXIT);
334
335         /* Enabled ones first */
336         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
337                 return -1;
338         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
339                 return 1;
340
341         /* Lower priority values first */
342         if (x->priority < y->priority)
343                 return -1;
344         if (x->priority > y->priority)
345                 return 1;
346
347         /* Stability for the rest */
348         if (x < y)
349                 return -1;
350         if (x > y)
351                 return 1;
352
353         return 0;
354 }
355
356 static void free_clock_data(struct clock_data *d) {
357         assert(d);
358
359         safe_close(d->fd);
360         prioq_free(d->earliest);
361         prioq_free(d->latest);
362 }
363
/* Destroys an event loop object, releasing all resources. Called when
 * the last reference is dropped (and from the sd_event_new() failure
 * path). */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Any sources still attached must be floating ones (a
         * non-floating source would hold a reference on us, so we could
         * not be freed). Disconnect and drop them first. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If we were registered as a default loop, unset that pointer */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        /* safe_close() tolerates -1 for fds that were never opened */
        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
400
/* Allocates a new event loop object with a fresh epoll instance.
 * Returns 0 on success and stores the loop (with one reference) in
 * *ret; returns -ENOMEM or a negative errno on failure. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Initialize every fd field to -1 before anything can jump to
         * "fail", so that event_free() doesn't close fd 0 by mistake. */
        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->original_pid = getpid();
        /* USEC_INFINITY marks the perturbation value as not yet computed,
         * see initialize_perturb() */
        e->perturb = USEC_INFINITY;

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
438
439 _public_ sd_event* sd_event_ref(sd_event *e) {
440         assert_return(e, NULL);
441
442         assert(e->n_ref >= 1);
443         e->n_ref++;
444
445         return e;
446 }
447
448 _public_ sd_event* sd_event_unref(sd_event *e) {
449
450         if (!e)
451                 return NULL;
452
453         assert(e->n_ref >= 1);
454         e->n_ref--;
455
456         if (e->n_ref <= 0)
457                 event_free(e);
458
459         return NULL;
460 }
461
462 static bool event_pid_changed(sd_event *e) {
463         assert(e);
464
465         /* We don't support people creating am event loop and keeping
466          * it around over a fork(). Let's complain. */
467
468         return e->original_pid != getpid();
469 }
470
471 static int source_io_unregister(sd_event_source *s) {
472         int r;
473
474         assert(s);
475         assert(s->type == SOURCE_IO);
476
477         if (!s->io.registered)
478                 return 0;
479
480         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
481         if (r < 0)
482                 return -errno;
483
484         s->io.registered = false;
485         return 0;
486 }
487
488 static int source_io_register(
489                 sd_event_source *s,
490                 int enabled,
491                 uint32_t events) {
492
493         struct epoll_event ev = {};
494         int r;
495
496         assert(s);
497         assert(s->type == SOURCE_IO);
498         assert(enabled != SD_EVENT_OFF);
499
500         ev.events = events;
501         ev.data.ptr = s;
502
503         if (enabled == SD_EVENT_ONESHOT)
504                 ev.events |= EPOLLONESHOT;
505
506         if (s->io.registered)
507                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
508         else
509                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
510
511         if (r < 0)
512                 return -errno;
513
514         s->io.registered = true;
515
516         return 0;
517 }
518
519 static clockid_t event_source_type_to_clock(EventSourceType t) {
520
521         switch (t) {
522
523         case SOURCE_TIME_REALTIME:
524                 return CLOCK_REALTIME;
525
526         case SOURCE_TIME_BOOTTIME:
527                 return CLOCK_BOOTTIME;
528
529         case SOURCE_TIME_MONOTONIC:
530                 return CLOCK_MONOTONIC;
531
532         case SOURCE_TIME_REALTIME_ALARM:
533                 return CLOCK_REALTIME_ALARM;
534
535         case SOURCE_TIME_BOOTTIME_ALARM:
536                 return CLOCK_BOOTTIME_ALARM;
537
538         default:
539                 return (clockid_t) -1;
540         }
541 }
542
543 static EventSourceType clock_to_event_source_type(clockid_t clock) {
544
545         switch (clock) {
546
547         case CLOCK_REALTIME:
548                 return SOURCE_TIME_REALTIME;
549
550         case CLOCK_BOOTTIME:
551                 return SOURCE_TIME_BOOTTIME;
552
553         case CLOCK_MONOTONIC:
554                 return SOURCE_TIME_MONOTONIC;
555
556         case CLOCK_REALTIME_ALARM:
557                 return SOURCE_TIME_REALTIME_ALARM;
558
559         case CLOCK_BOOTTIME_ALARM:
560                 return SOURCE_TIME_BOOTTIME_ALARM;
561
562         default:
563                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
564         }
565 }
566
567 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
568         assert(e);
569
570         switch (t) {
571
572         case SOURCE_TIME_REALTIME:
573                 return &e->realtime;
574
575         case SOURCE_TIME_BOOTTIME:
576                 return &e->boottime;
577
578         case SOURCE_TIME_MONOTONIC:
579                 return &e->monotonic;
580
581         case SOURCE_TIME_REALTIME_ALARM:
582                 return &e->realtime_alarm;
583
584         case SOURCE_TIME_BOOTTIME_ALARM:
585                 return &e->boottime_alarm;
586
587         default:
588                 return NULL;
589         }
590 }
591
592 static bool need_signal(sd_event *e, int signal) {
593         return (e->signal_sources && e->signal_sources[signal] &&
594                 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
595                 ||
596                (signal == SIGCHLD &&
597                 e->n_enabled_child_sources > 0);
598 }
599
/* (Re)programs the loop's signalfd with the current signal mask in
 * e->sigset. On first use signalfd() allocates a new fd, which then
 * still has to be added to the epoll instance; later calls merely
 * update the mask of the existing fd. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        /* Tag the epoll entry with the source type, so that dispatch can
         * tell it apart from regular I/O sources */
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                /* Cannot watch the fd, so it is useless to us: close it again */
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
629
/* Detaches an event source from its event loop: unregisters it from
 * the kernel-facing machinery (epoll/signalfd mask), removes it from
 * all queues and lookup structures, and drops the loop reference it
 * held (unless it is floating). Safe to call on an already
 * disconnected source. Does not free the source itself. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        /* Already disconnected? */
        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                /* Drop the source from both scheduling prioqs of its clock
                 * and remember that the timerfd needs reprogramming */
                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {
                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* If the signal was on and now it is off... */
                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                (void) event_update_signal_fd(s->event);
                                /* If disabling failed, we might get a spurious event,
                                 * but otherwise nothing bad should happen. */
                        }
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                                /* We know the signal was on, if it is off now... */
                                if (!need_signal(s->event, SIGCHLD)) {
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                        (void) event_update_signal_fd(s->event);
                                        /* If disabling failed, we might get a spurious event,
                                         * but otherwise nothing bad should happen. */
                                }
                        }

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        /* Remove from the generic pending/prepare queues, too */
        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources pin the loop; release that reference last,
         * since it may destroy the loop */
        if (!s->floating)
                sd_event_unref(event);
}
734
735 static void source_free(sd_event_source *s) {
736         assert(s);
737
738         source_disconnect(s);
739         free(s->description);
740         free(s);
741 }
742
/* Marks a source as having (or no longer having) an undispatched
 * event, inserting it into or removing it from the loop's pending
 * prioq accordingly. For timer sources the per-clock prioqs are
 * reshuffled too, since pending state is part of their ordering. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* Exit sources are dispatched via their own prioq, never the
         * pending queue */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when we became pending, so that older entries are
                 * dispatched first (see pending_prioq_compare()) */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
778
779 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
780         sd_event_source *s;
781
782         assert(e);
783
784         s = new0(sd_event_source, 1);
785         if (!s)
786                 return NULL;
787
788         s->n_ref = 1;
789         s->event = e;
790         s->floating = floating;
791         s->type = type;
792         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
793
794         if (!floating)
795                 sd_event_ref(e);
796
797         LIST_PREPEND(sources, e->sources, s);
798         e->n_sources ++;
799
800         return s;
801 }
802
803 _public_ int sd_event_add_io(
804                 sd_event *e,
805                 sd_event_source **ret,
806                 int fd,
807                 uint32_t events,
808                 sd_event_io_handler_t callback,
809                 void *userdata) {
810
811         sd_event_source *s;
812         int r;
813
814         assert_return(e, -EINVAL);
815         assert_return(fd >= 0, -EINVAL);
816         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
817         assert_return(callback, -EINVAL);
818         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
819         assert_return(!event_pid_changed(e), -ECHILD);
820
821         s = source_new(e, !ret, SOURCE_IO);
822         if (!s)
823                 return -ENOMEM;
824
825         s->io.fd = fd;
826         s->io.events = events;
827         s->io.callback = callback;
828         s->userdata = userdata;
829         s->enabled = SD_EVENT_ON;
830
831         r = source_io_register(s, s->enabled, events);
832         if (r < 0) {
833                 source_free(s);
834                 return r;
835         }
836
837         if (ret)
838                 *ret = s;
839
840         return 0;
841 }
842
843 static void initialize_perturb(sd_event *e) {
844         sd_id128_t bootid = {};
845
846         /* When we sleep for longer, we try to realign the wakeup to
847            the same time wihtin each minute/second/250ms, so that
848            events all across the system can be coalesced into a single
849            CPU wakeup. However, let's take some system-specific
850            randomness for this value, so that in a network of systems
851            with synced clocks timer events are distributed a
852            bit. Here, we calculate a perturbation usec offset from the
853            boot ID. */
854
855         if (_likely_(e->perturb != USEC_INFINITY))
856                 return;
857
858         if (sd_id128_get_boot(&bootid) >= 0)
859                 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
860 }
861
862 static int event_setup_timer_fd(
863                 sd_event *e,
864                 struct clock_data *d,
865                 clockid_t clock) {
866
867         struct epoll_event ev = {};
868         int r, fd;
869
870         assert(e);
871         assert(d);
872
873         if (_likely_(d->fd >= 0))
874                 return 0;
875
876         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
877         if (fd < 0)
878                 return -errno;
879
880         ev.events = EPOLLIN;
881         ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
882
883         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
884         if (r < 0) {
885                 safe_close(fd);
886                 return -errno;
887         }
888
889         d->fd = fd;
890         return 0;
891 }
892
893 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
894         assert(s);
895
896         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
897 }
898
/* Adds a timer event source for the specified clock, to be dispatched
 * at time "usec" at the earliest and, per the two-prioq scheduling
 * scheme (see struct clock_data), no later than "usec" plus the given
 * accuracy (0 selects the 250ms default). A NULL callback makes the
 * loop exit when the timer fires. The source starts out as ONESHOT. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily allocate the per-clock machinery: both prioqs and the
         * timerfd */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* The timerfd must be reprogrammed now that a new entry exists */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        /* source_free() -> source_disconnect() also removes any prioq
         * entry that was already added */
        source_free(s);
        return r;
}
976
977 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
978         assert(s);
979
980         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
981 }
982
983 _public_ int sd_event_add_signal(
984                 sd_event *e,
985                 sd_event_source **ret,
986                 int sig,
987                 sd_event_signal_handler_t callback,
988                 void *userdata) {
989
990         sd_event_source *s;
991         sigset_t ss;
992         int r;
993         bool previous;
994
995         assert_return(e, -EINVAL);
996         assert_return(sig > 0, -EINVAL);
997         assert_return(sig < _NSIG, -EINVAL);
998         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
999         assert_return(!event_pid_changed(e), -ECHILD);
1000
1001         if (!callback)
1002                 callback = signal_exit_callback;
1003
1004         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1005         if (r < 0)
1006                 return -errno;
1007
1008         if (!sigismember(&ss, sig))
1009                 return -EBUSY;
1010
1011         if (!e->signal_sources) {
1012                 e->signal_sources = new0(sd_event_source*, _NSIG);
1013                 if (!e->signal_sources)
1014                         return -ENOMEM;
1015         } else if (e->signal_sources[sig])
1016                 return -EBUSY;
1017
1018         previous = need_signal(e, sig);
1019
1020         s = source_new(e, !ret, SOURCE_SIGNAL);
1021         if (!s)
1022                 return -ENOMEM;
1023
1024         s->signal.sig = sig;
1025         s->signal.callback = callback;
1026         s->userdata = userdata;
1027         s->enabled = SD_EVENT_ON;
1028
1029         e->signal_sources[sig] = s;
1030
1031         if (!previous) {
1032                 assert_se(sigaddset(&e->sigset, sig) == 0);
1033
1034                 r = event_update_signal_fd(e);
1035                 if (r < 0) {
1036                         source_free(s);
1037                         return r;
1038                 }
1039         }
1040
1041         /* Use the signal name as description for the event source by default */
1042         (void) sd_event_source_set_description(s, signal_to_string(sig));
1043
1044         if (ret)
1045                 *ret = s;
1046
1047         return 0;
1048 }
1049
/* Adds an event source watching child process @pid via SIGCHLD.
 * @options takes the waitid() flags WEXITED|WSTOPPED|WCONTINUED and
 * must not be zero. Child sources start out one-shot. At most one
 * source per PID is allowed. NOTE(review): presumably SIGCHLD must be
 * blocked by the caller, as with sd_event_add_signal() — not checked
 * here, confirm. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;
        bool previous;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        /* Only one event source per child PID. */
        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        /* Snapshot whether SIGCHLD is already watched, before this
         * source is accounted for below. */
        previous = need_signal(e, SIGCHLD);

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources ++;

        /* If SIGCHLD was not watched so far, add it to the mask and
         * refresh the signalfd. */
        if (!previous) {
                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        /* Force a scan of the child sources on the next loop iteration
         * (see process_child()). */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1114
1115 _public_ int sd_event_add_defer(
1116                 sd_event *e,
1117                 sd_event_source **ret,
1118                 sd_event_handler_t callback,
1119                 void *userdata) {
1120
1121         sd_event_source *s;
1122         int r;
1123
1124         assert_return(e, -EINVAL);
1125         assert_return(callback, -EINVAL);
1126         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1127         assert_return(!event_pid_changed(e), -ECHILD);
1128
1129         s = source_new(e, !ret, SOURCE_DEFER);
1130         if (!s)
1131                 return -ENOMEM;
1132
1133         s->defer.callback = callback;
1134         s->userdata = userdata;
1135         s->enabled = SD_EVENT_ONESHOT;
1136
1137         r = source_set_pending(s, true);
1138         if (r < 0) {
1139                 source_free(s);
1140                 return r;
1141         }
1142
1143         if (ret)
1144                 *ret = s;
1145
1146         return 0;
1147 }
1148
1149 _public_ int sd_event_add_post(
1150                 sd_event *e,
1151                 sd_event_source **ret,
1152                 sd_event_handler_t callback,
1153                 void *userdata) {
1154
1155         sd_event_source *s;
1156         int r;
1157
1158         assert_return(e, -EINVAL);
1159         assert_return(callback, -EINVAL);
1160         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1161         assert_return(!event_pid_changed(e), -ECHILD);
1162
1163         r = set_ensure_allocated(&e->post_sources, NULL);
1164         if (r < 0)
1165                 return r;
1166
1167         s = source_new(e, !ret, SOURCE_POST);
1168         if (!s)
1169                 return -ENOMEM;
1170
1171         s->post.callback = callback;
1172         s->userdata = userdata;
1173         s->enabled = SD_EVENT_ON;
1174
1175         r = set_put(e->post_sources, s);
1176         if (r < 0) {
1177                 source_free(s);
1178                 return r;
1179         }
1180
1181         if (ret)
1182                 *ret = s;
1183
1184         return 0;
1185 }
1186
1187 _public_ int sd_event_add_exit(
1188                 sd_event *e,
1189                 sd_event_source **ret,
1190                 sd_event_handler_t callback,
1191                 void *userdata) {
1192
1193         sd_event_source *s;
1194         int r;
1195
1196         assert_return(e, -EINVAL);
1197         assert_return(callback, -EINVAL);
1198         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1199         assert_return(!event_pid_changed(e), -ECHILD);
1200
1201         if (!e->exit) {
1202                 e->exit = prioq_new(exit_prioq_compare);
1203                 if (!e->exit)
1204                         return -ENOMEM;
1205         }
1206
1207         s = source_new(e, !ret, SOURCE_EXIT);
1208         if (!s)
1209                 return -ENOMEM;
1210
1211         s->exit.callback = callback;
1212         s->userdata = userdata;
1213         s->exit.prioq_index = PRIOQ_IDX_NULL;
1214         s->enabled = SD_EVENT_ONESHOT;
1215
1216         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1217         if (r < 0) {
1218                 source_free(s);
1219                 return r;
1220         }
1221
1222         if (ret)
1223                 *ret = s;
1224
1225         return 0;
1226 }
1227
1228 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1229         assert_return(s, NULL);
1230
1231         assert(s->n_ref >= 1);
1232         s->n_ref++;
1233
1234         return s;
1235 }
1236
1237 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1238
1239         if (!s)
1240                 return NULL;
1241
1242         assert(s->n_ref >= 1);
1243         s->n_ref--;
1244
1245         if (s->n_ref <= 0) {
1246                 /* Here's a special hack: when we are called from a
1247                  * dispatch handler we won't free the event source
1248                  * immediately, but we will detach the fd from the
1249                  * epoll. This way it is safe for the caller to unref
1250                  * the event source and immediately close the fd, but
1251                  * we still retain a valid event source object after
1252                  * the callback. */
1253
1254                 if (s->dispatching) {
1255                         if (s->type == SOURCE_IO)
1256                                 source_io_unregister(s);
1257
1258                         source_disconnect(s);
1259                 } else
1260                         source_free(s);
1261         }
1262
1263         return NULL;
1264 }
1265
1266 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1267         assert_return(s, -EINVAL);
1268         assert_return(!event_pid_changed(s->event), -ECHILD);
1269
1270         return free_and_strdup(&s->description, description);
1271 }
1272
1273 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1274         assert_return(s, -EINVAL);
1275         assert_return(description, -EINVAL);
1276         assert_return(s->description, -ENXIO);
1277         assert_return(!event_pid_changed(s->event), -ECHILD);
1278
1279         *description = s->description;
1280         return 0;
1281 }
1282
1283 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1284         assert_return(s, NULL);
1285
1286         return s->event;
1287 }
1288
1289 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1290         assert_return(s, -EINVAL);
1291         assert_return(s->type != SOURCE_EXIT, -EDOM);
1292         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1293         assert_return(!event_pid_changed(s->event), -ECHILD);
1294
1295         return s->pending;
1296 }
1297
1298 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1299         assert_return(s, -EINVAL);
1300         assert_return(s->type == SOURCE_IO, -EDOM);
1301         assert_return(!event_pid_changed(s->event), -ECHILD);
1302
1303         return s->io.fd;
1304 }
1305
/* Replaces the file descriptor watched by an IO event source. If the
 * source is enabled, the new fd is registered with epoll first and the
 * old registration is only dropped afterwards, so a failure leaves the
 * previous fd armed and the source unchanged. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled source: nothing is registered in epoll,
                 * just swap the fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back: the old fd is still registered in
                         * the epoll object. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Best effort: drop the old fd's registration; errors
                 * are deliberately ignored. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1341
1342 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1343         assert_return(s, -EINVAL);
1344         assert_return(events, -EINVAL);
1345         assert_return(s->type == SOURCE_IO, -EDOM);
1346         assert_return(!event_pid_changed(s->event), -ECHILD);
1347
1348         *events = s->io.events;
1349         return 0;
1350 }
1351
1352 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1353         int r;
1354
1355         assert_return(s, -EINVAL);
1356         assert_return(s->type == SOURCE_IO, -EDOM);
1357         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1358         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1359         assert_return(!event_pid_changed(s->event), -ECHILD);
1360
1361         /* edge-triggered updates are never skipped, so we can reset edges */
1362         if (s->io.events == events && !(events & EPOLLET))
1363                 return 0;
1364
1365         if (s->enabled != SD_EVENT_OFF) {
1366                 r = source_io_register(s, s->enabled, events);
1367                 if (r < 0)
1368                         return r;
1369         }
1370
1371         s->io.events = events;
1372         source_set_pending(s, false);
1373
1374         return 0;
1375 }
1376
1377 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1378         assert_return(s, -EINVAL);
1379         assert_return(revents, -EINVAL);
1380         assert_return(s->type == SOURCE_IO, -EDOM);
1381         assert_return(s->pending, -ENODATA);
1382         assert_return(!event_pid_changed(s->event), -ECHILD);
1383
1384         *revents = s->io.revents;
1385         return 0;
1386 }
1387
1388 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1389         assert_return(s, -EINVAL);
1390         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1391         assert_return(!event_pid_changed(s->event), -ECHILD);
1392
1393         return s->signal.sig;
1394 }
1395
1396 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1397         assert_return(s, -EINVAL);
1398         assert_return(!event_pid_changed(s->event), -ECHILD);
1399
1400         return s->priority;
1401 }
1402
1403 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1404         assert_return(s, -EINVAL);
1405         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1406         assert_return(!event_pid_changed(s->event), -ECHILD);
1407
1408         if (s->priority == priority)
1409                 return 0;
1410
1411         s->priority = priority;
1412
1413         if (s->pending)
1414                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1415
1416         if (s->prepare)
1417                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1418
1419         if (s->type == SOURCE_EXIT)
1420                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1421
1422         return 0;
1423 }
1424
1425 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1426         assert_return(s, -EINVAL);
1427         assert_return(m, -EINVAL);
1428         assert_return(!event_pid_changed(s->event), -ECHILD);
1429
1430         *m = s->enabled;
1431         return 0;
1432 }
1433
1434 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1435         int r;
1436
1437         assert_return(s, -EINVAL);
1438         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1439         assert_return(!event_pid_changed(s->event), -ECHILD);
1440
1441         /* If we are dead anyway, we are fine with turning off
1442          * sources, but everything else needs to fail. */
1443         if (s->event->state == SD_EVENT_FINISHED)
1444                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1445
1446         if (s->enabled == m)
1447                 return 0;
1448
1449         if (m == SD_EVENT_OFF) {
1450
1451                 switch (s->type) {
1452
1453                 case SOURCE_IO:
1454                         r = source_io_unregister(s);
1455                         if (r < 0)
1456                                 return r;
1457
1458                         s->enabled = m;
1459                         break;
1460
1461                 case SOURCE_TIME_REALTIME:
1462                 case SOURCE_TIME_BOOTTIME:
1463                 case SOURCE_TIME_MONOTONIC:
1464                 case SOURCE_TIME_REALTIME_ALARM:
1465                 case SOURCE_TIME_BOOTTIME_ALARM: {
1466                         struct clock_data *d;
1467
1468                         s->enabled = m;
1469                         d = event_get_clock_data(s->event, s->type);
1470                         assert(d);
1471
1472                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1473                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1474                         d->needs_rearm = true;
1475                         break;
1476                 }
1477
1478                 case SOURCE_SIGNAL:
1479                         assert(need_signal(s->event, s->signal.sig));
1480
1481                         s->enabled = m;
1482
1483                         if (!need_signal(s->event, s->signal.sig)) {
1484                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1485
1486                                 (void) event_update_signal_fd(s->event);
1487                                 /* If disabling failed, we might get a spurious event,
1488                                  * but otherwise nothing bad should happen. */
1489                         }
1490
1491                         break;
1492
1493                 case SOURCE_CHILD:
1494                         assert(need_signal(s->event, SIGCHLD));
1495
1496                         s->enabled = m;
1497
1498                         assert(s->event->n_enabled_child_sources > 0);
1499                         s->event->n_enabled_child_sources--;
1500
1501                         if (!need_signal(s->event, SIGCHLD)) {
1502                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1503
1504                                 (void) event_update_signal_fd(s->event);
1505                         }
1506
1507                         break;
1508
1509                 case SOURCE_EXIT:
1510                         s->enabled = m;
1511                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1512                         break;
1513
1514                 case SOURCE_DEFER:
1515                 case SOURCE_POST:
1516                         s->enabled = m;
1517                         break;
1518
1519                 default:
1520                         assert_not_reached("Wut? I shouldn't exist.");
1521                 }
1522
1523         } else {
1524                 switch (s->type) {
1525
1526                 case SOURCE_IO:
1527                         r = source_io_register(s, m, s->io.events);
1528                         if (r < 0)
1529                                 return r;
1530
1531                         s->enabled = m;
1532                         break;
1533
1534                 case SOURCE_TIME_REALTIME:
1535                 case SOURCE_TIME_BOOTTIME:
1536                 case SOURCE_TIME_MONOTONIC:
1537                 case SOURCE_TIME_REALTIME_ALARM:
1538                 case SOURCE_TIME_BOOTTIME_ALARM: {
1539                         struct clock_data *d;
1540
1541                         s->enabled = m;
1542                         d = event_get_clock_data(s->event, s->type);
1543                         assert(d);
1544
1545                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1546                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1547                         d->needs_rearm = true;
1548                         break;
1549                 }
1550
1551                 case SOURCE_SIGNAL:
1552                         /* Check status before enabling. */
1553                         if (!need_signal(s->event, s->signal.sig)) {
1554                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1555
1556                                 r = event_update_signal_fd(s->event);
1557                                 if (r < 0) {
1558                                         s->enabled = SD_EVENT_OFF;
1559                                         return r;
1560                                 }
1561                         }
1562
1563                         s->enabled = m;
1564                         break;
1565
1566                 case SOURCE_CHILD:
1567                         /* Check status before enabling. */
1568                         if (s->enabled == SD_EVENT_OFF) {
1569                                 if (!need_signal(s->event, SIGCHLD)) {
1570                                         assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1571
1572                                         r = event_update_signal_fd(s->event);
1573                                         if (r < 0) {
1574                                                 s->enabled = SD_EVENT_OFF;
1575                                                 return r;
1576                                         }
1577                                 }
1578
1579                                 s->event->n_enabled_child_sources++;
1580                         }
1581
1582                         s->enabled = m;
1583                         break;
1584
1585                 case SOURCE_EXIT:
1586                         s->enabled = m;
1587                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1588                         break;
1589
1590                 case SOURCE_DEFER:
1591                 case SOURCE_POST:
1592                         s->enabled = m;
1593                         break;
1594
1595                 default:
1596                         assert_not_reached("Wut? I shouldn't exist.");
1597                 }
1598         }
1599
1600         if (s->pending)
1601                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1602
1603         if (s->prepare)
1604                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1605
1606         return 0;
1607 }
1608
1609 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1610         assert_return(s, -EINVAL);
1611         assert_return(usec, -EINVAL);
1612         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1613         assert_return(!event_pid_changed(s->event), -ECHILD);
1614
1615         *usec = s->time.next;
1616         return 0;
1617 }
1618
1619 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1620         struct clock_data *d;
1621
1622         assert_return(s, -EINVAL);
1623         assert_return(usec != (uint64_t) -1, -EINVAL);
1624         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1625         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1626         assert_return(!event_pid_changed(s->event), -ECHILD);
1627
1628         s->time.next = usec;
1629
1630         source_set_pending(s, false);
1631
1632         d = event_get_clock_data(s->event, s->type);
1633         assert(d);
1634
1635         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1636         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1637         d->needs_rearm = true;
1638
1639         return 0;
1640 }
1641
1642 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1643         assert_return(s, -EINVAL);
1644         assert_return(usec, -EINVAL);
1645         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1646         assert_return(!event_pid_changed(s->event), -ECHILD);
1647
1648         *usec = s->time.accuracy;
1649         return 0;
1650 }
1651
1652 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1653         struct clock_data *d;
1654
1655         assert_return(s, -EINVAL);
1656         assert_return(usec != (uint64_t) -1, -EINVAL);
1657         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1658         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1659         assert_return(!event_pid_changed(s->event), -ECHILD);
1660
1661         if (usec == 0)
1662                 usec = DEFAULT_ACCURACY_USEC;
1663
1664         s->time.accuracy = usec;
1665
1666         source_set_pending(s, false);
1667
1668         d = event_get_clock_data(s->event, s->type);
1669         assert(d);
1670
1671         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1672         d->needs_rearm = true;
1673
1674         return 0;
1675 }
1676
1677 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1678         assert_return(s, -EINVAL);
1679         assert_return(clock, -EINVAL);
1680         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1681         assert_return(!event_pid_changed(s->event), -ECHILD);
1682
1683         *clock = event_source_type_to_clock(s->type);
1684         return 0;
1685 }
1686
1687 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1688         assert_return(s, -EINVAL);
1689         assert_return(pid, -EINVAL);
1690         assert_return(s->type == SOURCE_CHILD, -EDOM);
1691         assert_return(!event_pid_changed(s->event), -ECHILD);
1692
1693         *pid = s->child.pid;
1694         return 0;
1695 }
1696
1697 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1698         int r;
1699
1700         assert_return(s, -EINVAL);
1701         assert_return(s->type != SOURCE_EXIT, -EDOM);
1702         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1703         assert_return(!event_pid_changed(s->event), -ECHILD);
1704
1705         if (s->prepare == callback)
1706                 return 0;
1707
1708         if (callback && s->prepare) {
1709                 s->prepare = callback;
1710                 return 0;
1711         }
1712
1713         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1714         if (r < 0)
1715                 return r;
1716
1717         s->prepare = callback;
1718
1719         if (callback) {
1720                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1721                 if (r < 0)
1722                         return r;
1723         } else
1724                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1725
1726         return 0;
1727 }
1728
1729 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1730         assert_return(s, NULL);
1731
1732         return s->userdata;
1733 }
1734
1735 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1736         void *ret;
1737
1738         assert_return(s, NULL);
1739
1740         ret = s->userdata;
1741         s->userdata = userdata;
1742
1743         return ret;
1744 }
1745
/* Picks a wakeup time within [a, b], preferring coarse, boot-id
 * perturbed grid points (1min, then 10s, 1s, 250ms) so that wakeups
 * across the system coincide; falls back to b. See the rationale
 * below. NOTE(review): the first step uses e->perturb without a
 * modulo, unlike the later steps — presumably initialize_perturb()
 * bounds it below one minute; confirm. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* Wake immediately if the window already starts at/ before now. */
        if (a <= 0)
                return 0;

        /* Window too narrow to optimize within. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the per-minute grid point. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Try the 10s grid point. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Try the 1s grid point. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Try the 250ms grid point. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No grid point fits; wake at the last possible moment. */
        return b;
}
1823
1824 static int event_arm_timer(
1825                 sd_event *e,
1826                 struct clock_data *d) {
1827
1828         struct itimerspec its = {};
1829         sd_event_source *a, *b;
1830         usec_t t;
1831         int r;
1832
1833         assert(e);
1834         assert(d);
1835
1836         if (!d->needs_rearm)
1837                 return 0;
1838         else
1839                 d->needs_rearm = false;
1840
1841         a = prioq_peek(d->earliest);
1842         if (!a || a->enabled == SD_EVENT_OFF) {
1843
1844                 if (d->fd < 0)
1845                         return 0;
1846
1847                 if (d->next == USEC_INFINITY)
1848                         return 0;
1849
1850                 /* disarm */
1851                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1852                 if (r < 0)
1853                         return r;
1854
1855                 d->next = USEC_INFINITY;
1856                 return 0;
1857         }
1858
1859         b = prioq_peek(d->latest);
1860         assert_se(b && b->enabled != SD_EVENT_OFF);
1861
1862         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1863         if (d->next == t)
1864                 return 0;
1865
1866         assert_se(d->fd >= 0);
1867
1868         if (t == 0) {
1869                 /* We don' want to disarm here, just mean some time looooong ago. */
1870                 its.it_value.tv_sec = 0;
1871                 its.it_value.tv_nsec = 1;
1872         } else
1873                 timespec_store(&its.it_value, t);
1874
1875         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1876         if (r < 0)
1877                 return -errno;
1878
1879         d->next = t;
1880         return 0;
1881 }
1882
1883 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1884         assert(e);
1885         assert(s);
1886         assert(s->type == SOURCE_IO);
1887
1888         /* If the event source was already pending, we just OR in the
1889          * new revents, otherwise we reset the value. The ORing is
1890          * necessary to handle EPOLLONESHOT events properly where
1891          * readability might happen independently of writability, and
1892          * we need to keep track of both */
1893
1894         if (s->pending)
1895                 s->io.revents |= revents;
1896         else
1897                 s->io.revents = revents;
1898
1899         return source_set_pending(s, true);
1900 }
1901
1902 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1903         uint64_t x;
1904         ssize_t ss;
1905
1906         assert(e);
1907         assert(fd >= 0);
1908
1909         assert_return(events == EPOLLIN, -EIO);
1910
1911         ss = read(fd, &x, sizeof(x));
1912         if (ss < 0) {
1913                 if (errno == EAGAIN || errno == EINTR)
1914                         return 0;
1915
1916                 return -errno;
1917         }
1918
1919         if (_unlikely_(ss != sizeof(x)))
1920                 return -EIO;
1921
1922         if (next)
1923                 *next = USEC_INFINITY;
1924
1925         return 0;
1926 }
1927
1928 static int process_timer(
1929                 sd_event *e,
1930                 usec_t n,
1931                 struct clock_data *d) {
1932
1933         sd_event_source *s;
1934         int r;
1935
1936         assert(e);
1937         assert(d);
1938
1939         for (;;) {
1940                 s = prioq_peek(d->earliest);
1941                 if (!s ||
1942                     s->time.next > n ||
1943                     s->enabled == SD_EVENT_OFF ||
1944                     s->pending)
1945                         break;
1946
1947                 r = source_set_pending(s, true);
1948                 if (r < 0)
1949                         return r;
1950
1951                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1952                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1953                 d->needs_rearm = true;
1954         }
1955
1956         return 0;
1957 }
1958
/* Poll every registered child source with waitid(WNOHANG|WNOWAIT) and
 * mark those whose child had a state change as pending. Returns 0 on
 * success, negative errno on waitid() failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        /* Clear the flag first: a SIGCHLD arriving while we iterate will
         * simply set it again and requeue the work. */
        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when there was no state change for this pid */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2025
/* Drain the signalfd completely, marking the matching signal source (if
 * any) pending for each signal read. Returns > 0 if at least one signal
 * was read, 0 if none were, negative errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        /* A signalfd should only ever report readability */
        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(e->signal_fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: queue fully drained — report whether we read anything */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (si.ssi_signo == SIGCHLD) {
                        /* Give the child-source machinery first pick; if it
                         * claims the signal entirely (r > 0), skip generic
                         * per-signal delivery. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0)
                                continue;
                }

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Nobody subscribed to this signal? Then just drop it. */
                if (!s)
                        continue;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
2074
/* Invoke the user callback of a single event source, handling the
 * pending/oneshot bookkeeping around it. Returns 1 on success (even if
 * the callback failed — a failing callback only disables its source),
 * negative errno on internal bookkeeping failure. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources remain pending across dispatches; for all
         * other types clear the pending flag before running the callback so
         * it may legitimately re-trigger the source. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Disable ONESHOT sources before the callback runs, so that the
         * callback can re-enable the source if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determined before the callback, since the callback may
                 * mutate the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* The callback may have dropped the last reference while it was
         * running; free the source only now that we're done with it. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2180
/* Run the prepare callback of every enabled source that registered one,
 * at most once per loop iteration (tracked via prepare_iteration).
 * Returns 0 on success, negative errno on failure. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* The prepare prioq sorts not-yet-prepared sources first, so
                 * we're done as soon as the head was already handled this
                 * iteration (or is disabled). */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as done and reshuffle *before* invoking the callback,
                 * so the callback can't make us process it twice. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                /* The callback may have dropped the last reference */
                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2219
2220 static int dispatch_exit(sd_event *e) {
2221         sd_event_source *p;
2222         int r;
2223
2224         assert(e);
2225
2226         p = prioq_peek(e->exit);
2227         if (!p || p->enabled == SD_EVENT_OFF) {
2228                 e->state = SD_EVENT_FINISHED;
2229                 return 0;
2230         }
2231
2232         sd_event_ref(e);
2233         e->iteration++;
2234         e->state = SD_EVENT_EXITING;
2235
2236         r = source_dispatch(p);
2237
2238         e->state = SD_EVENT_PASSIVE;
2239         sd_event_unref(e);
2240
2241         return r;
2242 }
2243
2244 static sd_event_source* event_next_pending(sd_event *e) {
2245         sd_event_source *p;
2246
2247         assert(e);
2248
2249         p = prioq_peek(e->pending);
2250         if (!p)
2251                 return NULL;
2252
2253         if (p->enabled == SD_EVENT_OFF)
2254                 return NULL;
2255
2256         return p;
2257 }
2258
2259 static int arm_watchdog(sd_event *e) {
2260         struct itimerspec its = {};
2261         usec_t t;
2262         int r;
2263
2264         assert(e);
2265         assert(e->watchdog_fd >= 0);
2266
2267         t = sleep_between(e,
2268                           e->watchdog_last + (e->watchdog_period / 2),
2269                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2270
2271         timespec_store(&its.it_value, t);
2272
2273         /* Make sure we never set the watchdog to 0, which tells the
2274          * kernel to disable it. */
2275         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2276                 its.it_value.tv_nsec = 1;
2277
2278         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2279         if (r < 0)
2280                 return -errno;
2281
2282         return 0;
2283 }
2284
2285 static int process_watchdog(sd_event *e) {
2286         assert(e);
2287
2288         if (!e->watchdog)
2289                 return 0;
2290
2291         /* Don't notify watchdog too often */
2292         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2293                 return 0;
2294
2295         sd_notify(false, "WATCHDOG=1");
2296         e->watchdog_last = e->timestamp.monotonic;
2297
2298         return arm_watchdog(e);
2299 }
2300
/* First step of a loop iteration: run prepare callbacks and rearm all
 * clock timerfds. Returns > 0 if there's already something pending to
 * dispatch (state becomes SD_EVENT_PENDING), 0 if the caller should
 * poll via sd_event_wait() (state becomes SD_EVENT_PREPARED), negative
 * errno on failure. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Rearm the timerfd of every supported clock */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_PREPARED;

        return 0;

pending:
        /* Something is ready already: do a zero-timeout wait so the state
         * ends up SD_EVENT_PENDING. sd_event_wait() insists on state
         * SD_EVENT_PREPARED, hence set it first; and if the wait found
         * nothing after all (r == 0), restore PREPARED since the wait
         * reset us to PASSIVE. */
        e->state = SD_EVENT_PREPARED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_PREPARED;

        return r;
}
2353
/* Second step of a loop iteration: poll the epoll fd (up to `timeout`
 * usec, (uint64_t) -1 for infinity), flush fired fds, and turn elapsed
 * timers and child state changes into pending sources. Returns 1 if
 * something is pending (state SD_EVENT_PENDING), 0 if nothing happened
 * (state back to SD_EVENT_PASSIVE), negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* Stack-allocated event buffer, sized by the number of sources but
         * clamped to EPOLL_QUEUE_MAX */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* timeout is in usec, epoll_wait() takes msec: round up */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;

                goto finish;
        }

        /* Cache the wakeup timestamps for this iteration (also served to
         * users via sd_event_now()) */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* First pass: special sources are tagged in epoll's data.ptr with
         * their EventSourceType value; anything else is an IO source
         * pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Second pass: turn elapsed timers into pending sources, per clock */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_PASSIVE;

        return r;
}
2454
/* Third step of a loop iteration: run the callback of the highest
 * priority pending source (or the exit sources if an exit was
 * requested). Returns the dispatch result (>= 1 on success), negative
 * errno on failure; state returns to SD_EVENT_PASSIVE. */
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                /* Pin the event object: the callback may drop the caller's
                 * last reference while it runs. */
                sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_PASSIVE;

                sd_event_unref(e);

                return r;
        }

        e->state = SD_EVENT_PASSIVE;

        return 1;
}
2484
2485 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2486         int r;
2487
2488         assert_return(e, -EINVAL);
2489         assert_return(!event_pid_changed(e), -ECHILD);
2490         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2491         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2492
2493         r = sd_event_prepare(e);
2494         if (r > 0)
2495                 return sd_event_dispatch(e);
2496         else if (r < 0)
2497                 return r;
2498
2499         r = sd_event_wait(e, timeout);
2500         if (r > 0)
2501                 return sd_event_dispatch(e);
2502         else
2503                 return r;
2504 }
2505
2506 _public_ int sd_event_loop(sd_event *e) {
2507         int r;
2508
2509         assert_return(e, -EINVAL);
2510         assert_return(!event_pid_changed(e), -ECHILD);
2511         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2512
2513         sd_event_ref(e);
2514
2515         while (e->state != SD_EVENT_FINISHED) {
2516                 r = sd_event_run(e, (uint64_t) -1);
2517                 if (r < 0)
2518                         goto finish;
2519         }
2520
2521         r = e->exit_code;
2522
2523 finish:
2524         sd_event_unref(e);
2525         return r;
2526 }
2527
2528 _public_ int sd_event_get_fd(sd_event *e) {
2529
2530         assert_return(e, -EINVAL);
2531         assert_return(!event_pid_changed(e), -ECHILD);
2532
2533         return e->epoll_fd;
2534 }
2535
2536 _public_ int sd_event_get_state(sd_event *e) {
2537         assert_return(e, -EINVAL);
2538         assert_return(!event_pid_changed(e), -ECHILD);
2539
2540         return e->state;
2541 }
2542
2543 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2544         assert_return(e, -EINVAL);
2545         assert_return(code, -EINVAL);
2546         assert_return(!event_pid_changed(e), -ECHILD);
2547
2548         if (!e->exit_requested)
2549                 return -ENODATA;
2550
2551         *code = e->exit_code;
2552         return 0;
2553 }
2554
2555 _public_ int sd_event_exit(sd_event *e, int code) {
2556         assert_return(e, -EINVAL);
2557         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2558         assert_return(!event_pid_changed(e), -ECHILD);
2559
2560         e->exit_requested = true;
2561         e->exit_code = code;
2562
2563         return 0;
2564 }
2565
2566 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2567         assert_return(e, -EINVAL);
2568         assert_return(usec, -EINVAL);
2569         assert_return(!event_pid_changed(e), -ECHILD);
2570
2571         /* If we haven't run yet, just get the actual time */
2572         if (!dual_timestamp_is_set(&e->timestamp))
2573                 return -ENODATA;
2574
2575         switch (clock) {
2576
2577         case CLOCK_REALTIME:
2578         case CLOCK_REALTIME_ALARM:
2579                 *usec = e->timestamp.realtime;
2580                 break;
2581
2582         case CLOCK_MONOTONIC:
2583                 *usec = e->timestamp.monotonic;
2584                 break;
2585
2586         case CLOCK_BOOTTIME:
2587         case CLOCK_BOOTTIME_ALARM:
2588                 *usec = e->timestamp_boottime;
2589                 break;
2590         }
2591
2592         return 0;
2593 }
2594
2595 _public_ int sd_event_default(sd_event **ret) {
2596
2597         static thread_local sd_event *default_event = NULL;
2598         sd_event *e = NULL;
2599         int r;
2600
2601         if (!ret)
2602                 return !!default_event;
2603
2604         if (default_event) {
2605                 *ret = sd_event_ref(default_event);
2606                 return 0;
2607         }
2608
2609         r = sd_event_new(&e);
2610         if (r < 0)
2611                 return r;
2612
2613         e->default_event_ptr = &default_event;
2614         e->tid = gettid();
2615         default_event = e;
2616
2617         *ret = e;
2618         return 1;
2619 }
2620
2621 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2622         assert_return(e, -EINVAL);
2623         assert_return(tid, -EINVAL);
2624         assert_return(!event_pid_changed(e), -ECHILD);
2625
2626         if (e->tid != 0) {
2627                 *tid = e->tid;
2628                 return 0;
2629         }
2630
2631         return -ENXIO;
2632 }
2633
/* Enable or disable automatic sd_notify() watchdog pings driven by this
 * event loop. Returns the new watchdog state (0 or 1) on success,
 * negative errno on failure. Enabling is a no-op returning 0 if the
 * service manager didn't request watchdog support. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* Query the watchdog period from the service manager
                 * environment; r == 0 means no watchdog was requested. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Watch the timerfd on our epoll instance, tagged with the
                 * SOURCE_WATCHDOG sentinel in data.ptr */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: unregister and close the timerfd, if any */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2685
2686 _public_ int sd_event_get_watchdog(sd_event *e) {
2687         assert_return(e, -EINVAL);
2688         assert_return(!event_pid_changed(e), -ECHILD);
2689
2690         return e->watchdog;
2691 }