chiark / gitweb /
event: make gcc shut up
[elogind.git] / src / libsystemd-bus / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34
35 #include "sd-event.h"
36
37 #define EPOLL_QUEUE_MAX 64
38 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
39
/* Discriminates which member of the union in sd_event_source is in
 * use for a given event source. */
typedef enum EventSourceType {
        SOURCE_IO,         /* fd watched via the epoll instance */
        SOURCE_MONOTONIC,  /* CLOCK_MONOTONIC timer event */
        SOURCE_REALTIME,   /* CLOCK_REALTIME timer event */
        SOURCE_SIGNAL,     /* UNIX signal, delivered via signalfd */
        SOURCE_CHILD,      /* child process state change, driven by SIGCHLD */
        SOURCE_DEFER,      /* callback that is pending immediately */
        SOURCE_EXIT,       /* callback run when the loop is exiting */
        SOURCE_WATCHDOG    /* internal marker only; no source object ever
                            * carries this type (see source_free()) */
} EventSourceType;
50
/* A single event source attached to an sd_event loop. The 'type'
 * field selects which member of the trailing union is valid. */
struct sd_event_source {
        unsigned n_ref;              /* reference count, see sd_event_source_ref()/_unref() */

        sd_event *event;             /* owning loop; we hold a reference on it */
        void *userdata;              /* opaque pointer handed back to callbacks */
        sd_event_handler_t prepare;  /* optional callback, sources with one are kept in event->prepare */

        EventSourceType type:4;      /* selects the union member below */
        int enabled:3;               /* SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;              /* queued in event->pending, awaiting dispatch */
        bool dispatching:1;          /* currently inside its callback (see sd_event_source_unref()) */

        int priority;                /* lower values are dispatched first */
        unsigned pending_index;      /* index within event->pending prioq */
        unsigned prepare_index;      /* index within event->prepare prioq */
        unsigned pending_iteration;  /* loop iteration in which it became pending */
        unsigned prepare_iteration;  /* loop iteration in which it was last prepared */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;    /* epoll event mask requested by the caller */
                        uint32_t revents;   /* events most recently reported */
                        bool registered:1;  /* fd is currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest trigger time and allowed slack */
                        unsigned earliest_index; /* index within the per-clock *_earliest prioq */
                        unsigned latest_index;   /* index within the per-clock *_latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;             /* signal number this source watches */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;         /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* index within event->exit prioq */
                } exit;
        };
};
103
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;     /* reference count, see sd_event_ref()/_unref() */

        int epoll_fd;       /* central epoll instance all other fds are registered with */
        int signal_fd;      /* signalfd matching 'sigset', -1 until first needed */
        int realtime_fd;    /* CLOCK_REALTIME timerfd, -1 until first needed */
        int monotonic_fd;   /* CLOCK_MONOTONIC timerfd, -1 until first needed */
        int watchdog_fd;

        Prioq *pending;     /* sources ready for dispatch, ordered by pending_prioq_compare() */
        Prioq *prepare;     /* sources that have a prepare callback installed */

        /* For both clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */
        Prioq *monotonic_earliest;
        Prioq *monotonic_latest;
        Prioq *realtime_earliest;
        Prioq *realtime_latest;

        usec_t realtime_next, monotonic_next; /* next armed expiry per clock, (usec_t) -1 when unset */
        usec_t perturb;     /* boot-ID-derived offset used to de-synchronize aligned
                             * wakeups across machines (see event_setup_timer_fd()) */

        sigset_t sigset;                  /* signals currently routed through signal_fd */
        sd_event_source **signal_sources; /* array of _NSIG entries indexed by signal number */

        Hashmap *child_sources;           /* pid (as pointer) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources; /* number of child sources not SD_EVENT_OFF */

        Prioq *exit;        /* SOURCE_EXIT sources, ordered by exit_prioq_compare() */

        pid_t original_pid; /* pid at creation time, to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1; /* a SIGCHLD-relevant change happened, reap children */
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* cleared on destruction, see event_free() */

        usec_t watchdog_last, watchdog_period;
};
155
156 static int pending_prioq_compare(const void *a, const void *b) {
157         const sd_event_source *x = a, *y = b;
158
159         assert(x->pending);
160         assert(y->pending);
161
162         /* Enabled ones first */
163         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
164                 return -1;
165         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
166                 return 1;
167
168         /* Lower priority values first */
169         if (x->priority < y->priority)
170                 return -1;
171         if (x->priority > y->priority)
172                 return 1;
173
174         /* Older entries first */
175         if (x->pending_iteration < y->pending_iteration)
176                 return -1;
177         if (x->pending_iteration > y->pending_iteration)
178                 return 1;
179
180         /* Stability for the rest */
181         if (x < y)
182                 return -1;
183         if (x > y)
184                 return 1;
185
186         return 0;
187 }
188
189 static int prepare_prioq_compare(const void *a, const void *b) {
190         const sd_event_source *x = a, *y = b;
191
192         assert(x->prepare);
193         assert(y->prepare);
194
195         /* Move most recently prepared ones last, so that we can stop
196          * preparing as soon as we hit one that has already been
197          * prepared in the current iteration */
198         if (x->prepare_iteration < y->prepare_iteration)
199                 return -1;
200         if (x->prepare_iteration > y->prepare_iteration)
201                 return 1;
202
203         /* Enabled ones first */
204         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
205                 return -1;
206         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
207                 return 1;
208
209         /* Lower priority values first */
210         if (x->priority < y->priority)
211                 return -1;
212         if (x->priority > y->priority)
213                 return 1;
214
215         /* Stability for the rest */
216         if (x < y)
217                 return -1;
218         if (x > y)
219                 return 1;
220
221         return 0;
222 }
223
224 static int earliest_time_prioq_compare(const void *a, const void *b) {
225         const sd_event_source *x = a, *y = b;
226
227         assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
228         assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
229
230         /* Enabled ones first */
231         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
232                 return -1;
233         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
234                 return 1;
235
236         /* Move the pending ones to the end */
237         if (!x->pending && y->pending)
238                 return -1;
239         if (x->pending && !y->pending)
240                 return 1;
241
242         /* Order by time */
243         if (x->time.next < y->time.next)
244                 return -1;
245         if (x->time.next > y->time.next)
246                 return 1;
247
248         /* Stability for the rest */
249         if (x < y)
250                 return -1;
251         if (x > y)
252                 return 1;
253
254         return 0;
255 }
256
257 static int latest_time_prioq_compare(const void *a, const void *b) {
258         const sd_event_source *x = a, *y = b;
259
260         assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
261                (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
262
263         /* Enabled ones first */
264         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
265                 return -1;
266         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
267                 return 1;
268
269         /* Move the pending ones to the end */
270         if (!x->pending && y->pending)
271                 return -1;
272         if (x->pending && !y->pending)
273                 return 1;
274
275         /* Order by time */
276         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
277                 return -1;
278         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
279                 return 1;
280
281         /* Stability for the rest */
282         if (x < y)
283                 return -1;
284         if (x > y)
285                 return 1;
286
287         return 0;
288 }
289
290 static int exit_prioq_compare(const void *a, const void *b) {
291         const sd_event_source *x = a, *y = b;
292
293         assert(x->type == SOURCE_EXIT);
294         assert(y->type == SOURCE_EXIT);
295
296         /* Enabled ones first */
297         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
298                 return -1;
299         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
300                 return 1;
301
302         /* Lower priority values first */
303         if (x->priority < y->priority)
304                 return -1;
305         if (x->priority > y->priority)
306                 return 1;
307
308         /* Stability for the rest */
309         if (x < y)
310                 return -1;
311         if (x > y)
312                 return 1;
313
314         return 0;
315 }
316
/* Destroy the event loop object: close all owned fds, free all
 * internal queues and tables, and release the object itself. Only
 * called once the reference count hits zero (see sd_event_unref()). */
static void event_free(sd_event *e) {
        assert(e);

        /* If this loop was installed as a default loop, clear that slot */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        if (e->epoll_fd >= 0)
                close_nointr_nofail(e->epoll_fd);

        if (e->signal_fd >= 0)
                close_nointr_nofail(e->signal_fd);

        if (e->realtime_fd >= 0)
                close_nointr_nofail(e->realtime_fd);

        if (e->monotonic_fd >= 0)
                close_nointr_nofail(e->monotonic_fd);

        if (e->watchdog_fd >= 0)
                close_nointr_nofail(e->watchdog_fd);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->monotonic_earliest);
        prioq_free(e->monotonic_latest);
        prioq_free(e->realtime_earliest);
        prioq_free(e->realtime_latest);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        free(e);
}
351
352 _public_ int sd_event_new(sd_event** ret) {
353         sd_event *e;
354         int r;
355
356         assert_return(ret, -EINVAL);
357
358         e = new0(sd_event, 1);
359         if (!e)
360                 return -ENOMEM;
361
362         e->n_ref = 1;
363         e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
364         e->realtime_next = e->monotonic_next = (usec_t) -1;
365         e->original_pid = getpid();
366
367         assert_se(sigemptyset(&e->sigset) == 0);
368
369         e->pending = prioq_new(pending_prioq_compare);
370         if (!e->pending) {
371                 r = -ENOMEM;
372                 goto fail;
373         }
374
375         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
376         if (e->epoll_fd < 0) {
377                 r = -errno;
378                 goto fail;
379         }
380
381         *ret = e;
382         return 0;
383
384 fail:
385         event_free(e);
386         return r;
387 }
388
389 _public_ sd_event* sd_event_ref(sd_event *e) {
390         assert_return(e, NULL);
391
392         assert(e->n_ref >= 1);
393         e->n_ref++;
394
395         return e;
396 }
397
398 _public_ sd_event* sd_event_unref(sd_event *e) {
399
400         if (!e)
401                 return NULL;
402
403         assert(e->n_ref >= 1);
404         e->n_ref--;
405
406         if (e->n_ref <= 0)
407                 event_free(e);
408
409         return NULL;
410 }
411
/* Returns true if the calling process is not the one that created the
 * loop, i.e. the loop object leaked across a fork(). */
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
420
421 static int source_io_unregister(sd_event_source *s) {
422         int r;
423
424         assert(s);
425         assert(s->type == SOURCE_IO);
426
427         if (!s->io.registered)
428                 return 0;
429
430         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
431         if (r < 0)
432                 return -errno;
433
434         s->io.registered = false;
435         return 0;
436 }
437
438 static int source_io_register(
439                 sd_event_source *s,
440                 int enabled,
441                 uint32_t events) {
442
443         struct epoll_event ev = {};
444         int r;
445
446         assert(s);
447         assert(s->type == SOURCE_IO);
448         assert(enabled != SD_EVENT_OFF);
449
450         ev.events = events;
451         ev.data.ptr = s;
452
453         if (enabled == SD_EVENT_ONESHOT)
454                 ev.events |= EPOLLONESHOT;
455
456         if (s->io.registered)
457                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
458         else
459                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
460
461         if (r < 0)
462                 return -errno;
463
464         s->io.registered = true;
465
466         return 0;
467 }
468
/* Destroy an event source: detach it from all per-type data
 * structures of its loop, then drop the loop reference and free the
 * object. Safe to call on partially initialized sources. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                switch (s->type) {

                case SOURCE_IO:
                        /* fd may be invalid if the source was only
                         * partially set up */
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_MONOTONIC:
                        prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_REALTIME:
                        prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the sigset while child
                                 * sources still need it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the sigset while an
                                 * explicit SIGCHLD signal source exists */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_WATCHDOG:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                sd_event_unref(s->event);
        }

        free(s);
}
540
/* Flip a source's pending state and keep the affected priority
 * queues consistent. Returns 0 on success or a negative errno-style
 * error (the pending flag is rolled back on failure). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record the iteration so pending_prioq_compare() can
                 * order older entries first */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        /* The time prioqs sort pending sources last, so the source
         * must be repositioned after the flag change */
        if (s->type == SOURCE_REALTIME) {
                prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        } else if (s->type == SOURCE_MONOTONIC) {
                prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
        }

        return 0;
}
573
/* Allocate a new event source of the given type with one reference,
 * taking a reference on the loop. Returns NULL on allocation
 * failure. */
static sd_event_source *source_new(sd_event *e, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new0(sd_event_source, 1);
        if (!s)
                return NULL;

        s->n_ref = 1;
        s->event = sd_event_ref(e);
        s->type = type;
        /* Not queued anywhere yet */
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        return s;
}
590
591 _public_ int sd_event_add_io(
592                 sd_event *e,
593                 int fd,
594                 uint32_t events,
595                 sd_event_io_handler_t callback,
596                 void *userdata,
597                 sd_event_source **ret) {
598
599         sd_event_source *s;
600         int r;
601
602         assert_return(e, -EINVAL);
603         assert_return(fd >= 0, -EINVAL);
604         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
605         assert_return(callback, -EINVAL);
606         assert_return(ret, -EINVAL);
607         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
608         assert_return(!event_pid_changed(e), -ECHILD);
609
610         s = source_new(e, SOURCE_IO);
611         if (!s)
612                 return -ENOMEM;
613
614         s->io.fd = fd;
615         s->io.events = events;
616         s->io.callback = callback;
617         s->userdata = userdata;
618         s->enabled = SD_EVENT_ON;
619
620         r = source_io_register(s, s->enabled, events);
621         if (r < 0) {
622                 source_free(s);
623                 return -errno;
624         }
625
626         *ret = s;
627         return 0;
628 }
629
/* Lazily create the timerfd for the given clock and register it with
 * the epoll instance, storing the fd in *timer_fd. A no-op if the fd
 * already exists. Returns 0 or a negative errno. */
static int event_setup_timer_fd(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id) {

        struct epoll_event ev = {};
        int r, fd;
        sd_id128_t bootid;

        assert(e);
        assert(timer_fd);

        if (_likely_(*timer_fd >= 0))
                return 0;

        fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Encode the source type in the epoll data pointer so the
         * dispatcher can tell which clock's fd fired */
        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(type);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                close_nointr_nofail(fd);
                return -errno;
        }

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;

        *timer_fd = fd;
        return 0;
}
674
/* Common implementation for sd_event_add_monotonic() and
 * sd_event_add_realtime(): lazily create the per-clock prioqs and
 * timerfd, then allocate a new ONESHOT timer source for 'usec' with
 * the given 'accuracy' (0 selects DEFAULT_ACCURACY_USEC) and insert
 * it into both queues. Returns 0 or a negative errno-style error. */
static int event_add_time_internal(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id,
                Prioq **earliest,
                Prioq **latest,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        assert(timer_fd);
        assert(earliest);
        assert(latest);

        /* Allocate the two per-clock queues on first use */
        if (!*earliest) {
                *earliest = prioq_new(earliest_time_prioq_compare);
                if (!*earliest)
                        return -ENOMEM;
        }

        if (!*latest) {
                *latest = prioq_new(latest_time_prioq_compare);
                if (!*latest)
                        return -ENOMEM;
        }

        if (*timer_fd < 0) {
                r = event_setup_timer_fd(e, type, timer_fd, id);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(*earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(*latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        /* source_free() also removes the source from whichever prioq
         * insertion succeeded */
        source_free(s);
        return r;
}
747
748 _public_ int sd_event_add_monotonic(sd_event *e,
749                                     uint64_t usec,
750                                     uint64_t accuracy,
751                                     sd_event_time_handler_t callback,
752                                     void *userdata,
753                                     sd_event_source **ret) {
754
755         return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
756 }
757
758 _public_ int sd_event_add_realtime(sd_event *e,
759                                    uint64_t usec,
760                                    uint64_t accuracy,
761                                    sd_event_time_handler_t callback,
762                                    void *userdata,
763                                    sd_event_source **ret) {
764
765         return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
766 }
767
/* (Re)create the loop's signalfd so it matches e->sigset, and on
 * first creation register it with the epoll instance. Returns 0 or a
 * negative errno. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        /* Passing an existing fd updates its mask in place; passing
         * -1 allocates a new one */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        /* Encode the source type in the epoll data pointer, as for
         * the timer fds */
        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                close_nointr_nofail(e->signal_fd);
                e->signal_fd = -1;

                return -errno;
        }

        return 0;
}
799
/* Create a new signal event source for 'sig' and add it to the loop.
 * Only one source per signal number is allowed (-EBUSY otherwise).
 * The caller is expected to have blocked the signal. On success the
 * new source (enabled SD_EVENT_ON) is returned in *ret. Returns 0 or
 * a negative errno-style error. */
_public_ int sd_event_add_signal(
                sd_event *e,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Lazily allocate the signal-number-indexed lookup array */
        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;
        assert_se(sigaddset(&e->sigset, sig) == 0);

        /* If this is SIGCHLD and child sources already routed it to
         * the signalfd, the fd is up to date already */
        if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        /* source_free() undoes the signal_sources[] and
                         * sigset changes above */
                        source_free(s);
                        return r;
                }
        }

        *ret = s;
        return 0;
}
848
849 _public_ int sd_event_add_child(
850                 sd_event *e,
851                 pid_t pid,
852                 int options,
853                 sd_event_child_handler_t callback,
854                 void *userdata,
855                 sd_event_source **ret) {
856
857         sd_event_source *s;
858         int r;
859
860         assert_return(e, -EINVAL);
861         assert_return(pid > 1, -EINVAL);
862         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
863         assert_return(options != 0, -EINVAL);
864         assert_return(callback, -EINVAL);
865         assert_return(ret, -EINVAL);
866         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
867         assert_return(!event_pid_changed(e), -ECHILD);
868
869         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
870         if (r < 0)
871                 return r;
872
873         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
874                 return -EBUSY;
875
876         s = source_new(e, SOURCE_CHILD);
877         if (!s)
878                 return -ENOMEM;
879
880         s->child.pid = pid;
881         s->child.options = options;
882         s->child.callback = callback;
883         s->userdata = userdata;
884         s->enabled = SD_EVENT_ONESHOT;
885
886         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
887         if (r < 0) {
888                 source_free(s);
889                 return r;
890         }
891
892         e->n_enabled_child_sources ++;
893
894         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
895
896         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
897                 r = event_update_signal_fd(e);
898                 if (r < 0) {
899                         source_free(s);
900                         return -errno;
901                 }
902         }
903
904         e->need_process_child = true;
905
906         *ret = s;
907         return 0;
908 }
909
910 _public_ int sd_event_add_defer(
911                 sd_event *e,
912                 sd_event_handler_t callback,
913                 void *userdata,
914                 sd_event_source **ret) {
915
916         sd_event_source *s;
917         int r;
918
919         assert_return(e, -EINVAL);
920         assert_return(callback, -EINVAL);
921         assert_return(ret, -EINVAL);
922         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
923         assert_return(!event_pid_changed(e), -ECHILD);
924
925         s = source_new(e, SOURCE_DEFER);
926         if (!s)
927                 return -ENOMEM;
928
929         s->defer.callback = callback;
930         s->userdata = userdata;
931         s->enabled = SD_EVENT_ONESHOT;
932
933         r = source_set_pending(s, true);
934         if (r < 0) {
935                 source_free(s);
936                 return r;
937         }
938
939         *ret = s;
940         return 0;
941 }
942
943 _public_ int sd_event_add_exit(
944                 sd_event *e,
945                 sd_event_handler_t callback,
946                 void *userdata,
947                 sd_event_source **ret) {
948
949         sd_event_source *s;
950         int r;
951
952         assert_return(e, -EINVAL);
953         assert_return(callback, -EINVAL);
954         assert_return(ret, -EINVAL);
955         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
956         assert_return(!event_pid_changed(e), -ECHILD);
957
958         if (!e->exit) {
959                 e->exit = prioq_new(exit_prioq_compare);
960                 if (!e->exit)
961                         return -ENOMEM;
962         }
963
964         s = source_new(e, SOURCE_EXIT);
965         if (!s)
966                 return -ENOMEM;
967
968         s->exit.callback = callback;
969         s->userdata = userdata;
970         s->exit.prioq_index = PRIOQ_IDX_NULL;
971         s->enabled = SD_EVENT_ONESHOT;
972
973         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
974         if (r < 0) {
975                 source_free(s);
976                 return r;
977         }
978
979         *ret = s;
980         return 0;
981 }
982
983 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
984         assert_return(s, NULL);
985
986         assert(s->n_ref >= 1);
987         s->n_ref++;
988
989         return s;
990 }
991
/* Drop one reference on the event source, destroying it when the
 * count reaches zero — except while its callback is running, in
 * which case destruction is deferred. Always returns NULL. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1018
1019 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1020         assert_return(s, NULL);
1021
1022         return s->event;
1023 }
1024
1025 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1026         assert_return(s, -EINVAL);
1027         assert_return(s->type != SOURCE_EXIT, -EDOM);
1028         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1029         assert_return(!event_pid_changed(s->event), -ECHILD);
1030
1031         return s->pending;
1032 }
1033
1034 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1035         assert_return(s, -EINVAL);
1036         assert_return(s->type == SOURCE_IO, -EDOM);
1037         assert_return(!event_pid_changed(s->event), -ECHILD);
1038
1039         return s->io.fd;
1040 }
1041
/* Replace the file descriptor an I/O source watches, re-registering it
 * with epoll if the source is currently enabled. Returns 0 on success,
 * negative errno-style error on failure, leaving the old fd registered. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Not registered with epoll right now, just swap the fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                /* Register the new fd first; only drop the old one from
                 * epoll once that succeeded, so a failure leaves the
                 * previous registration fully intact. */
                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* Roll back to the old, still-registered fd. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1077
1078 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1079         assert_return(s, -EINVAL);
1080         assert_return(events, -EINVAL);
1081         assert_return(s->type == SOURCE_IO, -EDOM);
1082         assert_return(!event_pid_changed(s->event), -ECHILD);
1083
1084         *events = s->io.events;
1085         return 0;
1086 }
1087
1088 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1089         int r;
1090
1091         assert_return(s, -EINVAL);
1092         assert_return(s->type == SOURCE_IO, -EDOM);
1093         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1094         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1095         assert_return(!event_pid_changed(s->event), -ECHILD);
1096
1097         if (s->io.events == events)
1098                 return 0;
1099
1100         if (s->enabled != SD_EVENT_OFF) {
1101                 r = source_io_register(s, s->enabled, events);
1102                 if (r < 0)
1103                         return r;
1104         }
1105
1106         s->io.events = events;
1107         source_set_pending(s, false);
1108
1109         return 0;
1110 }
1111
1112 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1113         assert_return(s, -EINVAL);
1114         assert_return(revents, -EINVAL);
1115         assert_return(s->type == SOURCE_IO, -EDOM);
1116         assert_return(s->pending, -ENODATA);
1117         assert_return(!event_pid_changed(s->event), -ECHILD);
1118
1119         *revents = s->io.revents;
1120         return 0;
1121 }
1122
1123 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1124         assert_return(s, -EINVAL);
1125         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1126         assert_return(!event_pid_changed(s->event), -ECHILD);
1127
1128         return s->signal.sig;
1129 }
1130
1131 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1132         assert_return(s, -EINVAL);
1133         assert_return(!event_pid_changed(s->event), -ECHILD);
1134
1135         return s->priority;
1136 }
1137
1138 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1139         assert_return(s, -EINVAL);
1140         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1141         assert_return(!event_pid_changed(s->event), -ECHILD);
1142
1143         if (s->priority == priority)
1144                 return 0;
1145
1146         s->priority = priority;
1147
1148         if (s->pending)
1149                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1150
1151         if (s->prepare)
1152                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1153
1154         if (s->type == SOURCE_EXIT)
1155                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1156
1157         return 0;
1158 }
1159
1160 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1161         assert_return(s, -EINVAL);
1162         assert_return(m, -EINVAL);
1163         assert_return(!event_pid_changed(s->event), -ECHILD);
1164
1165         *m = s->enabled;
1166         return 0;
1167 }
1168
1169 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1170         int r;
1171
1172         assert_return(s, -EINVAL);
1173         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1174         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1175         assert_return(!event_pid_changed(s->event), -ECHILD);
1176
1177         if (s->enabled == m)
1178                 return 0;
1179
1180         if (m == SD_EVENT_OFF) {
1181
1182                 switch (s->type) {
1183
1184                 case SOURCE_IO:
1185                         r = source_io_unregister(s);
1186                         if (r < 0)
1187                                 return r;
1188
1189                         s->enabled = m;
1190                         break;
1191
1192                 case SOURCE_MONOTONIC:
1193                         s->enabled = m;
1194                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1195                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1196                         break;
1197
1198                 case SOURCE_REALTIME:
1199                         s->enabled = m;
1200                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1201                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1202                         break;
1203
1204                 case SOURCE_SIGNAL:
1205                         s->enabled = m;
1206                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1207                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1208                                 event_update_signal_fd(s->event);
1209                         }
1210
1211                         break;
1212
1213                 case SOURCE_CHILD:
1214                         s->enabled = m;
1215
1216                         assert(s->event->n_enabled_child_sources > 0);
1217                         s->event->n_enabled_child_sources--;
1218
1219                         if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1220                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1221                                 event_update_signal_fd(s->event);
1222                         }
1223
1224                         break;
1225
1226                 case SOURCE_EXIT:
1227                         s->enabled = m;
1228                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1229                         break;
1230
1231                 case SOURCE_DEFER:
1232                         s->enabled = m;
1233                         break;
1234
1235                 case SOURCE_WATCHDOG:
1236                         assert_not_reached("Wut? I shouldn't exist.");
1237                 }
1238
1239         } else {
1240                 switch (s->type) {
1241
1242                 case SOURCE_IO:
1243                         r = source_io_register(s, m, s->io.events);
1244                         if (r < 0)
1245                                 return r;
1246
1247                         s->enabled = m;
1248                         break;
1249
1250                 case SOURCE_MONOTONIC:
1251                         s->enabled = m;
1252                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1253                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1254                         break;
1255
1256                 case SOURCE_REALTIME:
1257                         s->enabled = m;
1258                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1259                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1260                         break;
1261
1262                 case SOURCE_SIGNAL:
1263                         s->enabled = m;
1264
1265                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
1266                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1267                                 event_update_signal_fd(s->event);
1268                         }
1269                         break;
1270
1271                 case SOURCE_CHILD:
1272                         s->enabled = m;
1273
1274                         if (s->enabled == SD_EVENT_OFF) {
1275                                 s->event->n_enabled_child_sources++;
1276
1277                                 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1278                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1279                                         event_update_signal_fd(s->event);
1280                                 }
1281                         }
1282                         break;
1283
1284                 case SOURCE_EXIT:
1285                         s->enabled = m;
1286                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1287                         break;
1288
1289                 case SOURCE_DEFER:
1290                         s->enabled = m;
1291                         break;
1292
1293                 case SOURCE_WATCHDOG:
1294                         assert_not_reached("Wut? I shouldn't exist.");
1295                 }
1296         }
1297
1298         if (s->pending)
1299                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1300
1301         if (s->prepare)
1302                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1303
1304         return 0;
1305 }
1306
1307 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1308         assert_return(s, -EINVAL);
1309         assert_return(usec, -EINVAL);
1310         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1311         assert_return(!event_pid_changed(s->event), -ECHILD);
1312
1313         *usec = s->time.next;
1314         return 0;
1315 }
1316
1317 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1318         assert_return(s, -EINVAL);
1319         assert_return(usec != (uint64_t) -1, -EINVAL);
1320         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1321         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1322         assert_return(!event_pid_changed(s->event), -ECHILD);
1323
1324         s->time.next = usec;
1325
1326         source_set_pending(s, false);
1327
1328         if (s->type == SOURCE_REALTIME) {
1329                 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1330                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1331         } else {
1332                 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1333                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1334         }
1335
1336         return 0;
1337 }
1338
1339 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1340         assert_return(s, -EINVAL);
1341         assert_return(usec, -EINVAL);
1342         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1343         assert_return(!event_pid_changed(s->event), -ECHILD);
1344
1345         *usec = s->time.accuracy;
1346         return 0;
1347 }
1348
1349 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1350         assert_return(s, -EINVAL);
1351         assert_return(usec != (uint64_t) -1, -EINVAL);
1352         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1353         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1354         assert_return(!event_pid_changed(s->event), -ECHILD);
1355
1356         if (usec == 0)
1357                 usec = DEFAULT_ACCURACY_USEC;
1358
1359         s->time.accuracy = usec;
1360
1361         source_set_pending(s, false);
1362
1363         if (s->type == SOURCE_REALTIME)
1364                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1365         else
1366                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1367
1368         return 0;
1369 }
1370
1371 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1372         assert_return(s, -EINVAL);
1373         assert_return(pid, -EINVAL);
1374         assert_return(s->type == SOURCE_CHILD, -EDOM);
1375         assert_return(!event_pid_changed(s->event), -ECHILD);
1376
1377         *pid = s->child.pid;
1378         return 0;
1379 }
1380
1381 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1382         int r;
1383
1384         assert_return(s, -EINVAL);
1385         assert_return(s->type != SOURCE_EXIT, -EDOM);
1386         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1387         assert_return(!event_pid_changed(s->event), -ECHILD);
1388
1389         if (s->prepare == callback)
1390                 return 0;
1391
1392         if (callback && s->prepare) {
1393                 s->prepare = callback;
1394                 return 0;
1395         }
1396
1397         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1398         if (r < 0)
1399                 return r;
1400
1401         s->prepare = callback;
1402
1403         if (callback) {
1404                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1405                 if (r < 0)
1406                         return r;
1407         } else
1408                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1409
1410         return 0;
1411 }
1412
1413 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1414         assert_return(s, NULL);
1415
1416         return s->userdata;
1417 }
1418
1419 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1420         void *ret;
1421
1422         assert_return(s, NULL);
1423
1424         ret = s->userdata;
1425         s->userdata = userdata;
1426
1427         return ret;
1428 }
1429
/* Choose a wakeup time within the window [a, b], preferring
 * system-wide synchronized slots derived from the per-boot
 * perturbation value (see the comment below). */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        /* An earliest time of 0 means "wake up immediately". */
        if (a <= 0)
                return 0;

        /* The window is (nearly) empty, no room to optimize; use the
         * earliest permitted time. */
        if (b <= a + 1)
                return a;

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the perturbed slot within the current minute. Each step
         * below computes the candidate in b's granularity bucket and
         * walks back one bucket if that lands past b. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* Same idea at 10s granularity. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* Same idea at 1s granularity. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* Same idea at 250ms granularity. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronized slot fits the window; wake at the last
         * possible moment. */
        return b;
}
1505
1506 static int event_arm_timer(
1507                 sd_event *e,
1508                 int timer_fd,
1509                 Prioq *earliest,
1510                 Prioq *latest,
1511                 usec_t *next) {
1512
1513         struct itimerspec its = {};
1514         sd_event_source *a, *b;
1515         usec_t t;
1516         int r;
1517
1518         assert(e);
1519         assert(next);
1520
1521         a = prioq_peek(earliest);
1522         if (!a || a->enabled == SD_EVENT_OFF) {
1523
1524                 if (timer_fd < 0)
1525                         return 0;
1526
1527                 if (*next == (usec_t) -1)
1528                         return 0;
1529
1530                 /* disarm */
1531                 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1532                 if (r < 0)
1533                         return r;
1534
1535                 *next = (usec_t) -1;
1536
1537                 return 0;
1538         }
1539
1540         b = prioq_peek(latest);
1541         assert_se(b && b->enabled != SD_EVENT_OFF);
1542
1543         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1544         if (*next == t)
1545                 return 0;
1546
1547         assert_se(timer_fd >= 0);
1548
1549         if (t == 0) {
1550                 /* We don' want to disarm here, just mean some time looooong ago. */
1551                 its.it_value.tv_sec = 0;
1552                 its.it_value.tv_nsec = 1;
1553         } else
1554                 timespec_store(&its.it_value, t);
1555
1556         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1557         if (r < 0)
1558                 return -errno;
1559
1560         *next = t;
1561         return 0;
1562 }
1563
1564 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1565         assert(e);
1566         assert(s);
1567         assert(s->type == SOURCE_IO);
1568
1569         s->io.revents = events;
1570
1571         return source_set_pending(s, true);
1572 }
1573
1574 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1575         uint64_t x;
1576         ssize_t ss;
1577
1578         assert(e);
1579         assert(fd >= 0);
1580
1581         assert_return(events == EPOLLIN, -EIO);
1582
1583         ss = read(fd, &x, sizeof(x));
1584         if (ss < 0) {
1585                 if (errno == EAGAIN || errno == EINTR)
1586                         return 0;
1587
1588                 return -errno;
1589         }
1590
1591         if (ss != sizeof(x))
1592                 return -EIO;
1593
1594         if (next)
1595                 *next = (usec_t) -1;
1596
1597         return 0;
1598 }
1599
1600 static int process_timer(
1601                 sd_event *e,
1602                 usec_t n,
1603                 Prioq *earliest,
1604                 Prioq *latest) {
1605
1606         sd_event_source *s;
1607         int r;
1608
1609         assert(e);
1610
1611         for (;;) {
1612                 s = prioq_peek(earliest);
1613                 if (!s ||
1614                     s->time.next > n ||
1615                     s->enabled == SD_EVENT_OFF ||
1616                     s->pending)
1617                         break;
1618
1619                 r = source_set_pending(s, true);
1620                 if (r < 0)
1621                         return r;
1622
1623                 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1624                 prioq_reshuffle(latest, s, &s->time.latest_index);
1625         }
1626
1627         return 0;
1628 }
1629
/* Poll each registered child source with waitid() and mark those whose
 * child changed state as pending. Returns 0 on success, negative
 * errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued, the earlier siginfo is still valid. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 if WNOHANG found no state change. */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1696
/* Drain the signalfd and mark the matching signal sources pending.
 * Returns 1 if at least one siginfo was consumed, 0 if none were,
 * negative errno-style error on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* Queue drained (or interrupted): report whether we
                         * got anything at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (ss != sizeof(si))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD is also routed through the per-child
                         * sources; only fall through to an explicit SIGCHLD
                         * signal source if one is registered. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        /* The signal is in our sigset, so a source must
                         * exist for it; anything else is an inconsistency. */
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }

        /* Not reached — the loop above only exits via return; this
         * statement exists to silence compiler warnings. */
        return 0;
}
1743
/* Invoke the user callback of a pending event source, handling oneshot
 * disabling, safe unref-from-callback, and disabling of sources whose
 * callback failed. Returns 1 on success, negative error otherwise. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Clear the pending flag up front for all types except defer
         * and exit sources, which are not managed via the pending
         * queue here. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        /* Oneshot sources are switched off before the callback runs,
         * so the callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, sd_event_source_unref() will not free
         * the source even if the callback drops the last reference. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_MONOTONIC:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_REALTIME:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine this before the callback, which may alter
                 * the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* If the callback dropped the last reference, complete the
         * deferred destruction now; otherwise disable failed sources. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
1822
/* Run the prepare callbacks of all enabled sources that have not yet
 * been prepared during this loop iteration, in queue order. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* The prepare queue sorts unprepared sources first, so
                 * hitting an already-prepared or disabled head means we
                 * are done. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Stamp and reshuffle first, so the source is not
                 * picked again this iteration. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Protect the source from being freed by an unref from
                 * within its own callback. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
1857
1858 static int dispatch_exit(sd_event *e) {
1859         sd_event_source *p;
1860         int r;
1861
1862         assert(e);
1863
1864         p = prioq_peek(e->exit);
1865         if (!p || p->enabled == SD_EVENT_OFF) {
1866                 e->state = SD_EVENT_FINISHED;
1867                 return 0;
1868         }
1869
1870         sd_event_ref(e);
1871         e->iteration++;
1872         e->state = SD_EVENT_EXITING;
1873
1874         r = source_dispatch(p);
1875
1876         e->state = SD_EVENT_PASSIVE;
1877         sd_event_unref(e);
1878
1879         return r;
1880 }
1881
1882 static sd_event_source* event_next_pending(sd_event *e) {
1883         sd_event_source *p;
1884
1885         assert(e);
1886
1887         p = prioq_peek(e->pending);
1888         if (!p)
1889                 return NULL;
1890
1891         if (p->enabled == SD_EVENT_OFF)
1892                 return NULL;
1893
1894         return p;
1895 }
1896
1897 static int arm_watchdog(sd_event *e) {
1898         struct itimerspec its = {};
1899         usec_t t;
1900         int r;
1901
1902         assert(e);
1903         assert(e->watchdog_fd >= 0);
1904
1905         t = sleep_between(e,
1906                           e->watchdog_last + (e->watchdog_period / 2),
1907                           e->watchdog_last + (e->watchdog_period * 3 / 4));
1908
1909         timespec_store(&its.it_value, t);
1910
1911         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
1912         if (r < 0)
1913                 return -errno;
1914
1915         return 0;
1916 }
1917
1918 static int process_watchdog(sd_event *e) {
1919         assert(e);
1920
1921         if (!e->watchdog)
1922                 return 0;
1923
1924         /* Don't notify watchdog too often */
1925         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
1926                 return 0;
1927
1928         sd_notify(false, "WATCHDOG=1");
1929         e->watchdog_last = e->timestamp.monotonic;
1930
1931         return arm_watchdog(e);
1932 }
1933
/* Runs a single iteration of the event loop: prepare callbacks, timer
 * arming, one epoll_wait() bounded by 'timeout' (in usec; (uint64_t) -1
 * means wait forever), event collection, and dispatch of exactly one
 * pending event source. Returns the dispatch result, 0 on timeout/EINTR,
 * or a negative errno-style error. The step order below is significant. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, only exit sources run from here on. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the loop so a callback dropping the last reference cannot
         * free it while we are inside this iteration. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Re-arm both timerfds in case prepare callbacks changed timers. */
        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
        if (r < 0)
                goto finish;

        /* If there is already something to dispatch, just poll without
         * blocking so we still pick up freshly ready fds. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* usec → ms, rounding up so we never wake before the deadline. */
        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* EAGAIN/EINTR are not errors for the caller. */
                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
                goto finish;
        }

        /* Single timestamp per iteration; all timer processing below and
         * sd_event_get_now_*() report this instant. */
        dual_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with small SOURCE_* integers in
                 * data.ptr; anything else is a real IO source pointer. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark elapsed timer sources pending, per clock. */
        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch at most one source per iteration. */
        p = event_next_pending(e);
        if (!p) {
                r = 0;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2024
2025 _public_ int sd_event_loop(sd_event *e) {
2026         int r;
2027
2028         assert_return(e, -EINVAL);
2029         assert_return(!event_pid_changed(e), -ECHILD);
2030         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2031
2032         sd_event_ref(e);
2033
2034         while (e->state != SD_EVENT_FINISHED) {
2035                 r = sd_event_run(e, (uint64_t) -1);
2036                 if (r < 0)
2037                         goto finish;
2038         }
2039
2040         r = e->exit_code;
2041
2042 finish:
2043         sd_event_unref(e);
2044         return r;
2045 }
2046
/* Returns the loop's current state (SD_EVENT_PASSIVE, _RUNNING,
 * _EXITING or _FINISHED), or a negative error on bad arguments. */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2053
2054 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2055         assert_return(e, -EINVAL);
2056         assert_return(code, -EINVAL);
2057         assert_return(!event_pid_changed(e), -ECHILD);
2058
2059         if (!e->exit_requested)
2060                 return -ENODATA;
2061
2062         *code = e->exit_code;
2063         return 0;
2064 }
2065
/* Requests termination of the event loop with the given exit code.
 * Subsequent iterations will only run SOURCE_EXIT sources (see
 * dispatch_exit()) before the loop finishes. Always returns 0 on a
 * valid loop object. */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
2076
/* Returns in *usec the CLOCK_REALTIME timestamp taken at the start of
 * the current/most recent iteration (see dual_timestamp_get() in
 * sd_event_run()); -ENODATA before the first iteration. */
_public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.realtime;
        return 0;
}
2086
/* Returns in *usec the CLOCK_MONOTONIC timestamp taken at the start of
 * the current/most recent iteration (see dual_timestamp_get() in
 * sd_event_run()); -ENODATA before the first iteration. */
_public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.monotonic;
        return 0;
}
2096
2097 _public_ int sd_event_default(sd_event **ret) {
2098
2099         static __thread sd_event *default_event = NULL;
2100         sd_event *e;
2101         int r;
2102
2103         if (!ret)
2104                 return !!default_event;
2105
2106         if (default_event) {
2107                 *ret = sd_event_ref(default_event);
2108                 return 0;
2109         }
2110
2111         r = sd_event_new(&e);
2112         if (r < 0)
2113                 return r;
2114
2115         e->default_event_ptr = &default_event;
2116         e->tid = gettid();
2117         default_event = e;
2118
2119         *ret = e;
2120         return 1;
2121 }
2122
2123 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2124         assert_return(e, -EINVAL);
2125         assert_return(tid, -EINVAL);
2126         assert_return(!event_pid_changed(e), -ECHILD);
2127
2128         if (e->tid != 0) {
2129                 *tid = e->tid;
2130                 return 0;
2131         }
2132
2133         return -ENXIO;
2134 }
2135
2136 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2137         int r;
2138
2139         assert_return(e, -EINVAL);
2140         assert_return(!event_pid_changed(e), -ECHILD);
2141
2142         if (e->watchdog == !!b)
2143                 return e->watchdog;
2144
2145         if (b) {
2146                 struct epoll_event ev = {};
2147                 const char *env;
2148
2149                 env = getenv("WATCHDOG_USEC");
2150                 if (!env)
2151                         return false;
2152
2153                 r = safe_atou64(env, &e->watchdog_period);
2154                 if (r < 0)
2155                         return r;
2156                 if (e->watchdog_period <= 0)
2157                         return -EIO;
2158
2159                 /* Issue first ping immediately */
2160                 sd_notify(false, "WATCHDOG=1");
2161                 e->watchdog_last = now(CLOCK_MONOTONIC);
2162
2163                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2164                 if (e->watchdog_fd < 0)
2165                         return -errno;
2166
2167                 r = arm_watchdog(e);
2168                 if (r < 0)
2169                         goto fail;
2170
2171                 ev.events = EPOLLIN;
2172                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2173
2174                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2175                 if (r < 0) {
2176                         r = -errno;
2177                         goto fail;
2178                 }
2179
2180         } else {
2181                 if (e->watchdog_fd >= 0) {
2182                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2183                         close_nointr_nofail(e->watchdog_fd);
2184                         e->watchdog_fd = -1;
2185                 }
2186         }
2187
2188         e->watchdog = !!b;
2189         return e->watchdog;
2190
2191 fail:
2192         close_nointr_nofail(e->watchdog_fd);
2193         e->watchdog_fd = -1;
2194         return r;
2195 }
2196
/* Returns whether watchdog pinging is currently enabled (0/1), or a
 * negative error on bad arguments. */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}