chiark / gitweb /
libsystemd-bus: the same error codes for sd_bus_release_name() (for kdbus and dbus1)
[elogind.git] / src / libsystemd-bus / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34
35 #include "sd-event.h"
36
37 #define EPOLL_QUEUE_MAX 64
38 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
39
/* The kinds of event sources the loop can dispatch. Stored in a 4-bit
 * bitfield in sd_event_source, so the enum must stay below 16 entries. */
typedef enum EventSourceType {
        SOURCE_IO,         /* an fd watched via epoll */
        SOURCE_MONOTONIC,  /* timer on CLOCK_MONOTONIC */
        SOURCE_REALTIME,   /* timer on CLOCK_REALTIME */
        SOURCE_SIGNAL,     /* UNIX signal delivered via signalfd */
        SOURCE_CHILD,      /* child process state change (SIGCHLD driven) */
        SOURCE_DEFER,      /* dispatched without waiting for an external event */
        SOURCE_EXIT,       /* run when the loop is exiting */
        SOURCE_WATCHDOG    /* internal watchdog timer (see watchdog_fd) */
} EventSourceType;
50
/* A single registered event source. Which member of the trailing union
 * is valid depends on "type". */
struct sd_event_source {
        unsigned n_ref;                 /* reference count, see sd_event_source_ref()/unref() */

        sd_event *event;                /* loop we are attached to (we hold a reference) */
        void *userdata;                 /* opaque pointer handed back to the callback */
        sd_event_handler_t prepare;     /* optional callback invoked before polling */

        EventSourceType type:4;         /* selects the union member below */
        int enabled:3;                  /* SD_EVENT_OFF, SD_EVENT_ON or SD_EVENT_ONESHOT */
        bool pending:1;                 /* queued in event->pending, waiting for dispatch */
        bool dispatching:1;             /* currently inside its own callback */

        int priority;                   /* lower values are dispatched first */
        unsigned pending_index;         /* index in event->pending prioq */
        unsigned prepare_index;         /* index in event->prepare prioq */
        unsigned pending_iteration;     /* loop iteration the source became pending in */
        unsigned prepare_iteration;     /* loop iteration the source was last prepared in */

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;        /* EPOLL* mask we subscribed to */
                        uint32_t revents;       /* EPOLL* mask last reported */
                        bool registered:1;      /* fd currently added to the epoll instance */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;  /* earliest dispatch time, plus allowed slack */
                        unsigned earliest_index;        /* index in the per-clock *_earliest prioq */
                        unsigned latest_index;          /* index in the per-clock *_latest prioq */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;                /* signal number, > 0 once initialized */
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;            /* WEXITED|WSTOPPED|WCONTINUED, as for waitid() */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;   /* index in event->exit prioq */
                } exit;
        };
};
103
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;                 /* reference count, see sd_event_ref()/unref() */

        int epoll_fd;                   /* the central epoll instance */
        int signal_fd;                  /* signalfd covering "sigset", -1 until first needed */
        int realtime_fd;                /* timerfd for CLOCK_REALTIME sources, -1 until first needed */
        int monotonic_fd;               /* timerfd for CLOCK_MONOTONIC sources, -1 until first needed */
        int watchdog_fd;                /* timerfd for watchdog wakeups, -1 until first needed */

        Prioq *pending;                 /* sources with an undelivered event, in dispatch order */
        Prioq *prepare;                 /* sources that have a prepare callback set */

        /* For both clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */
        Prioq *monotonic_earliest;
        Prioq *monotonic_latest;
        Prioq *realtime_earliest;
        Prioq *realtime_latest;

        usec_t realtime_next, monotonic_next;   /* (usec_t) -1 when no wakeup is armed */
        usec_t perturb;                 /* boot-id derived offset to spread coalesced wakeups across machines */

        sigset_t sigset;                /* signals currently routed through signal_fd */
        sd_event_source **signal_sources;       /* _NSIG-sized table, at most one source per signal */

        Hashmap *child_sources;         /* maps pid (as INT_TO_PTR) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;       /* child sources that are not SD_EVENT_OFF */

        Prioq *exit;                    /* SOURCE_EXIT sources, dispatched at loop exit */

        pid_t original_pid;             /* pid at creation; used to detect use across fork() */

        unsigned iteration;             /* monotonically increasing loop iteration counter */
        dual_timestamp timestamp;       /* NOTE(review): presumably the wakeup time of the current iteration — set outside this chunk */
        int state;                      /* loop state machine; SD_EVENT_FINISHED rejects new sources */

        bool exit_requested:1;
        bool need_process_child:1;      /* set when child sources need a waitid() sweep */
        bool watchdog:1;

        int exit_code;                  /* NOTE(review): presumably set via sd_event_exit() — not visible in this chunk */

        pid_t tid;                      /* NOTE(review): presumably the owning thread of the default loop — set outside this chunk */
        sd_event **default_event_ptr;   /* back-pointer cleared in event_free() */

        usec_t watchdog_last, watchdog_period;
};
155
156 static int pending_prioq_compare(const void *a, const void *b) {
157         const sd_event_source *x = a, *y = b;
158
159         assert(x->pending);
160         assert(y->pending);
161
162         /* Enabled ones first */
163         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
164                 return -1;
165         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
166                 return 1;
167
168         /* Lower priority values first */
169         if (x->priority < y->priority)
170                 return -1;
171         if (x->priority > y->priority)
172                 return 1;
173
174         /* Older entries first */
175         if (x->pending_iteration < y->pending_iteration)
176                 return -1;
177         if (x->pending_iteration > y->pending_iteration)
178                 return 1;
179
180         /* Stability for the rest */
181         if (x < y)
182                 return -1;
183         if (x > y)
184                 return 1;
185
186         return 0;
187 }
188
189 static int prepare_prioq_compare(const void *a, const void *b) {
190         const sd_event_source *x = a, *y = b;
191
192         assert(x->prepare);
193         assert(y->prepare);
194
195         /* Move most recently prepared ones last, so that we can stop
196          * preparing as soon as we hit one that has already been
197          * prepared in the current iteration */
198         if (x->prepare_iteration < y->prepare_iteration)
199                 return -1;
200         if (x->prepare_iteration > y->prepare_iteration)
201                 return 1;
202
203         /* Enabled ones first */
204         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
205                 return -1;
206         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
207                 return 1;
208
209         /* Lower priority values first */
210         if (x->priority < y->priority)
211                 return -1;
212         if (x->priority > y->priority)
213                 return 1;
214
215         /* Stability for the rest */
216         if (x < y)
217                 return -1;
218         if (x > y)
219                 return 1;
220
221         return 0;
222 }
223
224 static int earliest_time_prioq_compare(const void *a, const void *b) {
225         const sd_event_source *x = a, *y = b;
226
227         assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
228         assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
229
230         /* Enabled ones first */
231         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
232                 return -1;
233         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
234                 return 1;
235
236         /* Move the pending ones to the end */
237         if (!x->pending && y->pending)
238                 return -1;
239         if (x->pending && !y->pending)
240                 return 1;
241
242         /* Order by time */
243         if (x->time.next < y->time.next)
244                 return -1;
245         if (x->time.next > y->time.next)
246                 return 1;
247
248         /* Stability for the rest */
249         if (x < y)
250                 return -1;
251         if (x > y)
252                 return 1;
253
254         return 0;
255 }
256
257 static int latest_time_prioq_compare(const void *a, const void *b) {
258         const sd_event_source *x = a, *y = b;
259
260         assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
261                (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
262
263         /* Enabled ones first */
264         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
265                 return -1;
266         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
267                 return 1;
268
269         /* Move the pending ones to the end */
270         if (!x->pending && y->pending)
271                 return -1;
272         if (x->pending && !y->pending)
273                 return 1;
274
275         /* Order by time */
276         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
277                 return -1;
278         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
279                 return 1;
280
281         /* Stability for the rest */
282         if (x < y)
283                 return -1;
284         if (x > y)
285                 return 1;
286
287         return 0;
288 }
289
290 static int exit_prioq_compare(const void *a, const void *b) {
291         const sd_event_source *x = a, *y = b;
292
293         assert(x->type == SOURCE_EXIT);
294         assert(y->type == SOURCE_EXIT);
295
296         /* Enabled ones first */
297         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
298                 return -1;
299         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
300                 return 1;
301
302         /* Lower priority values first */
303         if (x->priority < y->priority)
304                 return -1;
305         if (x->priority > y->priority)
306                 return 1;
307
308         /* Stability for the rest */
309         if (x < y)
310                 return -1;
311         if (x > y)
312                 return 1;
313
314         return 0;
315 }
316
/* Destroys a loop object: closes every fd we own, frees all priority
 * queues and tables, then the object itself. Event sources themselves
 * are not freed here; they hold their own references. */
static void event_free(sd_event *e) {
        assert(e);

        /* Clear the "default event" back-pointer so a later request
         * for the default loop allocates a fresh one. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        if (e->epoll_fd >= 0)
                close_nointr_nofail(e->epoll_fd);

        if (e->signal_fd >= 0)
                close_nointr_nofail(e->signal_fd);

        if (e->realtime_fd >= 0)
                close_nointr_nofail(e->realtime_fd);

        if (e->monotonic_fd >= 0)
                close_nointr_nofail(e->monotonic_fd);

        if (e->watchdog_fd >= 0)
                close_nointr_nofail(e->watchdog_fd);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->monotonic_earliest);
        prioq_free(e->monotonic_latest);
        prioq_free(e->realtime_earliest);
        prioq_free(e->realtime_latest);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        free(e);
}
351
352 _public_ int sd_event_new(sd_event** ret) {
353         sd_event *e;
354         int r;
355
356         assert_return(ret, -EINVAL);
357
358         e = new0(sd_event, 1);
359         if (!e)
360                 return -ENOMEM;
361
362         e->n_ref = 1;
363         e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
364         e->realtime_next = e->monotonic_next = (usec_t) -1;
365         e->original_pid = getpid();
366
367         assert_se(sigemptyset(&e->sigset) == 0);
368
369         e->pending = prioq_new(pending_prioq_compare);
370         if (!e->pending) {
371                 r = -ENOMEM;
372                 goto fail;
373         }
374
375         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
376         if (e->epoll_fd < 0) {
377                 r = -errno;
378                 goto fail;
379         }
380
381         *ret = e;
382         return 0;
383
384 fail:
385         event_free(e);
386         return r;
387 }
388
389 _public_ sd_event* sd_event_ref(sd_event *e) {
390         assert_return(e, NULL);
391
392         assert(e->n_ref >= 1);
393         e->n_ref++;
394
395         return e;
396 }
397
398 _public_ sd_event* sd_event_unref(sd_event *e) {
399
400         if (!e)
401                 return NULL;
402
403         assert(e->n_ref >= 1);
404         e->n_ref--;
405
406         if (e->n_ref <= 0)
407                 event_free(e);
408
409         return NULL;
410 }
411
/* Returns true if the loop was created in a different process than the
 * one calling, i.e. it was carried across a fork(). */
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
420
421 static int source_io_unregister(sd_event_source *s) {
422         int r;
423
424         assert(s);
425         assert(s->type == SOURCE_IO);
426
427         if (!s->io.registered)
428                 return 0;
429
430         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
431         if (r < 0)
432                 return -errno;
433
434         s->io.registered = false;
435         return 0;
436 }
437
438 static int source_io_register(
439                 sd_event_source *s,
440                 int enabled,
441                 uint32_t events) {
442
443         struct epoll_event ev = {};
444         int r;
445
446         assert(s);
447         assert(s->type == SOURCE_IO);
448         assert(enabled != SD_EVENT_OFF);
449
450         ev.events = events;
451         ev.data.ptr = s;
452
453         if (enabled == SD_EVENT_ONESHOT)
454                 ev.events |= EPOLLONESHOT;
455
456         if (s->io.registered)
457                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
458         else
459                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
460
461         if (r < 0)
462                 return -errno;
463
464         s->io.registered = true;
465
466         return 0;
467 }
468
/* Detaches a source from its loop (per-type cleanup), removes it from
 * the generic pending/prepare queues, drops the loop reference and
 * frees the object. Safe to call on partially initialized sources, as
 * each cleanup step checks whether the corresponding state was set. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_MONOTONIC:
                        prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_REALTIME:
                        prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the watched set if child
                                 * sources still depend on it */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Keep SIGCHLD in the watched set if an
                                 * explicit SIGCHLD signal source exists */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                sd_event_unref(s->event);
        }

        free(s);
}
537
/* Marks a source as having (or no longer having) an undelivered event,
 * keeping the pending prioq and — for timer sources — the per-clock
 * prioqs consistent. Returns 0 or a negative errno. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);         /* exit sources use their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when the source became pending; older pending
                 * entries are dispatched first. */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        /* The time prioqs order pending sources last, so a change of
         * the pending flag requires a reshuffle. */
        if (s->type == SOURCE_REALTIME) {
                prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        } else if (s->type == SOURCE_MONOTONIC) {
                prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
        }

        return 0;
}
570
571 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
572         sd_event_source *s;
573
574         assert(e);
575
576         s = new0(sd_event_source, 1);
577         if (!s)
578                 return NULL;
579
580         s->n_ref = 1;
581         s->event = sd_event_ref(e);
582         s->type = type;
583         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
584
585         return s;
586 }
587
588 _public_ int sd_event_add_io(
589                 sd_event *e,
590                 int fd,
591                 uint32_t events,
592                 sd_event_io_handler_t callback,
593                 void *userdata,
594                 sd_event_source **ret) {
595
596         sd_event_source *s;
597         int r;
598
599         assert_return(e, -EINVAL);
600         assert_return(fd >= 0, -EINVAL);
601         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
602         assert_return(callback, -EINVAL);
603         assert_return(ret, -EINVAL);
604         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
605         assert_return(!event_pid_changed(e), -ECHILD);
606
607         s = source_new(e, SOURCE_IO);
608         if (!s)
609                 return -ENOMEM;
610
611         s->io.fd = fd;
612         s->io.events = events;
613         s->io.callback = callback;
614         s->userdata = userdata;
615         s->enabled = SD_EVENT_ON;
616
617         r = source_io_register(s, s->enabled, events);
618         if (r < 0) {
619                 source_free(s);
620                 return -errno;
621         }
622
623         *ret = s;
624         return 0;
625 }
626
/* Lazily creates the timerfd for one of the two clocks and adds it to
 * the epoll instance. The epoll data pointer carries the source type
 * (not a source object) so wakeups can be told apart. Also initializes
 * the boot-id based wakeup perturbation. Returns 0 or -errno. */
static int event_setup_timer_fd(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id) {

        struct epoll_event ev = {};
        int r, fd;
        sd_id128_t bootid;

        assert(e);
        assert(timer_fd);

        /* Already set up? */
        if (_likely_(*timer_fd >= 0))
                return 0;

        fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(type);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                close_nointr_nofail(fd);
                return -errno;
        }

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;

        *timer_fd = fd;
        return 0;
}
671
/* Common implementation behind sd_event_add_monotonic() and
 * sd_event_add_realtime(): lazily allocates the per-clock earliest and
 * latest prioqs and the timerfd, then creates a one-shot timer source
 * that fires between "usec" and "usec + accuracy" (an accuracy of 0
 * selects DEFAULT_ACCURACY_USEC). On success stores the new source in
 * *ret and returns 0; on failure returns a negative errno. */
static int event_add_time_internal(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id,
                Prioq **earliest,
                Prioq **latest,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        assert(timer_fd);
        assert(earliest);
        assert(latest);

        /* Allocate the two per-clock prioqs on first use */
        if (!*earliest) {
                *earliest = prioq_new(earliest_time_prioq_compare);
                if (!*earliest)
                        return -ENOMEM;
        }

        if (!*latest) {
                *latest = prioq_new(latest_time_prioq_compare);
                if (!*latest)
                        return -ENOMEM;
        }

        /* And the timerfd backing this clock */
        if (*timer_fd < 0) {
                r = event_setup_timer_fd(e, type, timer_fd, id);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(*earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(*latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        /* source_free() removes the source from whichever prioqs it
         * already made it into */
        source_free(s);
        return r;
}
744
745 _public_ int sd_event_add_monotonic(sd_event *e,
746                                     uint64_t usec,
747                                     uint64_t accuracy,
748                                     sd_event_time_handler_t callback,
749                                     void *userdata,
750                                     sd_event_source **ret) {
751
752         return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
753 }
754
755 _public_ int sd_event_add_realtime(sd_event *e,
756                                    uint64_t usec,
757                                    uint64_t accuracy,
758                                    sd_event_time_handler_t callback,
759                                    void *userdata,
760                                    sd_event_source **ret) {
761
762         return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
763 }
764
765 static int event_update_signal_fd(sd_event *e) {
766         struct epoll_event ev = {};
767         bool add_to_epoll;
768         int r;
769
770         assert(e);
771
772         add_to_epoll = e->signal_fd < 0;
773
774         r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
775         if (r < 0)
776                 return -errno;
777
778         e->signal_fd = r;
779
780         if (!add_to_epoll)
781                 return 0;
782
783         ev.events = EPOLLIN;
784         ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
785
786         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
787         if (r < 0) {
788                 close_nointr_nofail(e->signal_fd);
789                 e->signal_fd = -1;
790
791                 return -errno;
792         }
793
794         return 0;
795 }
796
797 _public_ int sd_event_add_signal(
798                 sd_event *e,
799                 int sig,
800                 sd_event_signal_handler_t callback,
801                 void *userdata,
802                 sd_event_source **ret) {
803
804         sd_event_source *s;
805         int r;
806
807         assert_return(e, -EINVAL);
808         assert_return(sig > 0, -EINVAL);
809         assert_return(sig < _NSIG, -EINVAL);
810         assert_return(callback, -EINVAL);
811         assert_return(ret, -EINVAL);
812         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
813         assert_return(!event_pid_changed(e), -ECHILD);
814
815         if (!e->signal_sources) {
816                 e->signal_sources = new0(sd_event_source*, _NSIG);
817                 if (!e->signal_sources)
818                         return -ENOMEM;
819         } else if (e->signal_sources[sig])
820                 return -EBUSY;
821
822         s = source_new(e, SOURCE_SIGNAL);
823         if (!s)
824                 return -ENOMEM;
825
826         s->signal.sig = sig;
827         s->signal.callback = callback;
828         s->userdata = userdata;
829         s->enabled = SD_EVENT_ON;
830
831         e->signal_sources[sig] = s;
832         assert_se(sigaddset(&e->sigset, sig) == 0);
833
834         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
835                 r = event_update_signal_fd(e);
836                 if (r < 0) {
837                         source_free(s);
838                         return r;
839                 }
840         }
841
842         *ret = s;
843         return 0;
844 }
845
846 _public_ int sd_event_add_child(
847                 sd_event *e,
848                 pid_t pid,
849                 int options,
850                 sd_event_child_handler_t callback,
851                 void *userdata,
852                 sd_event_source **ret) {
853
854         sd_event_source *s;
855         int r;
856
857         assert_return(e, -EINVAL);
858         assert_return(pid > 1, -EINVAL);
859         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
860         assert_return(options != 0, -EINVAL);
861         assert_return(callback, -EINVAL);
862         assert_return(ret, -EINVAL);
863         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
864         assert_return(!event_pid_changed(e), -ECHILD);
865
866         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
867         if (r < 0)
868                 return r;
869
870         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
871                 return -EBUSY;
872
873         s = source_new(e, SOURCE_CHILD);
874         if (!s)
875                 return -ENOMEM;
876
877         s->child.pid = pid;
878         s->child.options = options;
879         s->child.callback = callback;
880         s->userdata = userdata;
881         s->enabled = SD_EVENT_ONESHOT;
882
883         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
884         if (r < 0) {
885                 source_free(s);
886                 return r;
887         }
888
889         e->n_enabled_child_sources ++;
890
891         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
892
893         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
894                 r = event_update_signal_fd(e);
895                 if (r < 0) {
896                         source_free(s);
897                         return -errno;
898                 }
899         }
900
901         e->need_process_child = true;
902
903         *ret = s;
904         return 0;
905 }
906
907 _public_ int sd_event_add_defer(
908                 sd_event *e,
909                 sd_event_handler_t callback,
910                 void *userdata,
911                 sd_event_source **ret) {
912
913         sd_event_source *s;
914         int r;
915
916         assert_return(e, -EINVAL);
917         assert_return(callback, -EINVAL);
918         assert_return(ret, -EINVAL);
919         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
920         assert_return(!event_pid_changed(e), -ECHILD);
921
922         s = source_new(e, SOURCE_DEFER);
923         if (!s)
924                 return -ENOMEM;
925
926         s->defer.callback = callback;
927         s->userdata = userdata;
928         s->enabled = SD_EVENT_ONESHOT;
929
930         r = source_set_pending(s, true);
931         if (r < 0) {
932                 source_free(s);
933                 return r;
934         }
935
936         *ret = s;
937         return 0;
938 }
939
940 _public_ int sd_event_add_exit(
941                 sd_event *e,
942                 sd_event_handler_t callback,
943                 void *userdata,
944                 sd_event_source **ret) {
945
946         sd_event_source *s;
947         int r;
948
949         assert_return(e, -EINVAL);
950         assert_return(callback, -EINVAL);
951         assert_return(ret, -EINVAL);
952         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
953         assert_return(!event_pid_changed(e), -ECHILD);
954
955         if (!e->exit) {
956                 e->exit = prioq_new(exit_prioq_compare);
957                 if (!e->exit)
958                         return -ENOMEM;
959         }
960
961         s = source_new(e, SOURCE_EXIT);
962         if (!s)
963                 return -ENOMEM;
964
965         s->exit.callback = callback;
966         s->userdata = userdata;
967         s->exit.prioq_index = PRIOQ_IDX_NULL;
968         s->enabled = SD_EVENT_ONESHOT;
969
970         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
971         if (r < 0) {
972                 source_free(s);
973                 return r;
974         }
975
976         *ret = s;
977         return 0;
978 }
979
980 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
981         assert_return(s, NULL);
982
983         assert(s->n_ref >= 1);
984         s->n_ref++;
985
986         return s;
987 }
988
989 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
990
991         if (!s)
992                 return NULL;
993
994         assert(s->n_ref >= 1);
995         s->n_ref--;
996
997         if (s->n_ref <= 0) {
998                 /* Here's a special hack: when we are called from a
999                  * dispatch handler we won't free the event source
1000                  * immediately, but we will detach the fd from the
1001                  * epoll. This way it is safe for the caller to unref
1002                  * the event source and immediately close the fd, but
1003                  * we still retain a valid event source object after
1004                  * the callback. */
1005
1006                 if (s->dispatching) {
1007                         if (s->type == SOURCE_IO)
1008                                 source_io_unregister(s);
1009                 } else
1010                         source_free(s);
1011         }
1012
1013         return NULL;
1014 }
1015
/* Returns the event loop this source is attached to (NULL if s is NULL). */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1021
/* Returns whether the source is currently queued for dispatching
 * (0 or 1), or a negative errno-style error. Exit sources have no
 * pending state, hence -EDOM for them. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1030
/* Returns the file descriptor watched by an I/O source, or a negative
 * errno-style error (-EDOM for non-I/O sources). */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1038
1039 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1040         int r;
1041
1042         assert_return(s, -EINVAL);
1043         assert_return(fd >= 0, -EINVAL);
1044         assert_return(s->type == SOURCE_IO, -EDOM);
1045         assert_return(!event_pid_changed(s->event), -ECHILD);
1046
1047         if (s->io.fd == fd)
1048                 return 0;
1049
1050         if (s->enabled == SD_EVENT_OFF) {
1051                 s->io.fd = fd;
1052                 s->io.registered = false;
1053         } else {
1054                 int saved_fd;
1055
1056                 saved_fd = s->io.fd;
1057                 assert(s->io.registered);
1058
1059                 s->io.fd = fd;
1060                 s->io.registered = false;
1061
1062                 r = source_io_register(s, s->enabled, s->io.events);
1063                 if (r < 0) {
1064                         s->io.fd = saved_fd;
1065                         s->io.registered = true;
1066                         return r;
1067                 }
1068
1069                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1070         }
1071
1072         return 0;
1073 }
1074
/* Stores the epoll event mask of an I/O source in *events.
 * Returns 0 on success, negative errno-style error on failure. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1084
1085 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1086         int r;
1087
1088         assert_return(s, -EINVAL);
1089         assert_return(s->type == SOURCE_IO, -EDOM);
1090         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1091         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1092         assert_return(!event_pid_changed(s->event), -ECHILD);
1093
1094         if (s->io.events == events)
1095                 return 0;
1096
1097         if (s->enabled != SD_EVENT_OFF) {
1098                 r = source_io_register(s, s->enabled, events);
1099                 if (r < 0)
1100                         return r;
1101         }
1102
1103         s->io.events = events;
1104         source_set_pending(s, false);
1105
1106         return 0;
1107 }
1108
/* Stores the events that actually triggered (as reported by epoll) in
 * *revents. Only valid while the source is pending, hence -ENODATA
 * otherwise. Returns 0 on success, negative errno-style error on
 * failure. */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1119
/* Returns the signal number a signal source watches, or a negative
 * errno-style error (-EDOM for non-signal sources). */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1127
1128 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1129         assert_return(s, -EINVAL);
1130         assert_return(!event_pid_changed(s->event), -ECHILD);
1131
1132         return s->priority;
1133 }
1134
1135 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1136         assert_return(s, -EINVAL);
1137         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1138         assert_return(!event_pid_changed(s->event), -ECHILD);
1139
1140         if (s->priority == priority)
1141                 return 0;
1142
1143         s->priority = priority;
1144
1145         if (s->pending)
1146                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1147
1148         if (s->prepare)
1149                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1150
1151         if (s->type == SOURCE_EXIT)
1152                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1153
1154         return 0;
1155 }
1156
/* Stores the source's enablement state (SD_EVENT_OFF, SD_EVENT_ON or
 * SD_EVENT_ONESHOT) in *m. Returns 0 on success, negative errno-style
 * error on failure. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1165
1166 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1167         int r;
1168
1169         assert_return(s, -EINVAL);
1170         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1171         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1172         assert_return(!event_pid_changed(s->event), -ECHILD);
1173
1174         if (s->enabled == m)
1175                 return 0;
1176
1177         if (m == SD_EVENT_OFF) {
1178
1179                 switch (s->type) {
1180
1181                 case SOURCE_IO:
1182                         r = source_io_unregister(s);
1183                         if (r < 0)
1184                                 return r;
1185
1186                         s->enabled = m;
1187                         break;
1188
1189                 case SOURCE_MONOTONIC:
1190                         s->enabled = m;
1191                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1192                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1193                         break;
1194
1195                 case SOURCE_REALTIME:
1196                         s->enabled = m;
1197                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1198                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1199                         break;
1200
1201                 case SOURCE_SIGNAL:
1202                         s->enabled = m;
1203                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1204                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1205                                 event_update_signal_fd(s->event);
1206                         }
1207
1208                         break;
1209
1210                 case SOURCE_CHILD:
1211                         s->enabled = m;
1212
1213                         assert(s->event->n_enabled_child_sources > 0);
1214                         s->event->n_enabled_child_sources--;
1215
1216                         if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1217                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1218                                 event_update_signal_fd(s->event);
1219                         }
1220
1221                         break;
1222
1223                 case SOURCE_EXIT:
1224                         s->enabled = m;
1225                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1226                         break;
1227
1228                 case SOURCE_DEFER:
1229                         s->enabled = m;
1230                         break;
1231                 }
1232
1233         } else {
1234                 switch (s->type) {
1235
1236                 case SOURCE_IO:
1237                         r = source_io_register(s, m, s->io.events);
1238                         if (r < 0)
1239                                 return r;
1240
1241                         s->enabled = m;
1242                         break;
1243
1244                 case SOURCE_MONOTONIC:
1245                         s->enabled = m;
1246                         prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1247                         prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1248                         break;
1249
1250                 case SOURCE_REALTIME:
1251                         s->enabled = m;
1252                         prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1253                         prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1254                         break;
1255
1256                 case SOURCE_SIGNAL:
1257                         s->enabled = m;
1258
1259                         if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
1260                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1261                                 event_update_signal_fd(s->event);
1262                         }
1263                         break;
1264
1265                 case SOURCE_CHILD:
1266                         s->enabled = m;
1267
1268                         if (s->enabled == SD_EVENT_OFF) {
1269                                 s->event->n_enabled_child_sources++;
1270
1271                                 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1272                                         assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1273                                         event_update_signal_fd(s->event);
1274                                 }
1275                         }
1276                         break;
1277
1278                 case SOURCE_EXIT:
1279                         s->enabled = m;
1280                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1281                         break;
1282
1283                 case SOURCE_DEFER:
1284                         s->enabled = m;
1285                         break;
1286                 }
1287         }
1288
1289         if (s->pending)
1290                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1291
1292         if (s->prepare)
1293                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1294
1295         return 0;
1296 }
1297
/* Stores the absolute elapse time (in µs) of a timer source in *usec.
 * Returns 0 on success, negative errno-style error on failure. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1307
1308 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1309         assert_return(s, -EINVAL);
1310         assert_return(usec != (uint64_t) -1, -EINVAL);
1311         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1312         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1313         assert_return(!event_pid_changed(s->event), -ECHILD);
1314
1315         s->time.next = usec;
1316
1317         source_set_pending(s, false);
1318
1319         if (s->type == SOURCE_REALTIME) {
1320                 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1321                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1322         } else {
1323                 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1324                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1325         }
1326
1327         return 0;
1328 }
1329
/* Stores the dispatch accuracy (in µs) of a timer source in *usec.
 * Returns 0 on success, negative errno-style error on failure. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1339
1340 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1341         assert_return(s, -EINVAL);
1342         assert_return(usec != (uint64_t) -1, -EINVAL);
1343         assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1344         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1345         assert_return(!event_pid_changed(s->event), -ECHILD);
1346
1347         if (usec == 0)
1348                 usec = DEFAULT_ACCURACY_USEC;
1349
1350         s->time.accuracy = usec;
1351
1352         source_set_pending(s, false);
1353
1354         if (s->type == SOURCE_REALTIME)
1355                 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1356         else
1357                 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1358
1359         return 0;
1360 }
1361
/* Stores the PID a child source waits on in *pid. Returns 0 on
 * success, negative errno-style error on failure. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1371
/* Installs (or, with NULL, removes) a prepare callback that runs at
 * the beginning of each event-loop iteration, before polling.
 * Returns 0 on success, negative errno-style error on failure. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                /* Already queued in the prepare prioq; just swap the
                 * function pointer. */
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1403
/* Returns the opaque userdata pointer associated with the source
 * (NULL if s is NULL). */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1409
1410 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1411         void *ret;
1412
1413         assert_return(s, NULL);
1414
1415         ret = s->userdata;
1416         s->userdata = userdata;
1417
1418         return ret;
1419 }
1420
/* Picks a wakeup time within [a, b], preferring later times and
 * synchronizing wakeups system-wide via the per-boot perturbation
 * value e->perturb (see the big comment below). Returns the chosen
 * absolute time in µs. Requires a <= b. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;

        if (b <= a + 1)
                return a;

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Try the perturbed spot within the current minute... */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        /* ...then within the current 10s step... */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        /* ...then within the current second... */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        /* ...then within the current 250ms step... */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No shared spot fits; wake at the last possible time. */
        return b;
}
1496
1497 static int event_arm_timer(
1498                 sd_event *e,
1499                 int timer_fd,
1500                 Prioq *earliest,
1501                 Prioq *latest,
1502                 usec_t *next) {
1503
1504         struct itimerspec its = {};
1505         sd_event_source *a, *b;
1506         usec_t t;
1507         int r;
1508
1509         assert(e);
1510         assert(next);
1511
1512         a = prioq_peek(earliest);
1513         if (!a || a->enabled == SD_EVENT_OFF) {
1514
1515                 if (timer_fd < 0)
1516                         return 0;
1517
1518                 if (*next == (usec_t) -1)
1519                         return 0;
1520
1521                 /* disarm */
1522                 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1523                 if (r < 0)
1524                         return r;
1525
1526                 *next = (usec_t) -1;
1527
1528                 return 0;
1529         }
1530
1531         b = prioq_peek(latest);
1532         assert_se(b && b->enabled != SD_EVENT_OFF);
1533
1534         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1535         if (*next == t)
1536                 return 0;
1537
1538         assert_se(timer_fd >= 0);
1539
1540         if (t == 0) {
1541                 /* We don' want to disarm here, just mean some time looooong ago. */
1542                 its.it_value.tv_sec = 0;
1543                 its.it_value.tv_nsec = 1;
1544         } else
1545                 timespec_store(&its.it_value, t);
1546
1547         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1548         if (r < 0)
1549                 return -errno;
1550
1551         *next = t;
1552         return 0;
1553 }
1554
1555 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1556         assert(e);
1557         assert(s);
1558         assert(s->type == SOURCE_IO);
1559
1560         s->io.revents = events;
1561
1562         return source_set_pending(s, true);
1563 }
1564
/* Drains one expiration notification from a timerfd and invalidates
 * the cached deadline *next so the timer gets re-armed. Returns 0 on
 * success (including spurious EAGAIN/EINTR wakeups), negative
 * errno-style error otherwise. */
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        /* A timerfd should only ever signal readability. */
        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (errno == EAGAIN || errno == EINTR)
                        return 0;

                return -errno;
        }

        /* timerfd reads always deliver the full 8-byte counter. */
        if (ss != sizeof(x))
                return -EIO;

        if (next)
                *next = (usec_t) -1;

        return 0;
}
1590
1591 static int process_timer(
1592                 sd_event *e,
1593                 usec_t n,
1594                 Prioq *earliest,
1595                 Prioq *latest) {
1596
1597         sd_event_source *s;
1598         int r;
1599
1600         assert(e);
1601
1602         for (;;) {
1603                 s = prioq_peek(earliest);
1604                 if (!s ||
1605                     s->time.next > n ||
1606                     s->enabled == SD_EVENT_OFF ||
1607                     s->pending)
1608                         break;
1609
1610                 r = source_set_pending(s, true);
1611                 if (r < 0)
1612                         return r;
1613
1614                 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1615                 prioq_reshuffle(latest, s, &s->time.latest_index);
1616         }
1617
1618         return 0;
1619 }
1620
/* Polls every registered child source for a state change (without
 * reaping — see the big comment below) and marks sources whose child
 * changed state as pending. Returns 0 on success, negative
 * errno-style error on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* si_pid stays 0 below if waitid() found nothing. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1687
/* Drains the signalfd and marks the corresponding signal sources as
 * pending; SIGCHLD additionally triggers process_child(). Returns 1
 * if at least one signal was consumed, 0 if none was queued, and a
 * negative errno-style error on failure (-EIO for a short read or a
 * signal nobody subscribed to). */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        /* A signalfd should only ever signal readability. */
        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* Queue drained: report whether we got anything. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (ss != sizeof(si))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD may be watched via child sources even
                         * without an explicit signal source. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }

        return 0;
}
1734
/* Dispatches a single event source: clears its pending state, handles
 * ONESHOT auto-disabling, invokes the type-specific callback and
 * finally reaps child zombies and frees the source if it was unreffed
 * from inside the callback. Returns 1 on successful dispatch,
 * negative errno-style error on failure. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                /* Defer and exit sources keep their queue state;
                 * everything else is unqueued before dispatching. */
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, sd_event_source_unref() defers
         * freeing the source; see the n_ref check below. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_MONOTONIC:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_REALTIME:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Determine this before the callback, which may clobber
                 * the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;
        }

        s->dispatching = false;

        /* A failing callback disables the source instead of aborting
         * the whole loop. */
        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
1810
/* Runs the prepare callbacks of all enabled sources that registered
 * one, at most once per loop iteration (tracked via
 * prepare_iteration). A failing prepare callback disables its source
 * rather than aborting the loop. Returns 0 on success, negative
 * errno-style error on failure. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as handled before the callback so the reshuffle
                 * moves it past the not-yet-handled sources. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* See source_dispatch() for the dispatching flag. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
1845
/* Dispatches the highest-priority enabled exit source; once none is
 * left the loop transitions to SD_EVENT_FINISHED. Returns the result
 * of the dispatch, or 0 when finished. */
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        /* Pin the loop object in case the callback drops the last ref. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
1869
1870 static sd_event_source* event_next_pending(sd_event *e) {
1871         sd_event_source *p;
1872
1873         assert(e);
1874
1875         p = prioq_peek(e->pending);
1876         if (!p)
1877                 return NULL;
1878
1879         if (p->enabled == SD_EVENT_OFF)
1880                 return NULL;
1881
1882         return p;
1883 }
1884
/* Programs the watchdog timerfd to fire between 1/2 and 3/4 of the
 * watchdog period after the last notification, using sleep_between()
 * for system-wide wakeup coalescing. Returns 0 on success, negative
 * errno-style error on failure. */
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
1905
1906 static int process_watchdog(sd_event *e) {
1907         assert(e);
1908
1909         if (!e->watchdog)
1910                 return 0;
1911
1912         /* Don't notify watchdog too often */
1913         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
1914                 return 0;
1915
1916         sd_notify(false, "WATCHDOG=1");
1917         e->watchdog_last = e->timestamp.monotonic;
1918
1919         return arm_watchdog(e);
1920 }
1921
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, iterations only run exit sources
         * until the loop reaches SD_EVENT_FINISHED. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the event object so a callback dropping the last user
         * reference cannot destroy it mid-iteration. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        /* Run the registered prepare callbacks before going to sleep. */
        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Program the two timerfds to the earliest deadlines of the
         * monotonic and realtime timer sources. */
        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
        if (r < 0)
                goto finish;

        /* If there is work queued already, poll without blocking. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* timeout is in usec; round up to ms for epoll_wait(), with
         * (uint64_t) -1 meaning "block forever". */
        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* EINTR/EAGAIN are not errors, just an empty iteration. */
                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
                goto finish;
        }

        /* Single timestamp for this iteration; used by the timer and
         * watchdog processing below. */
        dual_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                /* The internal fds carry a small SOURCE_* tag as their
                 * data pointer; everything else is a real IO source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        /* Ping the service manager watchdog if it is due. */
        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Mark elapsed timer sources of both clocks pending. */
        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
        if (r < 0)
                goto finish;

        /* Reap children and mark their sources pending, if a SIGCHLD
         * was seen. */
        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch at most one source per iteration: the highest
         * priority pending one. */
        p = event_next_pending(e);
        if (!p) {
                r = 0;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2012
2013 _public_ int sd_event_loop(sd_event *e) {
2014         int r;
2015
2016         assert_return(e, -EINVAL);
2017         assert_return(!event_pid_changed(e), -ECHILD);
2018         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2019
2020         sd_event_ref(e);
2021
2022         while (e->state != SD_EVENT_FINISHED) {
2023                 r = sd_event_run(e, (uint64_t) -1);
2024                 if (r < 0)
2025                         goto finish;
2026         }
2027
2028         r = e->exit_code;
2029
2030 finish:
2031         sd_event_unref(e);
2032         return r;
2033 }
2034
2035 _public_ int sd_event_get_state(sd_event *e) {
2036         assert_return(e, -EINVAL);
2037         assert_return(!event_pid_changed(e), -ECHILD);
2038
2039         return e->state;
2040 }
2041
2042 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2043         assert_return(e, -EINVAL);
2044         assert_return(code, -EINVAL);
2045         assert_return(!event_pid_changed(e), -ECHILD);
2046
2047         if (!e->exit_requested)
2048                 return -ENODATA;
2049
2050         *code = e->exit_code;
2051         return 0;
2052 }
2053
2054 _public_ int sd_event_exit(sd_event *e, int code) {
2055         assert_return(e, -EINVAL);
2056         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2057         assert_return(!event_pid_changed(e), -ECHILD);
2058
2059         e->exit_requested = true;
2060         e->exit_code = code;
2061
2062         return 0;
2063 }
2064
2065 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
2066         assert_return(e, -EINVAL);
2067         assert_return(usec, -EINVAL);
2068         assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2069         assert_return(!event_pid_changed(e), -ECHILD);
2070
2071         *usec = e->timestamp.realtime;
2072         return 0;
2073 }
2074
2075 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
2076         assert_return(e, -EINVAL);
2077         assert_return(usec, -EINVAL);
2078         assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2079         assert_return(!event_pid_changed(e), -ECHILD);
2080
2081         *usec = e->timestamp.monotonic;
2082         return 0;
2083 }
2084
2085 _public_ int sd_event_default(sd_event **ret) {
2086
2087         static __thread sd_event *default_event = NULL;
2088         sd_event *e;
2089         int r;
2090
2091         if (!ret)
2092                 return !!default_event;
2093
2094         if (default_event) {
2095                 *ret = sd_event_ref(default_event);
2096                 return 0;
2097         }
2098
2099         r = sd_event_new(&e);
2100         if (r < 0)
2101                 return r;
2102
2103         e->default_event_ptr = &default_event;
2104         e->tid = gettid();
2105         default_event = e;
2106
2107         *ret = e;
2108         return 1;
2109 }
2110
2111 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2112         assert_return(e, -EINVAL);
2113         assert_return(tid, -EINVAL);
2114         assert_return(!event_pid_changed(e), -ECHILD);
2115
2116         if (e->tid != 0) {
2117                 *tid = e->tid;
2118                 return 0;
2119         }
2120
2121         return -ENXIO;
2122 }
2123
2124 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2125         int r;
2126
2127         assert_return(e, -EINVAL);
2128         assert_return(!event_pid_changed(e), -ECHILD);
2129
2130         if (e->watchdog == !!b)
2131                 return e->watchdog;
2132
2133         if (b) {
2134                 struct epoll_event ev = {};
2135                 const char *env;
2136
2137                 env = getenv("WATCHDOG_USEC");
2138                 if (!env)
2139                         return false;
2140
2141                 r = safe_atou64(env, &e->watchdog_period);
2142                 if (r < 0)
2143                         return r;
2144                 if (e->watchdog_period <= 0)
2145                         return -EIO;
2146
2147                 /* Issue first ping immediately */
2148                 sd_notify(false, "WATCHDOG=1");
2149                 e->watchdog_last = now(CLOCK_MONOTONIC);
2150
2151                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2152                 if (e->watchdog_fd < 0)
2153                         return -errno;
2154
2155                 r = arm_watchdog(e);
2156                 if (r < 0)
2157                         goto fail;
2158
2159                 ev.events = EPOLLIN;
2160                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2161
2162                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2163                 if (r < 0) {
2164                         r = -errno;
2165                         goto fail;
2166                 }
2167
2168         } else {
2169                 if (e->watchdog_fd >= 0) {
2170                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2171                         close_nointr_nofail(e->watchdog_fd);
2172                         e->watchdog_fd = -1;
2173                 }
2174         }
2175
2176         e->watchdog = !!b;
2177         return e->watchdog;
2178
2179 fail:
2180         close_nointr_nofail(e->watchdog_fd);
2181         e->watchdog_fd = -1;
2182         return r;
2183 }
2184
2185 _public_ int sd_event_get_watchdog(sd_event *e) {
2186         assert_return(e, -EINVAL);
2187         assert_return(!event_pid_changed(e), -ECHILD);
2188
2189         return e->watchdog;
2190 }