chiark / gitweb /
sd-event: when determining the last allowed time a time event may elapse, deal with...
[elogind.git] / src / libelogind / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-daemon.h"
27 #include "sd-event.h"
28 #include "sd-id128.h"
29
30 #include "alloc-util.h"
31 #include "fd-util.h"
32 #include "hashmap.h"
33 #include "list.h"
34 #include "macro.h"
35 #include "missing.h"
36 #include "prioq.h"
37 #include "process-util.h"
38 #include "set.h"
39 #include "signal-util.h"
40 #include "string-table.h"
41 #include "string-util.h"
42 #include "time-util.h"
43 #include "util.h"
44
45 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
46
/* The kinds of event sources the loop knows. The five SOURCE_TIME_*
 * values map 1:1 to the clocks supported by timerfd (see
 * clock_to_event_source_type() below). */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
63
64 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
65         [SOURCE_IO] = "io",
66         [SOURCE_TIME_REALTIME] = "realtime",
67         [SOURCE_TIME_BOOTTIME] = "bootime",
68         [SOURCE_TIME_MONOTONIC] = "monotonic",
69         [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
70         [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
71         [SOURCE_SIGNAL] = "signal",
72         [SOURCE_CHILD] = "child",
73         [SOURCE_DEFER] = "defer",
74         [SOURCE_POST] = "post",
75         [SOURCE_EXIT] = "exit",
76         [SOURCE_WATCHDOG] = "watchdog",
77 };
78
79 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
80
81 /* All objects we use in epoll events start with this value, so that
82  * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE, /* epoll data.ptr points to an sd_event_source */
        WAKEUP_CLOCK_DATA,   /* epoll data.ptr points to a struct clock_data */
        WAKEUP_SIGNAL_DATA,  /* epoll data.ptr points to a struct signal_data */
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
91
/* True for the five timer event source types, i.e. one per supported clock. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
93
/* A single event source. Which member of the trailing anonymous union
 * is valid is determined by the "type" field. */
struct sd_event_source {
        WakeupType wakeup;      /* must come first: epoll data.ptr dispatch key */

        unsigned n_ref;

        sd_event *event;        /* loop the source is attached to; NULL once disconnected */
        void *userdata;
        sd_event_handler_t prepare;

        char *description;      /* free()d in source_free() */

        EventSourceType type:5;
        int enabled:3;          /* one of SD_EVENT_OFF/ON/ONESHOT */
        bool pending:1;
        bool dispatching:1;
        bool floating:1;        /* floating sources are owned by the loop and do
                                 * not hold a reference on it (see source_new()) */

        int64_t priority;       /* lower values are dispatched first */
        unsigned pending_index;    /* position in the loop's "pending" prioq */
        unsigned prepare_index;    /* position in the loop's "prepare" prioq */
        unsigned pending_iteration; /* iteration in which the source became pending */
        unsigned prepare_iteration; /* iteration in which prepare() last ran */

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;   /* epoll events we subscribed to */
                        uint32_t revents;  /* epoll events that actually fired */
                        bool registered:1; /* whether fd is currently in the epoll set */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy; /* earliest elapse time and allowed slack */
                        unsigned earliest_index; /* position in clock_data->earliest */
                        unsigned latest_index;   /* position in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;       /* waitid() options */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* position in the loop's "exit" prioq */
                } exit;
        };
};
156
/* Per-clock state: the timerfd plus the two scheduling queues. */
struct clock_data {
        WakeupType wakeup;      /* always WAKEUP_CLOCK_DATA, epoll dispatch key */
        int fd;                 /* timerfd for this clock, -1 until lazily created */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;            /* initialized to USEC_INFINITY in sd_event_new();
                                 * presumably the elapse time currently armed on the
                                 * timerfd -- confirm against the arming code */

        bool needs_rearm:1;     /* set whenever the queues change so the timerfd
                                 * must be re-armed */
};
174
struct signal_data {
        WakeupType wakeup;      /* always WAKEUP_SIGNAL_DATA, epoll dispatch key */

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;                 /* signalfd covering "sigset", -1 until created */
        int64_t priority;       /* also the hashmap key (keyed by &priority) */
        sigset_t sigset;        /* signals currently routed through this fd */
        sd_event_source *current; /* source whose signal is being dispatched, if any */
};
187
/* The event loop object itself. */
struct sd_event {
        unsigned n_ref;

        int epoll_fd;           /* the central epoll instance */
        int watchdog_fd;        /* timerfd driving sd_notify() watchdog pings */

        Prioq *pending;         /* sources with an event waiting to be dispatched */
        Prioq *prepare;         /* sources with a prepare() callback set */

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;         /* boot-id derived wakeup offset, see initialize_perturb() */

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;            /* indexed by PID */
        unsigned n_enabled_child_sources;  /* keeps SIGCHLD routed while > 0 */

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;     /* PID at allocation time, used to detect fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;  /* set via $SD_EVENT_PROFILE_DELAYS, see sd_event_new() */

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* cleared in event_free() so the per-thread
                                       * default pointer never dangles */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8]; /* log2 histogram of loop latencies,
                                              * used when profile_delays is set */
};
244
245 static void source_disconnect(sd_event_source *s);
246
247 static int pending_prioq_compare(const void *a, const void *b) {
248         const sd_event_source *x = a, *y = b;
249
250         assert(x->pending);
251         assert(y->pending);
252
253         /* Enabled ones first */
254         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
255                 return -1;
256         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
257                 return 1;
258
259         /* Lower priority values first */
260         if (x->priority < y->priority)
261                 return -1;
262         if (x->priority > y->priority)
263                 return 1;
264
265         /* Older entries first */
266         if (x->pending_iteration < y->pending_iteration)
267                 return -1;
268         if (x->pending_iteration > y->pending_iteration)
269                 return 1;
270
271         return 0;
272 }
273
274 static int prepare_prioq_compare(const void *a, const void *b) {
275         const sd_event_source *x = a, *y = b;
276
277         assert(x->prepare);
278         assert(y->prepare);
279
280         /* Enabled ones first */
281         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
282                 return -1;
283         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
284                 return 1;
285
286         /* Move most recently prepared ones last, so that we can stop
287          * preparing as soon as we hit one that has already been
288          * prepared in the current iteration */
289         if (x->prepare_iteration < y->prepare_iteration)
290                 return -1;
291         if (x->prepare_iteration > y->prepare_iteration)
292                 return 1;
293
294         /* Lower priority values first */
295         if (x->priority < y->priority)
296                 return -1;
297         if (x->priority > y->priority)
298                 return 1;
299
300         return 0;
301 }
302
303 static int earliest_time_prioq_compare(const void *a, const void *b) {
304         const sd_event_source *x = a, *y = b;
305
306         assert(EVENT_SOURCE_IS_TIME(x->type));
307         assert(x->type == y->type);
308
309         /* Enabled ones first */
310         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
311                 return -1;
312         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
313                 return 1;
314
315         /* Move the pending ones to the end */
316         if (!x->pending && y->pending)
317                 return -1;
318         if (x->pending && !y->pending)
319                 return 1;
320
321         /* Order by time */
322         if (x->time.next < y->time.next)
323                 return -1;
324         if (x->time.next > y->time.next)
325                 return 1;
326
327         return 0;
328 }
329
/* The latest time at which a time event source must have been
 * dispatched: its requested elapse time plus its accuracy slack.
 * usec_add() is used (rather than a plain +) presumably to avoid
 * overflow when next or accuracy is huge -- confirm in time-util. */
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
333
334 static int latest_time_prioq_compare(const void *a, const void *b) {
335         const sd_event_source *x = a, *y = b;
336
337         assert(EVENT_SOURCE_IS_TIME(x->type));
338         assert(x->type == y->type);
339
340         /* Enabled ones first */
341         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
342                 return -1;
343         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
344                 return 1;
345
346         /* Move the pending ones to the end */
347         if (!x->pending && y->pending)
348                 return -1;
349         if (x->pending && !y->pending)
350                 return 1;
351
352         /* Order by time */
353         if (time_event_source_latest(x) < time_event_source_latest(y))
354                 return -1;
355         if (time_event_source_latest(x) > time_event_source_latest(y))
356                 return 1;
357
358         return 0;
359 }
360
361 static int exit_prioq_compare(const void *a, const void *b) {
362         const sd_event_source *x = a, *y = b;
363
364         assert(x->type == SOURCE_EXIT);
365         assert(y->type == SOURCE_EXIT);
366
367         /* Enabled ones first */
368         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
369                 return -1;
370         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
371                 return 1;
372
373         /* Lower priority values first */
374         if (x->priority < y->priority)
375                 return -1;
376         if (x->priority > y->priority)
377                 return 1;
378
379         return 0;
380 }
381
/* Releases the timerfd and both scheduling queues of a per-clock data
 * object. The struct itself is embedded in sd_event and not freed here. */
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
390
/* Destroys an event loop object. Any sources still attached at this
 * point must be floating ones: non-floating sources hold a reference
 * on the loop and would have kept it alive. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Disconnect and drop the loop's own reference on each
         * remaining (floating) source. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If this was installed as a default event loop, invalidate
         * the cached pointer so it does not dangle. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
427
/* Allocates a new event loop and its epoll instance. All per-clock and
 * watchdog fds start out as -1 and are opened lazily on first use. On
 * failure everything allocated so far is released again.
 * Returns 0 on success, a negative errno-style error otherwise. */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY; /* computed lazily in initialize_perturb() */

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
467
468 _public_ sd_event* sd_event_ref(sd_event *e) {
469         assert_return(e, NULL);
470
471         assert(e->n_ref >= 1);
472         e->n_ref++;
473
474         return e;
475 }
476
477 _public_ sd_event* sd_event_unref(sd_event *e) {
478
479         if (!e)
480                 return NULL;
481
482         assert(e->n_ref >= 1);
483         e->n_ref--;
484
485         if (e->n_ref <= 0)
486                 event_free(e);
487
488         return NULL;
489 }
490
491 static bool event_pid_changed(sd_event *e) {
492         assert(e);
493
494         /* We don't support people creating an event loop and keeping
495          * it around over a fork(). Let's complain. */
496
497         return e->original_pid != getpid();
498 }
499
/* Removes an IO source's fd from the epoll instance, if currently
 * registered. Failures are only logged at debug level: this runs on
 * teardown paths where there is nothing better to do. */
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        /* After a fork() the epoll fd belongs to the parent process,
         * leave it alone. */
        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
519
520 static int source_io_register(
521                 sd_event_source *s,
522                 int enabled,
523                 uint32_t events) {
524
525         struct epoll_event ev = {};
526         int r;
527
528         assert(s);
529         assert(s->type == SOURCE_IO);
530         assert(enabled != SD_EVENT_OFF);
531
532         ev.events = events;
533         ev.data.ptr = s;
534
535         if (enabled == SD_EVENT_ONESHOT)
536                 ev.events |= EPOLLONESHOT;
537
538         if (s->io.registered)
539                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
540         else
541                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
542         if (r < 0)
543                 return -errno;
544
545         s->io.registered = true;
546
547         return 0;
548 }
549
#if 0 /// UNNEEDED by elogind
/* Inverse of clock_to_event_source_type(): maps a timer event source
 * type back to the clock id it uses. Returns (clockid_t) -1 for
 * non-time source types. */
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
#endif // 0
575
576 static EventSourceType clock_to_event_source_type(clockid_t clock) {
577
578         switch (clock) {
579
580         case CLOCK_REALTIME:
581                 return SOURCE_TIME_REALTIME;
582
583         case CLOCK_BOOTTIME:
584                 return SOURCE_TIME_BOOTTIME;
585
586         case CLOCK_MONOTONIC:
587                 return SOURCE_TIME_MONOTONIC;
588
589         case CLOCK_REALTIME_ALARM:
590                 return SOURCE_TIME_REALTIME_ALARM;
591
592         case CLOCK_BOOTTIME_ALARM:
593                 return SOURCE_TIME_BOOTTIME_ALARM;
594
595         default:
596                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
597         }
598 }
599
600 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
601         assert(e);
602
603         switch (t) {
604
605         case SOURCE_TIME_REALTIME:
606                 return &e->realtime;
607
608         case SOURCE_TIME_BOOTTIME:
609                 return &e->boottime;
610
611         case SOURCE_TIME_MONOTONIC:
612                 return &e->monotonic;
613
614         case SOURCE_TIME_REALTIME_ALARM:
615                 return &e->realtime_alarm;
616
617         case SOURCE_TIME_BOOTTIME_ALARM:
618                 return &e->boottime_alarm;
619
620         default:
621                 return NULL;
622         }
623 }
624
625 static int event_make_signal_data(
626                 sd_event *e,
627                 int sig,
628                 struct signal_data **ret) {
629
630         struct epoll_event ev = {};
631         struct signal_data *d;
632         bool added = false;
633         sigset_t ss_copy;
634         int64_t priority;
635         int r;
636
637         assert(e);
638
639         if (event_pid_changed(e))
640                 return -ECHILD;
641
642         if (e->signal_sources && e->signal_sources[sig])
643                 priority = e->signal_sources[sig]->priority;
644         else
645                 priority = 0;
646
647         d = hashmap_get(e->signal_data, &priority);
648         if (d) {
649                 if (sigismember(&d->sigset, sig) > 0) {
650                         if (ret)
651                                 *ret = d;
652                 return 0;
653                 }
654         } else {
655                 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
656                 if (r < 0)
657                         return r;
658
659                 d = new0(struct signal_data, 1);
660                 if (!d)
661                         return -ENOMEM;
662
663                 d->wakeup = WAKEUP_SIGNAL_DATA;
664                 d->fd  = -1;
665                 d->priority = priority;
666
667                 r = hashmap_put(e->signal_data, &d->priority, d);
668         if (r < 0)
669                         return r;
670
671                 added = true;
672         }
673
674         ss_copy = d->sigset;
675         assert_se(sigaddset(&ss_copy, sig) >= 0);
676
677         r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
678         if (r < 0) {
679                 r = -errno;
680                 goto fail;
681         }
682
683         d->sigset = ss_copy;
684
685         if (d->fd >= 0) {
686                 if (ret)
687                         *ret = d;
688                 return 0;
689         }
690
691         d->fd = r;
692
693         ev.events = EPOLLIN;
694         ev.data.ptr = d;
695
696         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
697         if (r < 0) {
698                 r = -errno;
699                 goto fail;
700         }
701
702         if (ret)
703                 *ret = d;
704
705         return 0;
706
707 fail:
708         if (added) {
709                 d->fd = safe_close(d->fd);
710                 hashmap_remove(e->signal_data, &d->priority);
711                 free(d);
712         }
713
714         return r;
715 }
716
/* Turns off the specified signal in the signal data object. If the
 * object's signal mask becomes empty as a result, the object is
 * removed from the hashmap and freed. */
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is now all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                assert(!d->current);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        /* Otherwise re-program the signalfd with the reduced mask */
        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
745
/* Rechecks whether the specified signal is still something we are
 * interested in. If not, unmasks it in every signal_data object that
 * could be carrying it, possibly dropping their signalfds. */
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* SIGCHLD stays routed as long as any child source is enabled */
        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        /* An enabled signal source for it keeps it routed too */
        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
791
/* Detaches a source from its event loop: removes it from the type's
 * registration (epoll, clock prioqs, signal/child bookkeeping, ...) and
 * from the pending/prepare queues, then severs the source<->loop link.
 * The source object itself stays allocated; freeing is source_free(). */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        /* Already disconnected? */
        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* Possibly drop the signal from its signalfd */
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        /* SIGCHLD may no longer be needed now */
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Non-floating sources held a reference on the loop, drop it now */
        if (!s->floating)
                sd_event_unref(event);
}
882
/* Disconnects a source from its loop and releases its memory. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
890
/* Marks a source as pending (b == true) or no longer pending, keeping
 * the loop's pending prioq and the per-clock queues in sync. Returns 0
 * on success or if nothing changed, a negative error on prioq failure. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT); /* exit sources use their own prioq */

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        /* Roll back the flag so state stays consistent */
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        /* Pending-ness is part of the time prioq ordering, reshuffle */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        /* If this signal source just got dispatched, clear the per-fd
         * "currently dispatching" pointer. */
        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        return 0;
}
934
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
936         sd_event_source *s;
937
938         assert(e);
939
940         s = new0(sd_event_source, 1);
941         if (!s)
942                 return NULL;
943
944         s->n_ref = 1;
945         s->event = e;
946         s->floating = floating;
947         s->type = type;
948         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
949
950         if (!floating)
951                 sd_event_ref(e);
952
953         LIST_PREPEND(sources, e->sources, s);
954         e->n_sources ++;
955
956         return s;
957 }
958
/* Adds an IO event source watching fd for the given epoll event mask.
 * The source starts out enabled (SD_EVENT_ON) and is registered with
 * epoll immediately. If ret is NULL the source is created floating,
 * i.e. owned by the loop. Returns 0 on success, negative errno-style
 * error otherwise. */
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
999
/* Lazily computes the loop's wakeup perturbation offset. */
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        /* On failure, perturb stays USEC_INFINITY and we retry next time */
        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
1018
1019 static int event_setup_timer_fd(
1020                 sd_event *e,
1021                 struct clock_data *d,
1022                 clockid_t clock) {
1023
1024         struct epoll_event ev = {};
1025         int r, fd;
1026
1027         assert(e);
1028         assert(d);
1029
1030         if (_likely_(d->fd >= 0))
1031                 return 0;
1032
1033         fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1034         if (fd < 0)
1035                 return -errno;
1036
1037         ev.events = EPOLLIN;
1038         ev.data.ptr = d;
1039
1040         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1041         if (r < 0) {
1042                 safe_close(fd);
1043                 return -errno;
1044         }
1045
1046         d->fd = fd;
1047         return 0;
1048 }
1049
1050 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1051         assert(s);
1052
1053         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1054 }
1055
1056 _public_ int sd_event_add_time(
1057                 sd_event *e,
1058                 sd_event_source **ret,
1059                 clockid_t clock,
1060                 uint64_t usec,
1061                 uint64_t accuracy,
1062                 sd_event_time_handler_t callback,
1063                 void *userdata) {
1064
1065         EventSourceType type;
1066         sd_event_source *s;
1067         struct clock_data *d;
1068         int r;
1069
1070         assert_return(e, -EINVAL);
1071         assert_return(usec != (uint64_t) -1, -EINVAL);
1072         assert_return(accuracy != (uint64_t) -1, -EINVAL);
1073         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1074         assert_return(!event_pid_changed(e), -ECHILD);
1075
1076         if (!callback)
1077                 callback = time_exit_callback;
1078
1079         type = clock_to_event_source_type(clock);
1080         assert_return(type >= 0, -EOPNOTSUPP);
1081
1082         d = event_get_clock_data(e, type);
1083         assert(d);
1084
1085         r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1086         if (r < 0)
1087                 return r;
1088
1089         r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1090         if (r < 0)
1091                 return r;
1092
1093         if (d->fd < 0) {
1094                 r = event_setup_timer_fd(e, d, clock);
1095                 if (r < 0)
1096                         return r;
1097         }
1098
1099         s = source_new(e, !ret, type);
1100         if (!s)
1101                 return -ENOMEM;
1102
1103         s->time.next = usec;
1104         s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1105         s->time.callback = callback;
1106         s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1107         s->userdata = userdata;
1108         s->enabled = SD_EVENT_ONESHOT;
1109
1110         d->needs_rearm = true;
1111
1112         r = prioq_put(d->earliest, s, &s->time.earliest_index);
1113         if (r < 0)
1114                 goto fail;
1115
1116         r = prioq_put(d->latest, s, &s->time.latest_index);
1117         if (r < 0)
1118                 goto fail;
1119
1120         if (ret)
1121                 *ret = s;
1122
1123         return 0;
1124
1125 fail:
1126         source_free(s);
1127         return r;
1128 }
1129
1130 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1131         assert(s);
1132
1133         return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1134 }
1135
1136 _public_ int sd_event_add_signal(
1137                 sd_event *e,
1138                 sd_event_source **ret,
1139                 int sig,
1140                 sd_event_signal_handler_t callback,
1141                 void *userdata) {
1142
1143         sd_event_source *s;
1144         struct signal_data *d;
1145         sigset_t ss;
1146         int r;
1147
1148         assert_return(e, -EINVAL);
1149         assert_return(sig > 0, -EINVAL);
1150         assert_return(sig < _NSIG, -EINVAL);
1151         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1152         assert_return(!event_pid_changed(e), -ECHILD);
1153
1154         if (!callback)
1155                 callback = signal_exit_callback;
1156
1157         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1158         if (r != 0)
1159                 return -r;
1160
1161         if (!sigismember(&ss, sig))
1162                 return -EBUSY;
1163
1164         if (!e->signal_sources) {
1165                 e->signal_sources = new0(sd_event_source*, _NSIG);
1166                 if (!e->signal_sources)
1167                         return -ENOMEM;
1168         } else if (e->signal_sources[sig])
1169                 return -EBUSY;
1170
1171         s = source_new(e, !ret, SOURCE_SIGNAL);
1172         if (!s)
1173                 return -ENOMEM;
1174
1175         s->signal.sig = sig;
1176         s->signal.callback = callback;
1177         s->userdata = userdata;
1178         s->enabled = SD_EVENT_ON;
1179
1180         e->signal_sources[sig] = s;
1181
1182         r = event_make_signal_data(e, sig, &d);
1183                 if (r < 0) {
1184                         source_free(s);
1185                         return r;
1186                 }
1187
1188         /* Use the signal name as description for the event source by default */
1189         (void) sd_event_source_set_description(s, signal_to_string(sig));
1190
1191         if (ret)
1192                 *ret = s;
1193
1194         return 0;
1195 }
1196
1197 #if 0 /// UNNEEDED by elogind
1198 _public_ int sd_event_add_child(
1199                 sd_event *e,
1200                 sd_event_source **ret,
1201                 pid_t pid,
1202                 int options,
1203                 sd_event_child_handler_t callback,
1204                 void *userdata) {
1205
1206         sd_event_source *s;
1207         int r;
1208
1209         assert_return(e, -EINVAL);
1210         assert_return(pid > 1, -EINVAL);
1211         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1212         assert_return(options != 0, -EINVAL);
1213         assert_return(callback, -EINVAL);
1214         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1215         assert_return(!event_pid_changed(e), -ECHILD);
1216
1217         r = hashmap_ensure_allocated(&e->child_sources, NULL);
1218         if (r < 0)
1219                 return r;
1220
1221         if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1222                 return -EBUSY;
1223
1224         s = source_new(e, !ret, SOURCE_CHILD);
1225         if (!s)
1226                 return -ENOMEM;
1227
1228         s->child.pid = pid;
1229         s->child.options = options;
1230         s->child.callback = callback;
1231         s->userdata = userdata;
1232         s->enabled = SD_EVENT_ONESHOT;
1233
1234         r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1235         if (r < 0) {
1236                 source_free(s);
1237                 return r;
1238         }
1239
1240         e->n_enabled_child_sources ++;
1241
1242         r = event_make_signal_data(e, SIGCHLD, NULL);
1243                 if (r < 0) {
1244                 e->n_enabled_child_sources--;
1245                         source_free(s);
1246                         return r;
1247                 }
1248
1249         e->need_process_child = true;
1250
1251         if (ret)
1252                 *ret = s;
1253
1254         return 0;
1255 }
1256
1257 _public_ int sd_event_add_defer(
1258                 sd_event *e,
1259                 sd_event_source **ret,
1260                 sd_event_handler_t callback,
1261                 void *userdata) {
1262
1263         sd_event_source *s;
1264         int r;
1265
1266         assert_return(e, -EINVAL);
1267         assert_return(callback, -EINVAL);
1268         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1269         assert_return(!event_pid_changed(e), -ECHILD);
1270
1271         s = source_new(e, !ret, SOURCE_DEFER);
1272         if (!s)
1273                 return -ENOMEM;
1274
1275         s->defer.callback = callback;
1276         s->userdata = userdata;
1277         s->enabled = SD_EVENT_ONESHOT;
1278
1279         r = source_set_pending(s, true);
1280         if (r < 0) {
1281                 source_free(s);
1282                 return r;
1283         }
1284
1285         if (ret)
1286                 *ret = s;
1287
1288         return 0;
1289 }
1290 #endif // 0
1291
1292 _public_ int sd_event_add_post(
1293                 sd_event *e,
1294                 sd_event_source **ret,
1295                 sd_event_handler_t callback,
1296                 void *userdata) {
1297
1298         sd_event_source *s;
1299         int r;
1300
1301         assert_return(e, -EINVAL);
1302         assert_return(callback, -EINVAL);
1303         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1304         assert_return(!event_pid_changed(e), -ECHILD);
1305
1306         r = set_ensure_allocated(&e->post_sources, NULL);
1307         if (r < 0)
1308                 return r;
1309
1310         s = source_new(e, !ret, SOURCE_POST);
1311         if (!s)
1312                 return -ENOMEM;
1313
1314         s->post.callback = callback;
1315         s->userdata = userdata;
1316         s->enabled = SD_EVENT_ON;
1317
1318         r = set_put(e->post_sources, s);
1319         if (r < 0) {
1320                 source_free(s);
1321                 return r;
1322         }
1323
1324         if (ret)
1325                 *ret = s;
1326
1327         return 0;
1328 }
1329
1330 _public_ int sd_event_add_exit(
1331                 sd_event *e,
1332                 sd_event_source **ret,
1333                 sd_event_handler_t callback,
1334                 void *userdata) {
1335
1336         sd_event_source *s;
1337         int r;
1338
1339         assert_return(e, -EINVAL);
1340         assert_return(callback, -EINVAL);
1341         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1342         assert_return(!event_pid_changed(e), -ECHILD);
1343
1344         r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1345         if (r < 0)
1346                 return r;
1347
1348         s = source_new(e, !ret, SOURCE_EXIT);
1349         if (!s)
1350                 return -ENOMEM;
1351
1352         s->exit.callback = callback;
1353         s->userdata = userdata;
1354         s->exit.prioq_index = PRIOQ_IDX_NULL;
1355         s->enabled = SD_EVENT_ONESHOT;
1356
1357         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1358         if (r < 0) {
1359                 source_free(s);
1360                 return r;
1361         }
1362
1363         if (ret)
1364                 *ret = s;
1365
1366         return 0;
1367 }
1368
1369 #if 0 /// UNNEEDED by elogind
1370 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1371         assert_return(s, NULL);
1372
1373         assert(s->n_ref >= 1);
1374         s->n_ref++;
1375
1376         return s;
1377 }
1378 #endif // 0
1379
/* Drops a reference on the event source. Always returns NULL so callers
 * can write `s = sd_event_source_unref(s);`. NULL input is a no-op.
 * When the last reference is dropped while the source is being
 * dispatched, the object is only detached, not freed — see below. */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1408
1409 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1410         assert_return(s, -EINVAL);
1411         assert_return(!event_pid_changed(s->event), -ECHILD);
1412
1413         return free_and_strdup(&s->description, description);
1414 }
1415
1416 #if 0 /// UNNEEDED by elogind
1417 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1418         assert_return(s, -EINVAL);
1419         assert_return(description, -EINVAL);
1420         assert_return(s->description, -ENXIO);
1421         assert_return(!event_pid_changed(s->event), -ECHILD);
1422
1423         *description = s->description;
1424         return 0;
1425 }
1426 #endif // 0
1427
1428 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1429         assert_return(s, NULL);
1430
1431         return s->event;
1432 }
1433
1434 #if 0 /// UNNEEDED by elogind
1435 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1436         assert_return(s, -EINVAL);
1437         assert_return(s->type != SOURCE_EXIT, -EDOM);
1438         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1439         assert_return(!event_pid_changed(s->event), -ECHILD);
1440
1441         return s->pending;
1442 }
1443
1444 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1445         assert_return(s, -EINVAL);
1446         assert_return(s->type == SOURCE_IO, -EDOM);
1447         assert_return(!event_pid_changed(s->event), -ECHILD);
1448
1449         return s->io.fd;
1450 }
1451 #endif // 0
1452
/* Replaces the file descriptor watched by an I/O event source with 'fd'.
 * For an enabled source the new fd is registered with epoll before the
 * old one is removed, so the source can be rolled back unchanged if the
 * registration fails. Returns 0 on success, a negative errno-style error
 * on failure. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled source: just remember the new fd, registration
                 * happens when the source is enabled again. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first so failure leaves the source
                 * fully intact on the old fd. */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Deregistration failure of the old fd is deliberately
                 * ignored here. */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1488
1489 #if 0 /// UNNEEDED by elogind
1490 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1491         assert_return(s, -EINVAL);
1492         assert_return(events, -EINVAL);
1493         assert_return(s->type == SOURCE_IO, -EDOM);
1494         assert_return(!event_pid_changed(s->event), -ECHILD);
1495
1496         *events = s->io.events;
1497         return 0;
1498 }
1499 #endif // 0
1500
1501 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1502         int r;
1503
1504         assert_return(s, -EINVAL);
1505         assert_return(s->type == SOURCE_IO, -EDOM);
1506         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1507         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1508         assert_return(!event_pid_changed(s->event), -ECHILD);
1509
1510         /* edge-triggered updates are never skipped, so we can reset edges */
1511         if (s->io.events == events && !(events & EPOLLET))
1512                 return 0;
1513
1514         if (s->enabled != SD_EVENT_OFF) {
1515                 r = source_io_register(s, s->enabled, events);
1516                 if (r < 0)
1517                         return r;
1518         }
1519
1520         s->io.events = events;
1521         source_set_pending(s, false);
1522
1523         return 0;
1524 }
1525
1526 #if 0 /// UNNEEDED by elogind
1527 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1528         assert_return(s, -EINVAL);
1529         assert_return(revents, -EINVAL);
1530         assert_return(s->type == SOURCE_IO, -EDOM);
1531         assert_return(s->pending, -ENODATA);
1532         assert_return(!event_pid_changed(s->event), -ECHILD);
1533
1534         *revents = s->io.revents;
1535         return 0;
1536 }
1537
1538 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1539         assert_return(s, -EINVAL);
1540         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1541         assert_return(!event_pid_changed(s->event), -ECHILD);
1542
1543         return s->signal.sig;
1544 }
1545
1546 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1547         assert_return(s, -EINVAL);
1548         assert_return(!event_pid_changed(s->event), -ECHILD);
1549
1550         return s->priority;
1551 }
1552 #endif // 0
1553
1554 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1555         int r;
1556
1557         assert_return(s, -EINVAL);
1558         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1559         assert_return(!event_pid_changed(s->event), -ECHILD);
1560
1561         if (s->priority == priority)
1562                 return 0;
1563
1564         if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1565                 struct signal_data *old, *d;
1566
1567                 /* Move us from the signalfd belonging to the old
1568                  * priority to the signalfd of the new priority */
1569
1570                 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1571
1572                 s->priority = priority;
1573
1574                 r = event_make_signal_data(s->event, s->signal.sig, &d);
1575                 if (r < 0) {
1576                         s->priority = old->priority;
1577                         return r;
1578                 }
1579
1580                 event_unmask_signal_data(s->event, old, s->signal.sig);
1581         } else
1582         s->priority = priority;
1583
1584         if (s->pending)
1585                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1586
1587         if (s->prepare)
1588                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1589
1590         if (s->type == SOURCE_EXIT)
1591                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1592
1593         return 0;
1594 }
1595
1596 #if 0 /// UNNEEDED by elogind
1597 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1598         assert_return(s, -EINVAL);
1599         assert_return(m, -EINVAL);
1600         assert_return(!event_pid_changed(s->event), -ECHILD);
1601
1602         *m = s->enabled;
1603         return 0;
1604 }
1605 #endif // 0
1606
1607 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1608         int r;
1609
1610         assert_return(s, -EINVAL);
1611         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1612         assert_return(!event_pid_changed(s->event), -ECHILD);
1613
1614         /* If we are dead anyway, we are fine with turning off
1615          * sources, but everything else needs to fail. */
1616         if (s->event->state == SD_EVENT_FINISHED)
1617                 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1618
1619         if (s->enabled == m)
1620                 return 0;
1621
1622         if (m == SD_EVENT_OFF) {
1623
1624                 switch (s->type) {
1625
1626                 case SOURCE_IO:
1627                         source_io_unregister(s);
1628                         s->enabled = m;
1629                         break;
1630
1631                 case SOURCE_TIME_REALTIME:
1632                 case SOURCE_TIME_BOOTTIME:
1633                 case SOURCE_TIME_MONOTONIC:
1634                 case SOURCE_TIME_REALTIME_ALARM:
1635                 case SOURCE_TIME_BOOTTIME_ALARM: {
1636                         struct clock_data *d;
1637
1638                         s->enabled = m;
1639                         d = event_get_clock_data(s->event, s->type);
1640                         assert(d);
1641
1642                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1643                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1644                         d->needs_rearm = true;
1645                         break;
1646                 }
1647
1648                 case SOURCE_SIGNAL:
1649                         s->enabled = m;
1650
1651                         event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1652                         break;
1653
1654                 case SOURCE_CHILD:
1655                         s->enabled = m;
1656
1657                         assert(s->event->n_enabled_child_sources > 0);
1658                         s->event->n_enabled_child_sources--;
1659
1660                         event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1661                         break;
1662
1663                 case SOURCE_EXIT:
1664                         s->enabled = m;
1665                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1666                         break;
1667
1668                 case SOURCE_DEFER:
1669                 case SOURCE_POST:
1670                         s->enabled = m;
1671                         break;
1672
1673                 default:
1674                         assert_not_reached("Wut? I shouldn't exist.");
1675                 }
1676
1677         } else {
1678                 switch (s->type) {
1679
1680                 case SOURCE_IO:
1681                         r = source_io_register(s, m, s->io.events);
1682                         if (r < 0)
1683                                 return r;
1684
1685                         s->enabled = m;
1686                         break;
1687
1688                 case SOURCE_TIME_REALTIME:
1689                 case SOURCE_TIME_BOOTTIME:
1690                 case SOURCE_TIME_MONOTONIC:
1691                 case SOURCE_TIME_REALTIME_ALARM:
1692                 case SOURCE_TIME_BOOTTIME_ALARM: {
1693                         struct clock_data *d;
1694
1695                         s->enabled = m;
1696                         d = event_get_clock_data(s->event, s->type);
1697                         assert(d);
1698
1699                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1700                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1701                         d->needs_rearm = true;
1702                         break;
1703                 }
1704
1705                 case SOURCE_SIGNAL:
1706
1707                         s->enabled = m;
1708
1709                         r = event_make_signal_data(s->event, s->signal.sig, NULL);
1710                                 if (r < 0) {
1711                                         s->enabled = SD_EVENT_OFF;
1712                                 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1713                                         return r;
1714                                 }
1715
1716                         break;
1717
1718                 case SOURCE_CHILD:
1719
1720                         if (s->enabled == SD_EVENT_OFF)
1721                                 s->event->n_enabled_child_sources++;
1722
1723                         s->enabled = m;
1724
1725                         r = event_make_signal_data(s->event, SIGCHLD, NULL);
1726                                         if (r < 0) {
1727                                                 s->enabled = SD_EVENT_OFF;
1728                                 s->event->n_enabled_child_sources--;
1729                                 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1730                                                 return r;
1731                                         }
1732
1733                         break;
1734
1735                 case SOURCE_EXIT:
1736                         s->enabled = m;
1737                         prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1738                         break;
1739
1740                 case SOURCE_DEFER:
1741                 case SOURCE_POST:
1742                         s->enabled = m;
1743                         break;
1744
1745                 default:
1746                         assert_not_reached("Wut? I shouldn't exist.");
1747                 }
1748         }
1749
1750         if (s->pending)
1751                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1752
1753         if (s->prepare)
1754                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1755
1756         return 0;
1757 }
1758
1759 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1760         assert_return(s, -EINVAL);
1761         assert_return(usec, -EINVAL);
1762         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1763         assert_return(!event_pid_changed(s->event), -ECHILD);
1764
1765         *usec = s->time.next;
1766         return 0;
1767 }
1768
1769 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1770         struct clock_data *d;
1771
1772         assert_return(s, -EINVAL);
1773         assert_return(usec != (uint64_t) -1, -EINVAL);
1774         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1775         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1776         assert_return(!event_pid_changed(s->event), -ECHILD);
1777
1778         s->time.next = usec;
1779
1780         source_set_pending(s, false);
1781
1782         d = event_get_clock_data(s->event, s->type);
1783         assert(d);
1784
1785         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1786         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1787         d->needs_rearm = true;
1788
1789         return 0;
1790 }
1791
1792 #if 0 /// UNNEEDED by elogind
1793 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1794         assert_return(s, -EINVAL);
1795         assert_return(usec, -EINVAL);
1796         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1797         assert_return(!event_pid_changed(s->event), -ECHILD);
1798
1799         *usec = s->time.accuracy;
1800         return 0;
1801 }
1802
1803 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1804         struct clock_data *d;
1805
1806         assert_return(s, -EINVAL);
1807         assert_return(usec != (uint64_t) -1, -EINVAL);
1808         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1809         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1810         assert_return(!event_pid_changed(s->event), -ECHILD);
1811
1812         if (usec == 0)
1813                 usec = DEFAULT_ACCURACY_USEC;
1814
1815         s->time.accuracy = usec;
1816
1817         source_set_pending(s, false);
1818
1819         d = event_get_clock_data(s->event, s->type);
1820         assert(d);
1821
1822         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1823         d->needs_rearm = true;
1824
1825         return 0;
1826 }
1827
1828 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1829         assert_return(s, -EINVAL);
1830         assert_return(clock, -EINVAL);
1831         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1832         assert_return(!event_pid_changed(s->event), -ECHILD);
1833
1834         *clock = event_source_type_to_clock(s->type);
1835         return 0;
1836 }
1837
1838 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1839         assert_return(s, -EINVAL);
1840         assert_return(pid, -EINVAL);
1841         assert_return(s->type == SOURCE_CHILD, -EDOM);
1842         assert_return(!event_pid_changed(s->event), -ECHILD);
1843
1844         *pid = s->child.pid;
1845         return 0;
1846 }
1847 #endif // 0
1848
1849 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1850         int r;
1851
1852         assert_return(s, -EINVAL);
1853         assert_return(s->type != SOURCE_EXIT, -EDOM);
1854         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1855         assert_return(!event_pid_changed(s->event), -ECHILD);
1856
1857         if (s->prepare == callback)
1858                 return 0;
1859
1860         if (callback && s->prepare) {
1861                 s->prepare = callback;
1862                 return 0;
1863         }
1864
1865         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1866         if (r < 0)
1867                 return r;
1868
1869         s->prepare = callback;
1870
1871         if (callback) {
1872                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1873                 if (r < 0)
1874                         return r;
1875         } else
1876                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1877
1878         return 0;
1879 }
1880
1881 #if 0 /// UNNEEDED by elogind
1882 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1883         assert_return(s, NULL);
1884
1885         return s->userdata;
1886 }
1887
1888 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1889         void *ret;
1890
1891         assert_return(s, NULL);
1892
1893         ret = s->userdata;
1894         s->userdata = userdata;
1895
1896         return ret;
1897 }
1898 #endif // 0
1899
1900 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1901         usec_t c;
1902         assert(e);
1903         assert(a <= b);
1904
1905         if (a <= 0)
1906                 return 0;
1907
1908         if (b <= a + 1)
1909                 return a;
1910
1911         initialize_perturb(e);
1912
1913         /*
1914           Find a good time to wake up again between times a and b. We
1915           have two goals here:
1916
1917           a) We want to wake up as seldom as possible, hence prefer
1918              later times over earlier times.
1919
1920           b) But if we have to wake up, then let's make sure to
1921              dispatch as much as possible on the entire system.
1922
1923           We implement this by waking up everywhere at the same time
1924           within any given minute if we can, synchronised via the
1925           perturbation value determined from the boot ID. If we can't,
1926           then we try to find the same spot in every 10s, then 1s and
1927           then 250ms step. Otherwise, we pick the last possible time
1928           to wake up.
1929         */
1930
1931         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1932         if (c >= b) {
1933                 if (_unlikely_(c < USEC_PER_MINUTE))
1934                         return b;
1935
1936                 c -= USEC_PER_MINUTE;
1937         }
1938
1939         if (c >= a)
1940                 return c;
1941
1942         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1943         if (c >= b) {
1944                 if (_unlikely_(c < USEC_PER_SEC*10))
1945                         return b;
1946
1947                 c -= USEC_PER_SEC*10;
1948         }
1949
1950         if (c >= a)
1951                 return c;
1952
1953         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1954         if (c >= b) {
1955                 if (_unlikely_(c < USEC_PER_SEC))
1956                         return b;
1957
1958                 c -= USEC_PER_SEC;
1959         }
1960
1961         if (c >= a)
1962                 return c;
1963
1964         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1965         if (c >= b) {
1966                 if (_unlikely_(c < USEC_PER_MSEC*250))
1967                         return b;
1968
1969                 c -= USEC_PER_MSEC*250;
1970         }
1971
1972         if (c >= a)
1973                 return c;
1974
1975         return b;
1976 }
1977
1978 static int event_arm_timer(
1979                 sd_event *e,
1980                 struct clock_data *d) {
1981
1982         struct itimerspec its = {};
1983         sd_event_source *a, *b;
1984         usec_t t;
1985         int r;
1986
1987         assert(e);
1988         assert(d);
1989
1990         if (!d->needs_rearm)
1991                 return 0;
1992         else
1993                 d->needs_rearm = false;
1994
1995         a = prioq_peek(d->earliest);
1996         if (!a || a->enabled == SD_EVENT_OFF) {
1997
1998                 if (d->fd < 0)
1999                         return 0;
2000
2001                 if (d->next == USEC_INFINITY)
2002                         return 0;
2003
2004                 /* disarm */
2005                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2006                 if (r < 0)
2007                         return r;
2008
2009                 d->next = USEC_INFINITY;
2010                 return 0;
2011         }
2012
2013         b = prioq_peek(d->latest);
2014         assert_se(b && b->enabled != SD_EVENT_OFF);
2015
2016         t = sleep_between(e, a->time.next, time_event_source_latest(b));
2017         if (d->next == t)
2018                 return 0;
2019
2020         assert_se(d->fd >= 0);
2021
2022         if (t == 0) {
2023                 /* We don' want to disarm here, just mean some time looooong ago. */
2024                 its.it_value.tv_sec = 0;
2025                 its.it_value.tv_nsec = 1;
2026         } else
2027                 timespec_store(&its.it_value, t);
2028
2029         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2030         if (r < 0)
2031                 return -errno;
2032
2033         d->next = t;
2034         return 0;
2035 }
2036
2037 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2038         assert(e);
2039         assert(s);
2040         assert(s->type == SOURCE_IO);
2041
2042         /* If the event source was already pending, we just OR in the
2043          * new revents, otherwise we reset the value. The ORing is
2044          * necessary to handle EPOLLONESHOT events properly where
2045          * readability might happen independently of writability, and
2046          * we need to keep track of both */
2047
2048         if (s->pending)
2049                 s->io.revents |= revents;
2050         else
2051                 s->io.revents = revents;
2052
2053         return source_set_pending(s, true);
2054 }
2055
2056 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2057         uint64_t x;
2058         ssize_t ss;
2059
2060         assert(e);
2061         assert(fd >= 0);
2062
2063         assert_return(events == EPOLLIN, -EIO);
2064
2065         ss = read(fd, &x, sizeof(x));
2066         if (ss < 0) {
2067                 if (errno == EAGAIN || errno == EINTR)
2068                         return 0;
2069
2070                 return -errno;
2071         }
2072
2073         if (_unlikely_(ss != sizeof(x)))
2074                 return -EIO;
2075
2076         if (next)
2077                 *next = USEC_INFINITY;
2078
2079         return 0;
2080 }
2081
2082 static int process_timer(
2083                 sd_event *e,
2084                 usec_t n,
2085                 struct clock_data *d) {
2086
2087         sd_event_source *s;
2088         int r;
2089
2090         assert(e);
2091         assert(d);
2092
2093         for (;;) {
2094                 s = prioq_peek(d->earliest);
2095                 if (!s ||
2096                     s->time.next > n ||
2097                     s->enabled == SD_EVENT_OFF ||
2098                     s->pending)
2099                         break;
2100
2101                 r = source_set_pending(s, true);
2102                 if (r < 0)
2103                         return r;
2104
2105                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2106                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2107                 d->needs_rearm = true;
2108         }
2109
2110         return 0;
2111 }
2112
2113 static int process_child(sd_event *e) {
2114         sd_event_source *s;
2115         Iterator i;
2116         int r;
2117
2118         assert(e);
2119
2120         e->need_process_child = false;
2121
2122         /*
2123            So, this is ugly. We iteratively invoke waitid() with P_PID
2124            + WNOHANG for each PID we wait for, instead of using
2125            P_ALL. This is because we only want to get child
2126            information of very specific child processes, and not all
2127            of them. We might not have processed the SIGCHLD even of a
2128            previous invocation and we don't want to maintain a
2129            unbounded *per-child* event queue, hence we really don't
2130            want anything flushed out of the kernel's queue that we
2131            don't care about. Since this is O(n) this means that if you
2132            have a lot of processes you probably want to handle SIGCHLD
2133            yourself.
2134
2135            We do not reap the children here (by using WNOWAIT), this
2136            is only done after the event source is dispatched so that
2137            the callback still sees the process as a zombie.
2138         */
2139
2140         HASHMAP_FOREACH(s, e->child_sources, i) {
2141                 assert(s->type == SOURCE_CHILD);
2142
2143                 if (s->pending)
2144                         continue;
2145
2146                 if (s->enabled == SD_EVENT_OFF)
2147                         continue;
2148
2149                 zero(s->child.siginfo);
2150                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2151                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2152                 if (r < 0)
2153                         return -errno;
2154
2155                 if (s->child.siginfo.si_pid != 0) {
2156                         bool zombie =
2157                                 s->child.siginfo.si_code == CLD_EXITED ||
2158                                 s->child.siginfo.si_code == CLD_KILLED ||
2159                                 s->child.siginfo.si_code == CLD_DUMPED;
2160
2161                         if (!zombie && (s->child.options & WEXITED)) {
2162                                 /* If the child isn't dead then let's
2163                                  * immediately remove the state change
2164                                  * from the queue, since there's no
2165                                  * benefit in leaving it queued */
2166
2167                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2168                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2169                         }
2170
2171                         r = source_set_pending(s, true);
2172                         if (r < 0)
2173                                 return r;
2174                 }
2175         }
2176
2177         return 0;
2178 }
2179
2180 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2181         bool read_one = false;
2182         int r;
2183
2184         assert(e);
2185         assert_return(events == EPOLLIN, -EIO);
2186
2187         /* If there's a signal queued on this priority and SIGCHLD is
2188            on this priority too, then make sure to recheck the
2189            children we watch. This is because we only ever dequeue
2190            the first signal per priority, and if we dequeue one, and
2191            SIGCHLD might be enqueued later we wouldn't know, but we
2192            might have higher priority children we care about hence we
2193            need to check that explicitly. */
2194
2195         if (sigismember(&d->sigset, SIGCHLD))
2196                 e->need_process_child = true;
2197
2198         /* If there's already an event source pending for this
2199          * priority we don't read another */
2200         if (d->current)
2201                 return 0;
2202
2203         for (;;) {
2204                 struct signalfd_siginfo si;
2205                 ssize_t n;
2206                 sd_event_source *s = NULL;
2207
2208                 n = read(d->fd, &si, sizeof(si));
2209                 if (n < 0) {
2210                         if (errno == EAGAIN || errno == EINTR)
2211                                 return read_one;
2212
2213                         return -errno;
2214                 }
2215
2216                 if (_unlikely_(n != sizeof(si)))
2217                         return -EIO;
2218
2219                 assert(si.ssi_signo < _NSIG);
2220
2221                 read_one = true;
2222
2223                 if (e->signal_sources)
2224                         s = e->signal_sources[si.ssi_signo];
2225                 if (!s)
2226                         continue;
2227                 if (s->pending)
2228                         continue;
2229
2230                 s->signal.siginfo = si;
2231                 d->current = s;
2232
2233                 r = source_set_pending(s, true);
2234                 if (r < 0)
2235                         return r;
2236
2237                 return 1;
2238         }
2239 }
2240
/* Dispatches a single event source: invokes its user callback and
 * performs the surrounding bookkeeping (pending flag, post-source
 * propagation, oneshot disabling, zombie reaping, error handling).
 *
 * Returns 1 on success — even when the user callback failed, since a
 * failing callback merely gets its source disabled — or a negative
 * errno-style error if the bookkeeping itself failed. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending across dispatches; all
         * other types are unqueued before the callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Oneshot sources are switched off before the callback runs,
         * so the callback may re-enable them for another dispatch. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* Guard: while this flag is set the source must not be freed
         * even if the callback drops the last reference (see the
         * n_ref check below). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Remember before the callback whether the child is a
                 * zombie: the callback may mutate the cached siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        /* If the callback unreffed the source to zero, the free was
         * deferred until now; otherwise a failed callback merely
         * disables the source. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2343
2344 static int event_prepare(sd_event *e) {
2345         int r;
2346
2347         assert(e);
2348
2349         for (;;) {
2350                 sd_event_source *s;
2351
2352                 s = prioq_peek(e->prepare);
2353                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2354                         break;
2355
2356                 s->prepare_iteration = e->iteration;
2357                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2358                 if (r < 0)
2359                         return r;
2360
2361                 assert(s->prepare);
2362
2363                 s->dispatching = true;
2364                 r = s->prepare(s, s->userdata);
2365                 s->dispatching = false;
2366
2367                 if (r < 0)
2368                         log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2369                                         strna(s->description), event_source_type_to_string(s->type));
2370
2371                 if (s->n_ref == 0)
2372                         source_free(s);
2373                 else if (r < 0)
2374                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
2375         }
2376
2377         return 0;
2378 }
2379
2380 static int dispatch_exit(sd_event *e) {
2381         sd_event_source *p;
2382         int r;
2383
2384         assert(e);
2385
2386         p = prioq_peek(e->exit);
2387         if (!p || p->enabled == SD_EVENT_OFF) {
2388                 e->state = SD_EVENT_FINISHED;
2389                 return 0;
2390         }
2391
2392         sd_event_ref(e);
2393         e->iteration++;
2394         e->state = SD_EVENT_EXITING;
2395
2396         r = source_dispatch(p);
2397
2398         e->state = SD_EVENT_INITIAL;
2399         sd_event_unref(e);
2400
2401         return r;
2402 }
2403
2404 static sd_event_source* event_next_pending(sd_event *e) {
2405         sd_event_source *p;
2406
2407         assert(e);
2408
2409         p = prioq_peek(e->pending);
2410         if (!p)
2411                 return NULL;
2412
2413         if (p->enabled == SD_EVENT_OFF)
2414                 return NULL;
2415
2416         return p;
2417 }
2418
2419 static int arm_watchdog(sd_event *e) {
2420         struct itimerspec its = {};
2421         usec_t t;
2422         int r;
2423
2424         assert(e);
2425         assert(e->watchdog_fd >= 0);
2426
2427         t = sleep_between(e,
2428                           e->watchdog_last + (e->watchdog_period / 2),
2429                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2430
2431         timespec_store(&its.it_value, t);
2432
2433         /* Make sure we never set the watchdog to 0, which tells the
2434          * kernel to disable it. */
2435         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2436                 its.it_value.tv_nsec = 1;
2437
2438         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2439         if (r < 0)
2440                 return -errno;
2441
2442         return 0;
2443 }
2444
/* Sends a keep-alive ping to the service manager's watchdog, rate
 * limited so we don't ping more often than once per quarter of the
 * watchdog period, then rearms the internal watchdog timerfd.
 *
 * Returns 0 when nothing was to be done, otherwise the result of
 * arm_watchdog(). */
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
2460
2461 _public_ int sd_event_prepare(sd_event *e) {
2462         int r;
2463
2464         assert_return(e, -EINVAL);
2465         assert_return(!event_pid_changed(e), -ECHILD);
2466         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2467         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2468
2469         if (e->exit_requested)
2470                 goto pending;
2471
2472         e->iteration++;
2473
2474         e->state = SD_EVENT_PREPARING;
2475         r = event_prepare(e);
2476         e->state = SD_EVENT_INITIAL;
2477         if (r < 0)
2478                 return r;
2479
2480         r = event_arm_timer(e, &e->realtime);
2481         if (r < 0)
2482                 return r;
2483
2484         r = event_arm_timer(e, &e->boottime);
2485         if (r < 0)
2486                 return r;
2487
2488         r = event_arm_timer(e, &e->monotonic);
2489         if (r < 0)
2490                 return r;
2491
2492         r = event_arm_timer(e, &e->realtime_alarm);
2493         if (r < 0)
2494                 return r;
2495
2496         r = event_arm_timer(e, &e->boottime_alarm);
2497         if (r < 0)
2498                 return r;
2499
2500         if (event_next_pending(e) || e->need_process_child)
2501                 goto pending;
2502
2503         e->state = SD_EVENT_ARMED;
2504
2505         return 0;
2506
2507 pending:
2508         e->state = SD_EVENT_ARMED;
2509         r = sd_event_wait(e, 0);
2510         if (r == 0)
2511                 e->state = SD_EVENT_ARMED;
2512
2513         return r;
2514 }
2515
2516 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2517         struct epoll_event *ev_queue;
2518         unsigned ev_queue_max;
2519         int r, m, i;
2520
2521         assert_return(e, -EINVAL);
2522         assert_return(!event_pid_changed(e), -ECHILD);
2523         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2524         assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2525
2526         if (e->exit_requested) {
2527                 e->state = SD_EVENT_PENDING;
2528                 return 1;
2529         }
2530
2531         ev_queue_max = MAX(e->n_sources, 1u);
2532         ev_queue = newa(struct epoll_event, ev_queue_max);
2533
2534         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2535                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2536         if (m < 0) {
2537                 if (errno == EINTR) {
2538                         e->state = SD_EVENT_PENDING;
2539                         return 1;
2540                 }
2541
2542                 r = -errno;
2543                 goto finish;
2544         }
2545
2546         dual_timestamp_get(&e->timestamp);
2547         e->timestamp_boottime = now(CLOCK_BOOTTIME);
2548
2549         for (i = 0; i < m; i++) {
2550
2551                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2552                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2553                 else {
2554                         WakeupType *t = ev_queue[i].data.ptr;
2555
2556                         switch (*t) {
2557
2558                         case WAKEUP_EVENT_SOURCE:
2559                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2560                                 break;
2561
2562                         case WAKEUP_CLOCK_DATA: {
2563                                 struct clock_data *d = ev_queue[i].data.ptr;
2564                                 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2565                                 break;
2566                         }
2567
2568                         case WAKEUP_SIGNAL_DATA:
2569                                 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2570                                 break;
2571
2572                         default:
2573                                 assert_not_reached("Invalid wake-up pointer");
2574                         }
2575                 }
2576                 if (r < 0)
2577                         goto finish;
2578         }
2579
2580         r = process_watchdog(e);
2581         if (r < 0)
2582                 goto finish;
2583
2584         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2585         if (r < 0)
2586                 goto finish;
2587
2588         r = process_timer(e, e->timestamp_boottime, &e->boottime);
2589         if (r < 0)
2590                 goto finish;
2591
2592         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2593         if (r < 0)
2594                 goto finish;
2595
2596         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2597         if (r < 0)
2598                 goto finish;
2599
2600         r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2601         if (r < 0)
2602                 goto finish;
2603
2604         if (e->need_process_child) {
2605                 r = process_child(e);
2606                 if (r < 0)
2607                         goto finish;
2608         }
2609
2610         if (event_next_pending(e)) {
2611                 e->state = SD_EVENT_PENDING;
2612
2613                 return 1;
2614         }
2615
2616         r = 0;
2617
2618 finish:
2619         e->state = SD_EVENT_INITIAL;
2620
2621         return r;
2622 }
2623
2624 _public_ int sd_event_dispatch(sd_event *e) {
2625         sd_event_source *p;
2626         int r;
2627
2628         assert_return(e, -EINVAL);
2629         assert_return(!event_pid_changed(e), -ECHILD);
2630         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2631         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2632
2633         if (e->exit_requested)
2634                 return dispatch_exit(e);
2635
2636         p = event_next_pending(e);
2637         if (p) {
2638                 sd_event_ref(e);
2639
2640                 e->state = SD_EVENT_RUNNING;
2641                 r = source_dispatch(p);
2642                 e->state = SD_EVENT_INITIAL;
2643
2644                 sd_event_unref(e);
2645
2646                 return r;
2647         }
2648
2649         e->state = SD_EVENT_INITIAL;
2650
2651         return 1;
2652 }
2653
2654 static void event_log_delays(sd_event *e) {
2655         char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2656         unsigned i;
2657         int o;
2658
2659         for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2660                 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2661                 e->delays[i] = 0;
2662         }
2663         log_debug("Event loop iterations: %.*s", o, b);
2664 }
2665
2666 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2667         int r;
2668
2669         assert_return(e, -EINVAL);
2670         assert_return(!event_pid_changed(e), -ECHILD);
2671         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2672         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2673
2674         if (e->profile_delays && e->last_run) {
2675                 usec_t this_run;
2676                 unsigned l;
2677
2678                 this_run = now(CLOCK_MONOTONIC);
2679
2680                 l = u64log2(this_run - e->last_run);
2681                 assert(l < sizeof(e->delays));
2682                 e->delays[l]++;
2683
2684                 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2685                         event_log_delays(e);
2686                         e->last_log = this_run;
2687                 }
2688         }
2689
2690         r = sd_event_prepare(e);
2691         if (r == 0)
2692                 /* There was nothing? Then wait... */
2693                 r = sd_event_wait(e, timeout);
2694
2695         if (e->profile_delays)
2696                 e->last_run = now(CLOCK_MONOTONIC);
2697
2698         if (r > 0) {
2699                 /* There's something now, then let's dispatch it */
2700                 r = sd_event_dispatch(e);
2701                 if (r < 0)
2702                         return r;
2703
2704                 return 1;
2705         }
2706
2707         return r;
2708 }
2709
2710 #if 0 /// UNNEEDED by elogind
2711 _public_ int sd_event_loop(sd_event *e) {
2712         int r;
2713
2714         assert_return(e, -EINVAL);
2715         assert_return(!event_pid_changed(e), -ECHILD);
2716         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2717
2718         sd_event_ref(e);
2719
2720         while (e->state != SD_EVENT_FINISHED) {
2721                 r = sd_event_run(e, (uint64_t) -1);
2722                 if (r < 0)
2723                         goto finish;
2724         }
2725
2726         r = e->exit_code;
2727
2728 finish:
2729         sd_event_unref(e);
2730         return r;
2731 }
2732
/* Public API: returns the epoll fd backing this event loop, so the
 * loop can be embedded into an outer poll loop. */
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
2740 #endif // 0
2741
/* Public API: returns the current state of the event loop state
 * machine (INITIAL, ARMED, PENDING, RUNNING, EXITING or FINISHED). */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2748
2749 #if 0 /// UNNEEDED by elogind
/* Public API: retrieves the exit code previously passed to
 * sd_event_exit(). Returns -ENODATA when no exit has been requested
 * yet. */
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
2761 #endif // 0
2762
/* Public API: requests termination of the event loop. The exit
 * sources run on the next iteration, and 'code' becomes the value
 * reported by sd_event_get_exit_code(). */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
2773
2774 #if 0 /// UNNEEDED by elogind
2775 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2776         assert_return(e, -EINVAL);
2777         assert_return(usec, -EINVAL);
2778         assert_return(!event_pid_changed(e), -ECHILD);
2779         assert_return(IN_SET(clock,
2780                              CLOCK_REALTIME,
2781                              CLOCK_REALTIME_ALARM,
2782                              CLOCK_MONOTONIC,
2783                              CLOCK_BOOTTIME,
2784                              CLOCK_BOOTTIME_ALARM), -EOPNOTSUPP);
2785
2786         if (!dual_timestamp_is_set(&e->timestamp)) {
2787                 /* Implicitly fall back to now() if we never ran
2788                  * before and thus have no cached time. */
2789                 *usec = now(clock);
2790                 return 1;
2791         }
2792
2793         switch (clock) {
2794
2795         case CLOCK_REALTIME:
2796         case CLOCK_REALTIME_ALARM:
2797                 *usec = e->timestamp.realtime;
2798                 break;
2799
2800         case CLOCK_MONOTONIC:
2801                 *usec = e->timestamp.monotonic;
2802                 break;
2803
2804         default:
2805                 *usec = e->timestamp_boottime;
2806                 break;
2807         }
2808
2809         return 0;
2810 }
2811 #endif // 0
2812
/* Public API: returns (in *ret) a reference to the calling thread's
 * default event loop, allocating one on first use.
 *
 * With ret == NULL this only reports whether a default loop already
 * exists for this thread. Returns 1 if a new loop was created, 0 when
 * an existing one was re-referenced, negative errno-style error on
 * allocation failure. */
_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember where to clear the cached pointer when the loop is
         * destroyed, and record the owning thread. */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2838
2839 #if 0 /// UNNEEDED by elogind
2840 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2841         assert_return(e, -EINVAL);
2842         assert_return(tid, -EINVAL);
2843         assert_return(!event_pid_changed(e), -ECHILD);
2844
2845         if (e->tid != 0) {
2846                 *tid = e->tid;
2847                 return 0;
2848         }
2849
2850         return -ENXIO;
2851 }
2852 #endif // 0
2853
2854 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2855         int r;
2856
2857         assert_return(e, -EINVAL);
2858         assert_return(!event_pid_changed(e), -ECHILD);
2859
2860         if (e->watchdog == !!b)
2861                 return e->watchdog;
2862
2863         if (b) {
2864                 struct epoll_event ev = {};
2865
2866                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2867                 if (r <= 0)
2868                         return r;
2869
2870                 /* Issue first ping immediately */
2871                 sd_notify(false, "WATCHDOG=1");
2872                 e->watchdog_last = now(CLOCK_MONOTONIC);
2873
2874                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2875                 if (e->watchdog_fd < 0)
2876                         return -errno;
2877
2878                 r = arm_watchdog(e);
2879                 if (r < 0)
2880                         goto fail;
2881
2882                 ev.events = EPOLLIN;
2883                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2884
2885                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2886                 if (r < 0) {
2887                         r = -errno;
2888                         goto fail;
2889                 }
2890
2891         } else {
2892                 if (e->watchdog_fd >= 0) {
2893                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2894                         e->watchdog_fd = safe_close(e->watchdog_fd);
2895                 }
2896         }
2897
2898         e->watchdog = !!b;
2899         return e->watchdog;
2900
2901 fail:
2902         e->watchdog_fd = safe_close(e->watchdog_fd);
2903         return r;
2904 }
2905
2906 #if 0 /// UNNEEDED by elogind
/* Public API: reports whether automatic watchdog notification is
 * currently enabled (see sd_event_set_watchdog()). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
2913 #endif // 0