chiark / gitweb /
networkd: fix a couple of memory leaks
[elogind.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2013 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25 #include <pthread.h>
26
27 #include "sd-id128.h"
28 #include "sd-daemon.h"
29 #include "macro.h"
30 #include "prioq.h"
31 #include "hashmap.h"
32 #include "util.h"
33 #include "time-util.h"
34 #include "missing.h"
35 #include "set.h"
36
37 #include "sd-event.h"
38
/* Upper bound on the number of epoll events fetched per wait cycle. */
#define EPOLL_QUEUE_MAX 512U

/* Scheduling slack applied to timer sources whose caller passed accuracy == 0. */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

/* Discriminates the union inside sd_event_source; stored in a 5-bit bitfield. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        /* NOTE(review): "_SOUFCE_" is a typo for "_SOURCE_". Renaming it here
         * alone would break any out-of-view users of the misspelled name —
         * fix it file-wide in one pass. */
        _SOUFCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* True for the four source types that are backed by a timerfd clock. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;            /* owning loop; source_new() takes a ref on it */
        void *userdata;
        sd_event_handler_t prepare; /* optional pre-poll callback; also gates membership in event->prepare */

        EventSourceType type:5;     /* selects the union member below */
        int enabled:3;              /* SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT */
        bool pending:1;             /* queued in event->pending prioq */
        bool dispatching:1;
        int64_t priority;           /* lower value sorts first in the prioqs */
        unsigned pending_index;     /* slot in event->pending; PRIOQ_IDX_NULL when absent */
        unsigned prepare_index;     /* slot in event->prepare; PRIOQ_IDX_NULL when absent */
        unsigned pending_iteration; /* loop iteration at which the source became pending */
        unsigned prepare_iteration; /* loop iteration at which prepare last ran */

        /* Per-type payload; validity determined by 'type'. */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;     /* EPOLL* mask the caller asked for */
                        uint32_t revents;
                        bool registered:1;   /* fd currently added to the epoll set */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;   /* earliest wakeup, and allowed slack past it */
                        unsigned earliest_index; /* slot in clock_data->earliest */
                        unsigned latest_index;   /* slot in clock_data->latest */
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;         /* WEXITED|WSTOPPED|WCONTINUED subset */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index; /* slot in event->exit */
                } exit;
        };
};
115
struct clock_data {
        int fd; /* timerfd for this clock, or -1 while not yet opened */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next; /* (usec_t) -1 when unset (see sd_event_new()) */
};
130
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int signal_fd;   /* shared signalfd covering e->sigset; -1 until first signal/child source */
        int watchdog_fd;

        Prioq *pending;  /* sources with pending == true, ordered by pending_prioq_compare() */
        Prioq *prepare;  /* sources with a prepare callback, ordered by prepare_prioq_compare() */

        /* timerfd_create() only supports these four clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        /* Boot-ID-derived offset used to spread coalesced timer wakeups
         * across machines; (usec_t) -1 until computed. */
        usec_t perturb;

        sigset_t sigset;                  /* signals currently routed through signal_fd */
        sd_event_source **signal_sources; /* array of _NSIG entries, indexed by signal number */

        Hashmap *child_sources;           /* pid (as INT_TO_PTR) -> SOURCE_CHILD source */
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid; /* PID at creation; used to detect use across fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr; /* cleared on free — presumably set when installed as default loop; confirm against sd_event_default() */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources; /* live sources attached; must be 0 at event_free() */
};
181
182 static int pending_prioq_compare(const void *a, const void *b) {
183         const sd_event_source *x = a, *y = b;
184
185         assert(x->pending);
186         assert(y->pending);
187
188         /* Enabled ones first */
189         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190                 return -1;
191         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192                 return 1;
193
194         /* Lower priority values first */
195         if (x->priority < y->priority)
196                 return -1;
197         if (x->priority > y->priority)
198                 return 1;
199
200         /* Older entries first */
201         if (x->pending_iteration < y->pending_iteration)
202                 return -1;
203         if (x->pending_iteration > y->pending_iteration)
204                 return 1;
205
206         /* Stability for the rest */
207         if (x < y)
208                 return -1;
209         if (x > y)
210                 return 1;
211
212         return 0;
213 }
214
215 static int prepare_prioq_compare(const void *a, const void *b) {
216         const sd_event_source *x = a, *y = b;
217
218         assert(x->prepare);
219         assert(y->prepare);
220
221         /* Move most recently prepared ones last, so that we can stop
222          * preparing as soon as we hit one that has already been
223          * prepared in the current iteration */
224         if (x->prepare_iteration < y->prepare_iteration)
225                 return -1;
226         if (x->prepare_iteration > y->prepare_iteration)
227                 return 1;
228
229         /* Enabled ones first */
230         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
231                 return -1;
232         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233                 return 1;
234
235         /* Lower priority values first */
236         if (x->priority < y->priority)
237                 return -1;
238         if (x->priority > y->priority)
239                 return 1;
240
241         /* Stability for the rest */
242         if (x < y)
243                 return -1;
244         if (x > y)
245                 return 1;
246
247         return 0;
248 }
249
250 static int earliest_time_prioq_compare(const void *a, const void *b) {
251         const sd_event_source *x = a, *y = b;
252
253         assert(EVENT_SOURCE_IS_TIME(x->type));
254         assert(x->type == y->type);
255
256         /* Enabled ones first */
257         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
258                 return -1;
259         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260                 return 1;
261
262         /* Move the pending ones to the end */
263         if (!x->pending && y->pending)
264                 return -1;
265         if (x->pending && !y->pending)
266                 return 1;
267
268         /* Order by time */
269         if (x->time.next < y->time.next)
270                 return -1;
271         if (x->time.next > y->time.next)
272                 return 1;
273
274         /* Stability for the rest */
275         if (x < y)
276                 return -1;
277         if (x > y)
278                 return 1;
279
280         return 0;
281 }
282
283 static int latest_time_prioq_compare(const void *a, const void *b) {
284         const sd_event_source *x = a, *y = b;
285
286         assert(EVENT_SOURCE_IS_TIME(x->type));
287         assert(x->type == y->type);
288
289         /* Enabled ones first */
290         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
291                 return -1;
292         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293                 return 1;
294
295         /* Move the pending ones to the end */
296         if (!x->pending && y->pending)
297                 return -1;
298         if (x->pending && !y->pending)
299                 return 1;
300
301         /* Order by time */
302         if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
303                 return -1;
304         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
305                 return 1;
306
307         /* Stability for the rest */
308         if (x < y)
309                 return -1;
310         if (x > y)
311                 return 1;
312
313         return 0;
314 }
315
316 static int exit_prioq_compare(const void *a, const void *b) {
317         const sd_event_source *x = a, *y = b;
318
319         assert(x->type == SOURCE_EXIT);
320         assert(y->type == SOURCE_EXIT);
321
322         /* Enabled ones first */
323         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324                 return -1;
325         if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326                 return 1;
327
328         /* Lower priority values first */
329         if (x->priority < y->priority)
330                 return -1;
331         if (x->priority > y->priority)
332                 return 1;
333
334         /* Stability for the rest */
335         if (x < y)
336                 return -1;
337         if (x > y)
338                 return 1;
339
340         return 0;
341 }
342
343 static void free_clock_data(struct clock_data *d) {
344         assert(d);
345
346         safe_close(d->fd);
347         prioq_free(d->earliest);
348         prioq_free(d->latest);
349 }
350
/* Tear down the event loop object. Must only run once the last source is
 * gone (every source holds a ref on the loop, see source_new()). */
static void event_free(sd_event *e) {
        assert(e);
        assert(e->n_sources == 0);

        /* If somebody holds a pointer to this loop as "the default
         * event loop", invalidate it so it cannot dangle. */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        /* safe_close() is a no-op on -1, so unopened fds are fine. */
        safe_close(e->epoll_fd);
        safe_close(e->signal_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
377
378 _public_ int sd_event_new(sd_event** ret) {
379         sd_event *e;
380         int r;
381
382         assert_return(ret, -EINVAL);
383
384         e = new0(sd_event, 1);
385         if (!e)
386                 return -ENOMEM;
387
388         e->n_ref = 1;
389         e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
390         e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
391         e->original_pid = getpid();
392         e->perturb = (usec_t) -1;
393
394         assert_se(sigemptyset(&e->sigset) == 0);
395
396         e->pending = prioq_new(pending_prioq_compare);
397         if (!e->pending) {
398                 r = -ENOMEM;
399                 goto fail;
400         }
401
402         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
403         if (e->epoll_fd < 0) {
404                 r = -errno;
405                 goto fail;
406         }
407
408         *ret = e;
409         return 0;
410
411 fail:
412         event_free(e);
413         return r;
414 }
415
416 _public_ sd_event* sd_event_ref(sd_event *e) {
417         assert_return(e, NULL);
418
419         assert(e->n_ref >= 1);
420         e->n_ref++;
421
422         return e;
423 }
424
425 _public_ sd_event* sd_event_unref(sd_event *e) {
426
427         if (!e)
428                 return NULL;
429
430         assert(e->n_ref >= 1);
431         e->n_ref--;
432
433         if (e->n_ref <= 0)
434                 event_free(e);
435
436         return NULL;
437 }
438
439 static bool event_pid_changed(sd_event *e) {
440         assert(e);
441
442         /* We don't support people creating am event loop and keeping
443          * it around over a fork(). Let's complain. */
444
445         return e->original_pid != getpid();
446 }
447
448 static int source_io_unregister(sd_event_source *s) {
449         int r;
450
451         assert(s);
452         assert(s->type == SOURCE_IO);
453
454         if (!s->io.registered)
455                 return 0;
456
457         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
458         if (r < 0)
459                 return -errno;
460
461         s->io.registered = false;
462         return 0;
463 }
464
465 static int source_io_register(
466                 sd_event_source *s,
467                 int enabled,
468                 uint32_t events) {
469
470         struct epoll_event ev = {};
471         int r;
472
473         assert(s);
474         assert(s->type == SOURCE_IO);
475         assert(enabled != SD_EVENT_OFF);
476
477         ev.events = events;
478         ev.data.ptr = s;
479
480         if (enabled == SD_EVENT_ONESHOT)
481                 ev.events |= EPOLLONESHOT;
482
483         if (s->io.registered)
484                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
485         else
486                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
487
488         if (r < 0)
489                 return -errno;
490
491         s->io.registered = true;
492
493         return 0;
494 }
495
496 static clockid_t event_source_type_to_clock(EventSourceType t) {
497
498         switch (t) {
499
500         case SOURCE_TIME_REALTIME:
501                 return CLOCK_REALTIME;
502
503         case SOURCE_TIME_MONOTONIC:
504                 return CLOCK_MONOTONIC;
505
506         case SOURCE_TIME_REALTIME_ALARM:
507                 return CLOCK_REALTIME_ALARM;
508
509         case SOURCE_TIME_BOOTTIME_ALARM:
510                 return CLOCK_BOOTTIME_ALARM;
511
512         default:
513                 return (clockid_t) -1;
514         }
515 }
516
517 static EventSourceType clock_to_event_source_type(clockid_t clock) {
518
519         switch (clock) {
520
521         case CLOCK_REALTIME:
522                 return SOURCE_TIME_REALTIME;
523
524         case CLOCK_MONOTONIC:
525                 return SOURCE_TIME_MONOTONIC;
526
527         case CLOCK_REALTIME_ALARM:
528                 return SOURCE_TIME_REALTIME_ALARM;
529
530         case CLOCK_BOOTTIME_ALARM:
531                 return SOURCE_TIME_BOOTTIME_ALARM;
532
533         default:
534                 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
535         }
536 }
537
538 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
539         assert(e);
540
541         switch (t) {
542
543         case SOURCE_TIME_REALTIME:
544                 return &e->realtime;
545
546         case SOURCE_TIME_MONOTONIC:
547                 return &e->monotonic;
548
549         case SOURCE_TIME_REALTIME_ALARM:
550                 return &e->realtime_alarm;
551
552         case SOURCE_TIME_BOOTTIME_ALARM:
553                 return &e->boottime_alarm;
554
555         default:
556                 return NULL;
557         }
558 }
559
/* Detach a source from its loop (per-type bookkeeping, pending/prepare
 * queues, the loop's source count and reference) and free it. */
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                assert(s->event->n_sources > 0);

                switch (s->type) {

                case SOURCE_IO:
                        /* Only sources with a valid fd were ever registered. */
                        if (s->io.fd >= 0)
                                source_io_unregister(s);

                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_remove(d->earliest, s, &s->time.earliest_index);
                        prioq_remove(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                /* Keep SIGCHLD in the sigset if enabled child
                                 * sources still depend on it. */
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                /* Drop SIGCHLD from the sigset unless an explicit
                                 * SIGCHLD signal source still wants it. */
                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }

                        break;

                case SOURCE_DEFER:
                        /* nothing */
                        break;

                case SOURCE_POST:
                        set_remove(s->event->post_sources, s);
                        break;

                case SOURCE_EXIT:
                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                /* Drop the reference on the loop taken in source_new(). */
                s->event->n_sources--;
                sd_event_unref(s->event);
        }

        free(s);
}
642
/* Flip a source's pending state, keeping the loop's pending prioq and —
 * for timer sources — the per-clock prioqs consistent. Returns 0 or a
 * negative errno-style code if queueing failed. */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        /* No-op if the state doesn't actually change. */
        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* Record when the source became pending so older entries
                 * are dispatched first (see pending_prioq_compare()). */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        /* Pending-ness is part of the timer prioq ordering (pending
         * sources sort last), so reshuffle both queues. */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
        }

        return 0;
}
677
678 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
679         sd_event_source *s;
680
681         assert(e);
682
683         s = new0(sd_event_source, 1);
684         if (!s)
685                 return NULL;
686
687         s->n_ref = 1;
688         s->event = sd_event_ref(e);
689         s->type = type;
690         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
691
692         e->n_sources ++;
693
694         return s;
695 }
696
697 _public_ int sd_event_add_io(
698                 sd_event *e,
699                 sd_event_source **ret,
700                 int fd,
701                 uint32_t events,
702                 sd_event_io_handler_t callback,
703                 void *userdata) {
704
705         sd_event_source *s;
706         int r;
707
708         assert_return(e, -EINVAL);
709         assert_return(fd >= 0, -EINVAL);
710         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
711         assert_return(callback, -EINVAL);
712         assert_return(ret, -EINVAL);
713         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
714         assert_return(!event_pid_changed(e), -ECHILD);
715
716         s = source_new(e, SOURCE_IO);
717         if (!s)
718                 return -ENOMEM;
719
720         s->io.fd = fd;
721         s->io.events = events;
722         s->io.callback = callback;
723         s->userdata = userdata;
724         s->enabled = SD_EVENT_ON;
725
726         r = source_io_register(s, s->enabled, events);
727         if (r < 0) {
728                 source_free(s);
729                 return -errno;
730         }
731
732         *ret = s;
733         return 0;
734 }
735
/* Lazily create the timerfd for one clock and add it to the epoll set.
 * The epoll data ptr carries the source *type* (not a source pointer) so
 * the dispatcher can tell timer wakeups apart. Idempotent: returns 0
 * immediately if the fd already exists. */
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        sd_id128_t bootid = {};
        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (e->perturb == (usec_t) -1)
                if (sd_id128_get_boot(&bootid) >= 0)
                        e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;

        return 0;
}
781
/* Create a one-shot timer source on the given clock, firing between usec
 * and usec+accuracy (DEFAULT_ACCURACY_USEC if accuracy is 0). Lazily
 * allocates the clock's prioqs and timerfd on first use. On success *ret
 * owns one reference to the new source. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Unsupported clocks map to a negative type. */
        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -ENOTSUP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* Lazily set up this clock's two scheduling prioqs... */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        /* ...and its timerfd. */
        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        /* source_free() removes the source from whichever prioqs it
         * already entered. */
        source_free(s);
        return r;
}
854
/* (Re)program the loop's signalfd to cover e->sigset, creating it and
 * adding it to the epoll set on first use. The epoll data ptr carries
 * SOURCE_SIGNAL so the dispatcher can recognize signal wakeups. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        /* signalfd() updates the mask in place when passed an existing
         * fd, and creates a new one when passed -1. */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                e->signal_fd = safe_close(e->signal_fd);
                return -errno;
        }

        return 0;
}
884
885 _public_ int sd_event_add_signal(
886                 sd_event *e,
887                 sd_event_source **ret,
888                 int sig,
889                 sd_event_signal_handler_t callback,
890                 void *userdata) {
891
892         sd_event_source *s;
893         sigset_t ss;
894         int r;
895
896         assert_return(e, -EINVAL);
897         assert_return(sig > 0, -EINVAL);
898         assert_return(sig < _NSIG, -EINVAL);
899         assert_return(callback, -EINVAL);
900         assert_return(ret, -EINVAL);
901         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
902         assert_return(!event_pid_changed(e), -ECHILD);
903
904         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
905         if (r < 0)
906                 return -errno;
907
908         if (!sigismember(&ss, sig))
909                 return -EBUSY;
910
911         if (!e->signal_sources) {
912                 e->signal_sources = new0(sd_event_source*, _NSIG);
913                 if (!e->signal_sources)
914                         return -ENOMEM;
915         } else if (e->signal_sources[sig])
916                 return -EBUSY;
917
918         s = source_new(e, SOURCE_SIGNAL);
919         if (!s)
920                 return -ENOMEM;
921
922         s->signal.sig = sig;
923         s->signal.callback = callback;
924         s->userdata = userdata;
925         s->enabled = SD_EVENT_ON;
926
927         e->signal_sources[sig] = s;
928         assert_se(sigaddset(&e->sigset, sig) == 0);
929
930         if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
931                 r = event_update_signal_fd(e);
932                 if (r < 0) {
933                         source_free(s);
934                         return r;
935                 }
936         }
937
938         *ret = s;
939         return 0;
940 }
941
942 _public_ int sd_event_add_child(
943                 sd_event *e,
944                 sd_event_source **ret,
945                 pid_t pid,
946                 int options,
947                 sd_event_child_handler_t callback,
948                 void *userdata) {
949
950         sd_event_source *s;
951         int r;
952
953         assert_return(e, -EINVAL);
954         assert_return(pid > 1, -EINVAL);
955         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
956         assert_return(options != 0, -EINVAL);
957         assert_return(callback, -EINVAL);
958         assert_return(ret, -EINVAL);
959         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
960         assert_return(!event_pid_changed(e), -ECHILD);
961
962         r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
963         if (r < 0)
964                 return r;
965
966         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
967                 return -EBUSY;
968
969         s = source_new(e, SOURCE_CHILD);
970         if (!s)
971                 return -ENOMEM;
972
973         s->child.pid = pid;
974         s->child.options = options;
975         s->child.callback = callback;
976         s->userdata = userdata;
977         s->enabled = SD_EVENT_ONESHOT;
978
979         r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
980         if (r < 0) {
981                 source_free(s);
982                 return r;
983         }
984
985         e->n_enabled_child_sources ++;
986
987         assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
988
989         if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
990                 r = event_update_signal_fd(e);
991                 if (r < 0) {
992                         source_free(s);
993                         return -errno;
994                 }
995         }
996
997         e->need_process_child = true;
998
999         *ret = s;
1000         return 0;
1001 }
1002
1003 _public_ int sd_event_add_defer(
1004                 sd_event *e,
1005                 sd_event_source **ret,
1006                 sd_event_handler_t callback,
1007                 void *userdata) {
1008
1009         sd_event_source *s;
1010         int r;
1011
1012         assert_return(e, -EINVAL);
1013         assert_return(callback, -EINVAL);
1014         assert_return(ret, -EINVAL);
1015         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1016         assert_return(!event_pid_changed(e), -ECHILD);
1017
1018         s = source_new(e, SOURCE_DEFER);
1019         if (!s)
1020                 return -ENOMEM;
1021
1022         s->defer.callback = callback;
1023         s->userdata = userdata;
1024         s->enabled = SD_EVENT_ONESHOT;
1025
1026         r = source_set_pending(s, true);
1027         if (r < 0) {
1028                 source_free(s);
1029                 return r;
1030         }
1031
1032         *ret = s;
1033         return 0;
1034 }
1035
1036 _public_ int sd_event_add_post(
1037                 sd_event *e,
1038                 sd_event_source **ret,
1039                 sd_event_handler_t callback,
1040                 void *userdata) {
1041
1042         sd_event_source *s;
1043         int r;
1044
1045         assert_return(e, -EINVAL);
1046         assert_return(callback, -EINVAL);
1047         assert_return(ret, -EINVAL);
1048         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1049         assert_return(!event_pid_changed(e), -ECHILD);
1050
1051         r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
1052         if (r < 0)
1053                 return r;
1054
1055         s = source_new(e, SOURCE_POST);
1056         if (!s)
1057                 return -ENOMEM;
1058
1059         s->post.callback = callback;
1060         s->userdata = userdata;
1061         s->enabled = SD_EVENT_ON;
1062
1063         r = set_put(e->post_sources, s);
1064         if (r < 0) {
1065                 source_free(s);
1066                 return r;
1067         }
1068
1069         *ret = s;
1070         return 0;
1071 }
1072
1073 _public_ int sd_event_add_exit(
1074                 sd_event *e,
1075                 sd_event_source **ret,
1076                 sd_event_handler_t callback,
1077                 void *userdata) {
1078
1079         sd_event_source *s;
1080         int r;
1081
1082         assert_return(e, -EINVAL);
1083         assert_return(callback, -EINVAL);
1084         assert_return(ret, -EINVAL);
1085         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1086         assert_return(!event_pid_changed(e), -ECHILD);
1087
1088         if (!e->exit) {
1089                 e->exit = prioq_new(exit_prioq_compare);
1090                 if (!e->exit)
1091                         return -ENOMEM;
1092         }
1093
1094         s = source_new(e, SOURCE_EXIT);
1095         if (!s)
1096                 return -ENOMEM;
1097
1098         s->exit.callback = callback;
1099         s->userdata = userdata;
1100         s->exit.prioq_index = PRIOQ_IDX_NULL;
1101         s->enabled = SD_EVENT_ONESHOT;
1102
1103         r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1104         if (r < 0) {
1105                 source_free(s);
1106                 return r;
1107         }
1108
1109         *ret = s;
1110         return 0;
1111 }
1112
1113 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1114         assert_return(s, NULL);
1115
1116         assert(s->n_ref >= 1);
1117         s->n_ref++;
1118
1119         return s;
1120 }
1121
1122 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1123
1124         if (!s)
1125                 return NULL;
1126
1127         assert(s->n_ref >= 1);
1128         s->n_ref--;
1129
1130         if (s->n_ref <= 0) {
1131                 /* Here's a special hack: when we are called from a
1132                  * dispatch handler we won't free the event source
1133                  * immediately, but we will detach the fd from the
1134                  * epoll. This way it is safe for the caller to unref
1135                  * the event source and immediately close the fd, but
1136                  * we still retain a valid event source object after
1137                  * the callback. */
1138
1139                 if (s->dispatching) {
1140                         if (s->type == SOURCE_IO)
1141                                 source_io_unregister(s);
1142                 } else
1143                         source_free(s);
1144         }
1145
1146         return NULL;
1147 }
1148
1149 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1150         assert_return(s, NULL);
1151
1152         return s->event;
1153 }
1154
1155 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1156         assert_return(s, -EINVAL);
1157         assert_return(s->type != SOURCE_EXIT, -EDOM);
1158         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1159         assert_return(!event_pid_changed(s->event), -ECHILD);
1160
1161         return s->pending;
1162 }
1163
1164 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1165         assert_return(s, -EINVAL);
1166         assert_return(s->type == SOURCE_IO, -EDOM);
1167         assert_return(!event_pid_changed(s->event), -ECHILD);
1168
1169         return s->io.fd;
1170 }
1171
1172 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1173         int r;
1174
1175         assert_return(s, -EINVAL);
1176         assert_return(fd >= 0, -EINVAL);
1177         assert_return(s->type == SOURCE_IO, -EDOM);
1178         assert_return(!event_pid_changed(s->event), -ECHILD);
1179
1180         if (s->io.fd == fd)
1181                 return 0;
1182
1183         if (s->enabled == SD_EVENT_OFF) {
1184                 s->io.fd = fd;
1185                 s->io.registered = false;
1186         } else {
1187                 int saved_fd;
1188
1189                 saved_fd = s->io.fd;
1190                 assert(s->io.registered);
1191
1192                 s->io.fd = fd;
1193                 s->io.registered = false;
1194
1195                 r = source_io_register(s, s->enabled, s->io.events);
1196                 if (r < 0) {
1197                         s->io.fd = saved_fd;
1198                         s->io.registered = true;
1199                         return r;
1200                 }
1201
1202                 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1203         }
1204
1205         return 0;
1206 }
1207
1208 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1209         assert_return(s, -EINVAL);
1210         assert_return(events, -EINVAL);
1211         assert_return(s->type == SOURCE_IO, -EDOM);
1212         assert_return(!event_pid_changed(s->event), -ECHILD);
1213
1214         *events = s->io.events;
1215         return 0;
1216 }
1217
1218 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1219         int r;
1220
1221         assert_return(s, -EINVAL);
1222         assert_return(s->type == SOURCE_IO, -EDOM);
1223         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1224         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1225         assert_return(!event_pid_changed(s->event), -ECHILD);
1226
1227         if (s->io.events == events)
1228                 return 0;
1229
1230         if (s->enabled != SD_EVENT_OFF) {
1231                 r = source_io_register(s, s->enabled, events);
1232                 if (r < 0)
1233                         return r;
1234         }
1235
1236         s->io.events = events;
1237         source_set_pending(s, false);
1238
1239         return 0;
1240 }
1241
1242 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1243         assert_return(s, -EINVAL);
1244         assert_return(revents, -EINVAL);
1245         assert_return(s->type == SOURCE_IO, -EDOM);
1246         assert_return(s->pending, -ENODATA);
1247         assert_return(!event_pid_changed(s->event), -ECHILD);
1248
1249         *revents = s->io.revents;
1250         return 0;
1251 }
1252
1253 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1254         assert_return(s, -EINVAL);
1255         assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1256         assert_return(!event_pid_changed(s->event), -ECHILD);
1257
1258         return s->signal.sig;
1259 }
1260
1261 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1262         assert_return(s, -EINVAL);
1263         assert_return(!event_pid_changed(s->event), -ECHILD);
1264
1265         return s->priority;
1266 }
1267
1268 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1269         assert_return(s, -EINVAL);
1270         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1271         assert_return(!event_pid_changed(s->event), -ECHILD);
1272
1273         if (s->priority == priority)
1274                 return 0;
1275
1276         s->priority = priority;
1277
1278         if (s->pending)
1279                 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1280
1281         if (s->prepare)
1282                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1283
1284         if (s->type == SOURCE_EXIT)
1285                 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1286
1287         return 0;
1288 }
1289
1290 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1291         assert_return(s, -EINVAL);
1292         assert_return(m, -EINVAL);
1293         assert_return(!event_pid_changed(s->event), -ECHILD);
1294
1295         *m = s->enabled;
1296         return 0;
1297 }
1298
/* Switches the source between SD_EVENT_OFF, SD_EVENT_ON and
 * SD_EVENT_ONESHOT. Each source type needs different bookkeeping:
 * I/O sources are (un)registered with the epoll, timer and exit
 * sources are repositioned in their priority queues, and signal/child
 * sources adjust the watched signal set and the signalfd. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        /* Unregistering can fail, so do it before
                         * committing the new state. */
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Reposition in the clock's queues after the
                         * state change. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        /* Keep SIGCHLD in the watched set while enabled
                         * child sources still need it. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        /* Only stop watching SIGCHLD if no explicit
                         * signal source still subscribes to it. */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        /* No external resources to adjust. */
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register with the new enable mode before
                         * committing the state change. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* If SIGCHLD is already watched on behalf of
                         * child sources, the mask needs no change. */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        /* Only a transition from OFF changes the count
                         * of enabled child sources (ON <-> ONESHOT
                         * does not). */
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* Reposition the source in the generic queues it may sit in. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1444
1445 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1446         assert_return(s, -EINVAL);
1447         assert_return(usec, -EINVAL);
1448         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1449         assert_return(!event_pid_changed(s->event), -ECHILD);
1450
1451         *usec = s->time.next;
1452         return 0;
1453 }
1454
1455 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1456         struct clock_data *d;
1457
1458         assert_return(s, -EINVAL);
1459         assert_return(usec != (uint64_t) -1, -EINVAL);
1460         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1461         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1462         assert_return(!event_pid_changed(s->event), -ECHILD);
1463
1464         s->time.next = usec;
1465
1466         source_set_pending(s, false);
1467
1468         d = event_get_clock_data(s->event, s->type);
1469         assert(d);
1470
1471         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1472         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1473
1474         return 0;
1475 }
1476
1477 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1478         assert_return(s, -EINVAL);
1479         assert_return(usec, -EINVAL);
1480         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1481         assert_return(!event_pid_changed(s->event), -ECHILD);
1482
1483         *usec = s->time.accuracy;
1484         return 0;
1485 }
1486
1487 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1488         struct clock_data *d;
1489
1490         assert_return(s, -EINVAL);
1491         assert_return(usec != (uint64_t) -1, -EINVAL);
1492         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1493         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1494         assert_return(!event_pid_changed(s->event), -ECHILD);
1495
1496         if (usec == 0)
1497                 usec = DEFAULT_ACCURACY_USEC;
1498
1499         s->time.accuracy = usec;
1500
1501         source_set_pending(s, false);
1502
1503         d = event_get_clock_data(s->event, s->type);
1504         assert(d);
1505
1506         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1507
1508         return 0;
1509 }
1510
1511 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1512         assert_return(s, -EINVAL);
1513         assert_return(clock, -EINVAL);
1514         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1515         assert_return(!event_pid_changed(s->event), -ECHILD);
1516
1517         *clock = event_source_type_to_clock(s->type);
1518         return 0;
1519 }
1520
1521 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1522         assert_return(s, -EINVAL);
1523         assert_return(pid, -EINVAL);
1524         assert_return(s->type == SOURCE_CHILD, -EDOM);
1525         assert_return(!event_pid_changed(s->event), -ECHILD);
1526
1527         *pid = s->child.pid;
1528         return 0;
1529 }
1530
1531 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1532         int r;
1533
1534         assert_return(s, -EINVAL);
1535         assert_return(s->type != SOURCE_EXIT, -EDOM);
1536         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1537         assert_return(!event_pid_changed(s->event), -ECHILD);
1538
1539         if (s->prepare == callback)
1540                 return 0;
1541
1542         if (callback && s->prepare) {
1543                 s->prepare = callback;
1544                 return 0;
1545         }
1546
1547         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1548         if (r < 0)
1549                 return r;
1550
1551         s->prepare = callback;
1552
1553         if (callback) {
1554                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1555                 if (r < 0)
1556                         return r;
1557         } else
1558                 prioq_remove(s->event->prepare, s, &s->prepare_index);
1559
1560         return 0;
1561 }
1562
1563 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1564         assert_return(s, NULL);
1565
1566         return s->userdata;
1567 }
1568
1569 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1570         void *ret;
1571
1572         assert_return(s, NULL);
1573
1574         ret = s->userdata;
1575         s->userdata = userdata;
1576
1577         return ret;
1578 }
1579
1580 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1581         usec_t c;
1582         assert(e);
1583         assert(a <= b);
1584
1585         if (a <= 0)
1586                 return 0;
1587
1588         if (b <= a + 1)
1589                 return a;
1590
1591         /*
1592           Find a good time to wake up again between times a and b. We
1593           have two goals here:
1594
1595           a) We want to wake up as seldom as possible, hence prefer
1596              later times over earlier times.
1597
1598           b) But if we have to wake up, then let's make sure to
1599              dispatch as much as possible on the entire system.
1600
1601           We implement this by waking up everywhere at the same time
1602           within any given minute if we can, synchronised via the
1603           perturbation value determined from the boot ID. If we can't,
1604           then we try to find the same spot in every 10s, then 1s and
1605           then 250ms step. Otherwise, we pick the last possible time
1606           to wake up.
1607         */
1608
1609         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1610         if (c >= b) {
1611                 if (_unlikely_(c < USEC_PER_MINUTE))
1612                         return b;
1613
1614                 c -= USEC_PER_MINUTE;
1615         }
1616
1617         if (c >= a)
1618                 return c;
1619
1620         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1621         if (c >= b) {
1622                 if (_unlikely_(c < USEC_PER_SEC*10))
1623                         return b;
1624
1625                 c -= USEC_PER_SEC*10;
1626         }
1627
1628         if (c >= a)
1629                 return c;
1630
1631         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1632         if (c >= b) {
1633                 if (_unlikely_(c < USEC_PER_SEC))
1634                         return b;
1635
1636                 c -= USEC_PER_SEC;
1637         }
1638
1639         if (c >= a)
1640                 return c;
1641
1642         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1643         if (c >= b) {
1644                 if (_unlikely_(c < USEC_PER_MSEC*250))
1645                         return b;
1646
1647                 c -= USEC_PER_MSEC*250;
1648         }
1649
1650         if (c >= a)
1651                 return c;
1652
1653         return b;
1654 }
1655
1656 static int event_arm_timer(
1657                 sd_event *e,
1658                 struct clock_data *d) {
1659
1660         struct itimerspec its = {};
1661         sd_event_source *a, *b;
1662         usec_t t;
1663         int r;
1664
1665         assert(e);
1666         assert(d);
1667
1668         a = prioq_peek(d->earliest);
1669         if (!a || a->enabled == SD_EVENT_OFF) {
1670
1671                 if (d->fd < 0)
1672                         return 0;
1673
1674                 if (d->next == (usec_t) -1)
1675                         return 0;
1676
1677                 /* disarm */
1678                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1679                 if (r < 0)
1680                         return r;
1681
1682                 d->next = (usec_t) -1;
1683                 return 0;
1684         }
1685
1686         b = prioq_peek(d->latest);
1687         assert_se(b && b->enabled != SD_EVENT_OFF);
1688
1689         t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1690         if (d->next == t)
1691                 return 0;
1692
1693         assert_se(d->fd >= 0);
1694
1695         if (t == 0) {
1696                 /* We don' want to disarm here, just mean some time looooong ago. */
1697                 its.it_value.tv_sec = 0;
1698                 its.it_value.tv_nsec = 1;
1699         } else
1700                 timespec_store(&its.it_value, t);
1701
1702         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1703         if (r < 0)
1704                 return -errno;
1705
1706         d->next = t;
1707         return 0;
1708 }
1709
1710 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1711         assert(e);
1712         assert(s);
1713         assert(s->type == SOURCE_IO);
1714
1715         /* If the event source was already pending, we just OR in the
1716          * new revents, otherwise we reset the value. The ORing is
1717          * necessary to handle EPOLLONESHOT events properly where
1718          * readability might happen independently of writability, and
1719          * we need to keep track of both */
1720
1721         if (s->pending)
1722                 s->io.revents |= revents;
1723         else
1724                 s->io.revents = revents;
1725
1726         return source_set_pending(s, true);
1727 }
1728
1729 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1730         uint64_t x;
1731         ssize_t ss;
1732
1733         assert(e);
1734         assert(fd >= 0);
1735
1736         assert_return(events == EPOLLIN, -EIO);
1737
1738         ss = read(fd, &x, sizeof(x));
1739         if (ss < 0) {
1740                 if (errno == EAGAIN || errno == EINTR)
1741                         return 0;
1742
1743                 return -errno;
1744         }
1745
1746         if (_unlikely_(ss != sizeof(x)))
1747                 return -EIO;
1748
1749         if (next)
1750                 *next = (usec_t) -1;
1751
1752         return 0;
1753 }
1754
1755 static int process_timer(
1756                 sd_event *e,
1757                 usec_t n,
1758                 struct clock_data *d) {
1759
1760         sd_event_source *s;
1761         int r;
1762
1763         assert(e);
1764         assert(d);
1765
1766         for (;;) {
1767                 s = prioq_peek(d->earliest);
1768                 if (!s ||
1769                     s->time.next > n ||
1770                     s->enabled == SD_EVENT_OFF ||
1771                     s->pending)
1772                         break;
1773
1774                 r = source_set_pending(s, true);
1775                 if (r < 0)
1776                         return r;
1777
1778                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1779                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1780         }
1781
1782         return 0;
1783 }
1784
/* Polls every enabled, not-yet-pending child source with waitid() and
 * marks those whose child reported a state change as pending. Returns
 * 0 on success, negative errno on failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD even of a
           previous invocation and we don't want to maintain a
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued: don't overwrite the stored siginfo. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                /* si_pid stays 0 below if no state change was queued
                 * for this child. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
1851
/* Drains the signalfd and marks the matching signal sources pending.
 * For SIGCHLD the child sources are processed first. Returns > 0 if at
 * least one signal was read, 0 if the fd was already empty, negative
 * errno on failure. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        /* Loop until the (non-blocking) signalfd is drained. */
        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        /* EAGAIN: fd drained; report whether anything
                         * was read at all. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(ss != sizeof(si)))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        /* SIGCHLD may be watched on behalf of child
                         * sources only; handle those first, and fall
                         * through to the explicit signal source only
                         * if one exists. */
                        r = process_child(e);
                        if (r < 0)
                                return r;
                        if (r > 0 || !s)
                                continue;
                } else
                        /* A signal without a source should not occur,
                         * presumably because only watched signals are
                         * in the mask — treated as an I/O error. */
                        if (!s)
                                return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
1896
/* Dispatches a single event source: runs its user callback and does the
 * surrounding bookkeeping (pending flag, marking post sources, oneshot
 * disabling, ref-count based destruction). Returns 1 on success or a
 * negative errno if the bookkeeping itself failed. A negative return
 * from the user callback is NOT propagated; it only disables the
 * source. */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER and EXIT sources stay pending across dispatches, so that
         * they keep firing each iteration until explicitly disabled. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* Oneshot sources are disabled before the callback runs, so the
         * callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, dropping the last reference must not
         * free the source; instead we free it below after the callback
         * returned (see the n_ref == 0 check at the bottom). */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                /* Decide before the callback runs whether the child is
                 * actually dead, since the callback may alter siginfo. */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
                /* The watchdog is handled internally and never becomes a
                 * dispatchable source. */
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));

        /* If the callback dropped the last reference, free the source
         * now; otherwise merely disable it if the callback failed. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
1995
/* Runs the prepare() callbacks of enabled event sources that registered
 * one, at most once per loop iteration each. Sources live in the
 * e->prepare priority queue. Returns 0 on success, negative errno on
 * queue manipulation failure; a failing prepare callback only disables
 * its source. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* Stop at the first source that was already prepared
                 * this iteration (the queue sorts those to the back) or
                 * that is disabled. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as prepared and reshuffle BEFORE invoking the
                 * callback, so we make progress even if it fails. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* Protect the source from being freed from inside its
                 * own callback; freed below if the last ref was dropped. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2030
2031 static int dispatch_exit(sd_event *e) {
2032         sd_event_source *p;
2033         int r;
2034
2035         assert(e);
2036
2037         p = prioq_peek(e->exit);
2038         if (!p || p->enabled == SD_EVENT_OFF) {
2039                 e->state = SD_EVENT_FINISHED;
2040                 return 0;
2041         }
2042
2043         sd_event_ref(e);
2044         e->iteration++;
2045         e->state = SD_EVENT_EXITING;
2046
2047         r = source_dispatch(p);
2048
2049         e->state = SD_EVENT_PASSIVE;
2050         sd_event_unref(e);
2051
2052         return r;
2053 }
2054
2055 static sd_event_source* event_next_pending(sd_event *e) {
2056         sd_event_source *p;
2057
2058         assert(e);
2059
2060         p = prioq_peek(e->pending);
2061         if (!p)
2062                 return NULL;
2063
2064         if (p->enabled == SD_EVENT_OFF)
2065                 return NULL;
2066
2067         return p;
2068 }
2069
2070 static int arm_watchdog(sd_event *e) {
2071         struct itimerspec its = {};
2072         usec_t t;
2073         int r;
2074
2075         assert(e);
2076         assert(e->watchdog_fd >= 0);
2077
2078         t = sleep_between(e,
2079                           e->watchdog_last + (e->watchdog_period / 2),
2080                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2081
2082         timespec_store(&its.it_value, t);
2083
2084         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2085         if (r < 0)
2086                 return -errno;
2087
2088         return 0;
2089 }
2090
2091 static int process_watchdog(sd_event *e) {
2092         assert(e);
2093
2094         if (!e->watchdog)
2095                 return 0;
2096
2097         /* Don't notify watchdog too often */
2098         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2099                 return 0;
2100
2101         sd_notify(false, "WATCHDOG=1");
2102         e->watchdog_last = e->timestamp.monotonic;
2103
2104         return arm_watchdog(e);
2105 }
2106
/* Runs one iteration of the event loop: runs prepare callbacks, arms the
 * per-clock timerfds, waits in epoll for at most `timeout` usec
 * ((uint64_t) -1 means block indefinitely), flushes/processes whatever
 * fired, and dispatches at most one pending event source. Returns a
 * positive value if something was dispatched or the wait was
 * interrupted, negative errno on failure. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, only exit handlers run. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the loop object while callbacks run. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        /* Re-arm one timerfd per clock from the earliest pending timer. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* If something is already dispatchable, don't block in epoll. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* newa() allocates on the stack, hence the EPOLL_QUEUE_MAX cap. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the usec timeout UP to whole milliseconds so we never
         * wake up too early. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* EAGAIN/EINTR are not failures, just an empty iteration. */
                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                goto finish;
        }

        /* Cache "now" once per iteration, for every clock we track. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with their SOURCE_* type as the
                 * epoll data pointer; anything else is a real IO source. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Turn elapsed timers into pending sources. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        /* Dispatch only a single source per iteration; callers invoke us
         * in a loop (see sd_event_loop()). */
        p = event_next_pending(e);
        if (!p) {
                r = 1;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
2222
2223 _public_ int sd_event_loop(sd_event *e) {
2224         int r;
2225
2226         assert_return(e, -EINVAL);
2227         assert_return(!event_pid_changed(e), -ECHILD);
2228         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2229
2230         sd_event_ref(e);
2231
2232         while (e->state != SD_EVENT_FINISHED) {
2233                 r = sd_event_run(e, (uint64_t) -1);
2234                 if (r < 0)
2235                         goto finish;
2236         }
2237
2238         r = e->exit_code;
2239
2240 finish:
2241         sd_event_unref(e);
2242         return r;
2243 }
2244
/* Returns the current state of the event loop: one of SD_EVENT_PASSIVE,
 * SD_EVENT_RUNNING, SD_EVENT_EXITING or SD_EVENT_FINISHED (as set in
 * sd_event_run()/dispatch_exit()). */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
2251
2252 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2253         assert_return(e, -EINVAL);
2254         assert_return(code, -EINVAL);
2255         assert_return(!event_pid_changed(e), -ECHILD);
2256
2257         if (!e->exit_requested)
2258                 return -ENODATA;
2259
2260         *code = e->exit_code;
2261         return 0;
2262 }
2263
2264 _public_ int sd_event_exit(sd_event *e, int code) {
2265         assert_return(e, -EINVAL);
2266         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2267         assert_return(!event_pid_changed(e), -ECHILD);
2268
2269         e->exit_requested = true;
2270         e->exit_code = code;
2271
2272         return 0;
2273 }
2274
2275 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2276         assert_return(e, -EINVAL);
2277         assert_return(usec, -EINVAL);
2278         assert_return(!event_pid_changed(e), -ECHILD);
2279
2280         /* If we haven't run yet, just get the actual time */
2281         if (!dual_timestamp_is_set(&e->timestamp))
2282                 return -ENODATA;
2283
2284         switch (clock) {
2285
2286         case CLOCK_REALTIME:
2287         case CLOCK_REALTIME_ALARM:
2288                 *usec = e->timestamp.realtime;
2289                 break;
2290
2291         case CLOCK_MONOTONIC:
2292                 *usec = e->timestamp.monotonic;
2293                 break;
2294
2295         case CLOCK_BOOTTIME_ALARM:
2296                 *usec = e->timestamp_boottime;
2297                 break;
2298         }
2299
2300         return 0;
2301 }
2302
/* Returns the calling thread's default event loop, allocating it on
 * first use. With ret == NULL, only reports (as 0/1) whether a default
 * loop exists already. Returns 1 when a new loop was created, 0 when an
 * existing one was re-referenced, negative errno on allocation failure. */
_public_ int sd_event_default(sd_event **ret) {

        /* One default loop per thread. */
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                /* Hand out an additional reference to the existing loop. */
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Remember where our thread-local pointer lives, so the event
         * object can reset it when it is freed. */
        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
2328
2329 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2330         assert_return(e, -EINVAL);
2331         assert_return(tid, -EINVAL);
2332         assert_return(!event_pid_changed(e), -ECHILD);
2333
2334         if (e->tid != 0) {
2335                 *tid = e->tid;
2336                 return 0;
2337         }
2338
2339         return -ENXIO;
2340 }
2341
/* Enables or disables sd_notify()-based watchdog support for this event
 * loop: when enabled (and the service manager requested a watchdog, see
 * sd_watchdog_enabled()) the loop periodically pings the manager with
 * "WATCHDOG=1" via an internal timerfd. Returns the new watchdog state
 * (0/1), or a negative errno on failure. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? Nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* r == 0 means the service manager didn't ask for a
                 * watchdog; pass that through to the caller. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* The SOURCE_WATCHDOG tag lets sd_event_run() recognize
                 * this fd among the epoll events. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        /* Deregistration is best-effort; the fd is closed
                         * right after anyway. */
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2393
/* Returns whether watchdog support is currently enabled for this event
 * loop (see sd_event_set_watchdog()). */
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}