chiark / gitweb /
core: make systemd.confirm_spawn=1 actually work
[elogind.git] / src / core / manager.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <sys/epoll.h>
26 #include <signal.h>
27 #include <sys/signalfd.h>
28 #include <sys/wait.h>
29 #include <unistd.h>
30 #include <sys/poll.h>
31 #include <sys/reboot.h>
32 #include <sys/ioctl.h>
33 #include <linux/kd.h>
34 #include <termios.h>
35 #include <fcntl.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39
40 #ifdef HAVE_AUDIT
41 #include <libaudit.h>
42 #endif
43
44 #include <systemd/sd-daemon.h>
45
46 #include "manager.h"
47 #include "transaction.h"
48 #include "hashmap.h"
49 #include "macro.h"
50 #include "strv.h"
51 #include "log.h"
52 #include "util.h"
53 #include "mkdir.h"
54 #include "ratelimit.h"
55 #include "cgroup.h"
56 #include "mount-setup.h"
57 #include "unit-name.h"
58 #include "dbus-unit.h"
59 #include "dbus-job.h"
60 #include "missing.h"
61 #include "path-lookup.h"
62 #include "special.h"
63 #include "bus-errors.h"
64 #include "exit-status.h"
65 #include "virt.h"
66 #include "watchdog.h"
67 #include "cgroup-util.h"
68 #include "path-util.h"
69
70 /* As soon as 16 units are in our GC queue, make sure to run a gc sweep */
71 #define GC_QUEUE_ENTRIES_MAX 16
72
73 /* As soon as 10s have passed since a unit was added to our GC queue, make sure to run a gc sweep */
74 #define GC_QUEUE_USEC_MAX (10*USEC_PER_SEC)
75
76 /* Where clients shall send notification messages to */
77 #define NOTIFY_SOCKET_SYSTEM "/run/systemd/notify"
78 #define NOTIFY_SOCKET_USER "@/org/freedesktop/systemd1/notify"
79
80 static int manager_setup_notify(Manager *m) {
81         union {
82                 struct sockaddr sa;
83                 struct sockaddr_un un;
84         } sa;
85         struct epoll_event ev;
86         int one = 1, r;
87         mode_t u;
88
89         assert(m);
90
91         m->notify_watch.type = WATCH_NOTIFY;
92         if ((m->notify_watch.fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
93                 log_error("Failed to allocate notification socket: %m");
94                 return -errno;
95         }
96
97         zero(sa);
98         sa.sa.sa_family = AF_UNIX;
99
100         if (getpid() != 1)
101                 snprintf(sa.un.sun_path, sizeof(sa.un.sun_path), NOTIFY_SOCKET_USER "/%llu", random_ull());
102         else {
103                 unlink(NOTIFY_SOCKET_SYSTEM);
104                 strncpy(sa.un.sun_path, NOTIFY_SOCKET_SYSTEM, sizeof(sa.un.sun_path));
105         }
106
107         if (sa.un.sun_path[0] == '@')
108                 sa.un.sun_path[0] = 0;
109
110         u = umask(0111);
111         r = bind(m->notify_watch.fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1));
112         umask(u);
113
114         if (r < 0) {
115                 log_error("bind() failed: %m");
116                 return -errno;
117         }
118
119         if (setsockopt(m->notify_watch.fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) {
120                 log_error("SO_PASSCRED failed: %m");
121                 return -errno;
122         }
123
124         zero(ev);
125         ev.events = EPOLLIN;
126         ev.data.ptr = &m->notify_watch;
127
128         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->notify_watch.fd, &ev) < 0)
129                 return -errno;
130
131         if (sa.un.sun_path[0] == 0)
132                 sa.un.sun_path[0] = '@';
133
134         if (!(m->notify_socket = strdup(sa.un.sun_path)))
135                 return -ENOMEM;
136
137         log_debug("Using notification socket %s", m->notify_socket);
138
139         return 0;
140 }
141
142 static int enable_special_signals(Manager *m) {
143         int fd;
144
145         assert(m);
146
147         /* Enable that we get SIGINT on control-alt-del. In containers
148          * this will fail with EPERM, so ignore that. */
149         if (reboot(RB_DISABLE_CAD) < 0 && errno != EPERM)
150                 log_warning("Failed to enable ctrl-alt-del handling: %m");
151
152         fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
153         if (fd < 0) {
154                 /* Support systems without virtual console */
155                 if (fd != -ENOENT)
156                         log_warning("Failed to open /dev/tty0: %m");
157         } else {
158                 /* Enable that we get SIGWINCH on kbrequest */
159                 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
160                         log_warning("Failed to enable kbrequest handling: %s", strerror(errno));
161
162                 close_nointr_nofail(fd);
163         }
164
165         return 0;
166 }
167
168 static int manager_setup_signals(Manager *m) {
169         sigset_t mask;
170         struct epoll_event ev;
171         struct sigaction sa;
172
173         assert(m);
174
175         /* We are not interested in SIGSTOP and friends. */
176         zero(sa);
177         sa.sa_handler = SIG_DFL;
178         sa.sa_flags = SA_NOCLDSTOP|SA_RESTART;
179         assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
180
181         assert_se(sigemptyset(&mask) == 0);
182
183         sigset_add_many(&mask,
184                         SIGCHLD,     /* Child died */
185                         SIGTERM,     /* Reexecute daemon */
186                         SIGHUP,      /* Reload configuration */
187                         SIGUSR1,     /* systemd/upstart: reconnect to D-Bus */
188                         SIGUSR2,     /* systemd: dump status */
189                         SIGINT,      /* Kernel sends us this on control-alt-del */
190                         SIGWINCH,    /* Kernel sends us this on kbrequest (alt-arrowup) */
191                         SIGPWR,      /* Some kernel drivers and upsd send us this on power failure */
192                         SIGRTMIN+0,  /* systemd: start default.target */
193                         SIGRTMIN+1,  /* systemd: isolate rescue.target */
194                         SIGRTMIN+2,  /* systemd: isolate emergency.target */
195                         SIGRTMIN+3,  /* systemd: start halt.target */
196                         SIGRTMIN+4,  /* systemd: start poweroff.target */
197                         SIGRTMIN+5,  /* systemd: start reboot.target */
198                         SIGRTMIN+6,  /* systemd: start kexec.target */
199                         SIGRTMIN+13, /* systemd: Immediate halt */
200                         SIGRTMIN+14, /* systemd: Immediate poweroff */
201                         SIGRTMIN+15, /* systemd: Immediate reboot */
202                         SIGRTMIN+16, /* systemd: Immediate kexec */
203                         SIGRTMIN+20, /* systemd: enable status messages */
204                         SIGRTMIN+21, /* systemd: disable status messages */
205                         SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
206                         SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
207                         SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
208                         SIGRTMIN+27, /* systemd: set log target to console */
209                         SIGRTMIN+28, /* systemd: set log target to kmsg */
210                         SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg */
211                         -1);
212         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
213
214         m->signal_watch.type = WATCH_SIGNAL;
215         if ((m->signal_watch.fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0)
216                 return -errno;
217
218         zero(ev);
219         ev.events = EPOLLIN;
220         ev.data.ptr = &m->signal_watch;
221
222         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->signal_watch.fd, &ev) < 0)
223                 return -errno;
224
225         if (m->running_as == MANAGER_SYSTEM)
226                 return enable_special_signals(m);
227
228         return 0;
229 }
230
231 static void manager_strip_environment(Manager *m) {
232         assert(m);
233
234         /* Remove variables from the inherited set that are part of
235          * the container interface:
236          * http://www.freedesktop.org/wiki/Software/systemd/ContainerInterface */
237         strv_remove_prefix(m->environment, "container=");
238         strv_remove_prefix(m->environment, "container_");
239
240         /* Remove variables from the inherited set that are part of
241          * the initrd interface:
242          * http://www.freedesktop.org/wiki/Software/systemd/InitrdInterface */
243         strv_remove_prefix(m->environment, "RD_");
244 }
245
246 int manager_new(ManagerRunningAs running_as, Manager **_m) {
247         Manager *m;
248         int r = -ENOMEM;
249
250         assert(_m);
251         assert(running_as >= 0);
252         assert(running_as < _MANAGER_RUNNING_AS_MAX);
253
254         if (!(m = new0(Manager, 1)))
255                 return -ENOMEM;
256
257         dual_timestamp_get(&m->startup_timestamp);
258
259         m->running_as = running_as;
260         m->name_data_slot = m->conn_data_slot = m->subscribed_data_slot = -1;
261         m->exit_code = _MANAGER_EXIT_CODE_INVALID;
262         m->pin_cgroupfs_fd = -1;
263         m->idle_pipe[0] = m->idle_pipe[1] = -1;
264
265 #ifdef HAVE_AUDIT
266         m->audit_fd = -1;
267 #endif
268
269         m->signal_watch.fd = m->mount_watch.fd = m->udev_watch.fd = m->epoll_fd = m->dev_autofs_fd = m->swap_watch.fd = -1;
270         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
271
272         m->environment = strv_copy(environ);
273         if (!m->environment)
274                 goto fail;
275
276         manager_strip_environment(m);
277
278         if (running_as == MANAGER_SYSTEM) {
279                 m->default_controllers = strv_new("cpu", NULL);
280                 if (!m->default_controllers)
281                         goto fail;
282         }
283
284         if (!(m->units = hashmap_new(string_hash_func, string_compare_func)))
285                 goto fail;
286
287         if (!(m->jobs = hashmap_new(trivial_hash_func, trivial_compare_func)))
288                 goto fail;
289
290         if (!(m->watch_pids = hashmap_new(trivial_hash_func, trivial_compare_func)))
291                 goto fail;
292
293         if (!(m->cgroup_bondings = hashmap_new(string_hash_func, string_compare_func)))
294                 goto fail;
295
296         if (!(m->watch_bus = hashmap_new(string_hash_func, string_compare_func)))
297                 goto fail;
298
299         if ((m->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
300                 goto fail;
301
302         if ((r = manager_setup_signals(m)) < 0)
303                 goto fail;
304
305         if ((r = manager_setup_cgroup(m)) < 0)
306                 goto fail;
307
308         if ((r = manager_setup_notify(m)) < 0)
309                 goto fail;
310
311         /* Try to connect to the busses, if possible. */
312         if ((r = bus_init(m, running_as != MANAGER_SYSTEM)) < 0)
313                 goto fail;
314
315 #ifdef HAVE_AUDIT
316         if ((m->audit_fd = audit_open()) < 0 &&
317             /* If the kernel lacks netlink or audit support,
318              * don't worry about it. */
319             errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
320                 log_error("Failed to connect to audit log: %m");
321 #endif
322
323         m->taint_usr = dir_is_empty("/usr") > 0;
324
325         *_m = m;
326         return 0;
327
328 fail:
329         manager_free(m);
330         return r;
331 }
332
333 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
334         Unit *u;
335         unsigned n = 0;
336
337         assert(m);
338
339         while ((u = m->cleanup_queue)) {
340                 assert(u->in_cleanup_queue);
341
342                 unit_free(u);
343                 n++;
344         }
345
346         return n;
347 }
348
/* Offsets added to the current GC marker to record the verdict a sweep
 * reached for a unit. */
enum {
        GC_OFFSET_IN_PATH = 0,  /* Unit is on the path currently being traversed */
        GC_OFFSET_UNSURE  = 1,  /* No verdict could be reached */
        GC_OFFSET_GOOD    = 2,  /* Unit is still needed */
        GC_OFFSET_BAD     = 3,  /* Unit is not needed anymore */
        _GC_OFFSET_MAX    = 4
};
356
357 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
358         Iterator i;
359         Unit *other;
360         bool is_bad;
361
362         assert(u);
363
364         if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
365             u->gc_marker == gc_marker + GC_OFFSET_BAD ||
366             u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
367                 return;
368
369         if (u->in_cleanup_queue)
370                 goto bad;
371
372         if (unit_check_gc(u))
373                 goto good;
374
375         u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
376
377         is_bad = true;
378
379         SET_FOREACH(other, u->dependencies[UNIT_REFERENCED_BY], i) {
380                 unit_gc_sweep(other, gc_marker);
381
382                 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
383                         goto good;
384
385                 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
386                         is_bad = false;
387         }
388
389         if (is_bad)
390                 goto bad;
391
392         /* We were unable to find anything out about this entry, so
393          * let's investigate it later */
394         u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
395         unit_add_to_gc_queue(u);
396         return;
397
398 bad:
399         /* We definitely know that this one is not useful anymore, so
400          * let's mark it for deletion */
401         u->gc_marker = gc_marker + GC_OFFSET_BAD;
402         unit_add_to_cleanup_queue(u);
403         return;
404
405 good:
406         u->gc_marker = gc_marker + GC_OFFSET_GOOD;
407 }
408
409 static unsigned manager_dispatch_gc_queue(Manager *m) {
410         Unit *u;
411         unsigned n = 0;
412         unsigned gc_marker;
413
414         assert(m);
415
416         if ((m->n_in_gc_queue < GC_QUEUE_ENTRIES_MAX) &&
417             (m->gc_queue_timestamp <= 0 ||
418              (m->gc_queue_timestamp + GC_QUEUE_USEC_MAX) > now(CLOCK_MONOTONIC)))
419                 return 0;
420
421         log_debug("Running GC...");
422
423         m->gc_marker += _GC_OFFSET_MAX;
424         if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
425                 m->gc_marker = 1;
426
427         gc_marker = m->gc_marker;
428
429         while ((u = m->gc_queue)) {
430                 assert(u->in_gc_queue);
431
432                 unit_gc_sweep(u, gc_marker);
433
434                 LIST_REMOVE(Unit, gc_queue, m->gc_queue, u);
435                 u->in_gc_queue = false;
436
437                 n++;
438
439                 if (u->gc_marker == gc_marker + GC_OFFSET_BAD ||
440                     u->gc_marker == gc_marker + GC_OFFSET_UNSURE) {
441                         log_debug("Collecting %s", u->id);
442                         u->gc_marker = gc_marker + GC_OFFSET_BAD;
443                         unit_add_to_cleanup_queue(u);
444                 }
445         }
446
447         m->n_in_gc_queue = 0;
448         m->gc_queue_timestamp = 0;
449
450         return n;
451 }
452
453 static void manager_clear_jobs_and_units(Manager *m) {
454         Unit *u;
455
456         assert(m);
457
458         while ((u = hashmap_first(m->units)))
459                 unit_free(u);
460
461         manager_dispatch_cleanup_queue(m);
462
463         assert(!m->load_queue);
464         assert(!m->run_queue);
465         assert(!m->dbus_unit_queue);
466         assert(!m->dbus_job_queue);
467         assert(!m->cleanup_queue);
468         assert(!m->gc_queue);
469
470         assert(hashmap_isempty(m->jobs));
471         assert(hashmap_isempty(m->units));
472 }
473
474 void manager_free(Manager *m) {
475         UnitType c;
476         int i;
477
478         assert(m);
479
480         manager_clear_jobs_and_units(m);
481
482         for (c = 0; c < _UNIT_TYPE_MAX; c++)
483                 if (unit_vtable[c]->shutdown)
484                         unit_vtable[c]->shutdown(m);
485
486         /* If we reexecute ourselves, we keep the root cgroup
487          * around */
488         manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
489
490         manager_undo_generators(m);
491
492         bus_done(m);
493
494         hashmap_free(m->units);
495         hashmap_free(m->jobs);
496         hashmap_free(m->watch_pids);
497         hashmap_free(m->watch_bus);
498
499         if (m->epoll_fd >= 0)
500                 close_nointr_nofail(m->epoll_fd);
501         if (m->signal_watch.fd >= 0)
502                 close_nointr_nofail(m->signal_watch.fd);
503         if (m->notify_watch.fd >= 0)
504                 close_nointr_nofail(m->notify_watch.fd);
505
506 #ifdef HAVE_AUDIT
507         if (m->audit_fd >= 0)
508                 audit_close(m->audit_fd);
509 #endif
510
511         free(m->notify_socket);
512
513         lookup_paths_free(&m->lookup_paths);
514         strv_free(m->environment);
515
516         strv_free(m->default_controllers);
517
518         hashmap_free(m->cgroup_bondings);
519         set_free_free(m->unit_path_cache);
520
521         close_pipe(m->idle_pipe);
522
523         free(m->switch_root);
524         free(m->switch_root_init);
525
526         for (i = 0; i < RLIMIT_NLIMITS; i++)
527                 free(m->rlimit[i]);
528
529         free(m);
530 }
531
532 int manager_enumerate(Manager *m) {
533         int r = 0, q;
534         UnitType c;
535
536         assert(m);
537
538         /* Let's ask every type to load all units from disk/kernel
539          * that it might know */
540         for (c = 0; c < _UNIT_TYPE_MAX; c++)
541                 if (unit_vtable[c]->enumerate)
542                         if ((q = unit_vtable[c]->enumerate(m)) < 0)
543                                 r = q;
544
545         manager_dispatch_load_queue(m);
546         return r;
547 }
548
549 int manager_coldplug(Manager *m) {
550         int r = 0, q;
551         Iterator i;
552         Unit *u;
553         char *k;
554
555         assert(m);
556
557         /* Then, let's set up their initial state. */
558         HASHMAP_FOREACH_KEY(u, k, m->units, i) {
559
560                 /* ignore aliases */
561                 if (u->id != k)
562                         continue;
563
564                 if ((q = unit_coldplug(u)) < 0)
565                         r = q;
566         }
567
568         return r;
569 }
570
571 static void manager_build_unit_path_cache(Manager *m) {
572         char **i;
573         DIR *d = NULL;
574         int r;
575
576         assert(m);
577
578         set_free_free(m->unit_path_cache);
579
580         if (!(m->unit_path_cache = set_new(string_hash_func, string_compare_func))) {
581                 log_error("Failed to allocate unit path cache.");
582                 return;
583         }
584
585         /* This simply builds a list of files we know exist, so that
586          * we don't always have to go to disk */
587
588         STRV_FOREACH(i, m->lookup_paths.unit_path) {
589                 struct dirent *de;
590
591                 if (!(d = opendir(*i))) {
592                         log_error("Failed to open directory: %m");
593                         continue;
594                 }
595
596                 while ((de = readdir(d))) {
597                         char *p;
598
599                         if (ignore_file(de->d_name))
600                                 continue;
601
602                         p = join(streq(*i, "/") ? "" : *i, "/", de->d_name, NULL);
603                         if (!p) {
604                                 r = -ENOMEM;
605                                 goto fail;
606                         }
607
608                         if ((r = set_put(m->unit_path_cache, p)) < 0) {
609                                 free(p);
610                                 goto fail;
611                         }
612                 }
613
614                 closedir(d);
615                 d = NULL;
616         }
617
618         return;
619
620 fail:
621         log_error("Failed to build unit path cache: %s", strerror(-r));
622
623         set_free_free(m->unit_path_cache);
624         m->unit_path_cache = NULL;
625
626         if (d)
627                 closedir(d);
628 }
629
630 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
631         int r, q;
632
633         assert(m);
634
635         manager_run_generators(m);
636
637         r = lookup_paths_init(
638                         &m->lookup_paths, m->running_as, true,
639                         m->generator_unit_path,
640                         m->generator_unit_path_early,
641                         m->generator_unit_path_late);
642         if (r < 0)
643                 return r;
644
645         manager_build_unit_path_cache(m);
646
647         /* If we will deserialize make sure that during enumeration
648          * this is already known, so we increase the counter here
649          * already */
650         if (serialization)
651                 m->n_reloading ++;
652
653         /* First, enumerate what we can from all config files */
654         r = manager_enumerate(m);
655
656         /* Second, deserialize if there is something to deserialize */
657         if (serialization) {
658                 q = manager_deserialize(m, serialization, fds);
659                 if (q < 0)
660                         r = q;
661         }
662
663         /* Third, fire things up! */
664         q = manager_coldplug(m);
665         if (q < 0)
666                 r = q;
667
668         if (serialization) {
669                 assert(m->n_reloading > 0);
670                 m->n_reloading --;
671         }
672
673         return r;
674 }
675
676 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, bool override, DBusError *e, Job **_ret) {
677         int r;
678         Transaction *tr;
679
680         assert(m);
681         assert(type < _JOB_TYPE_MAX);
682         assert(unit);
683         assert(mode < _JOB_MODE_MAX);
684
685         if (mode == JOB_ISOLATE && type != JOB_START) {
686                 dbus_set_error(e, BUS_ERROR_INVALID_JOB_MODE, "Isolate is only valid for start.");
687                 return -EINVAL;
688         }
689
690         if (mode == JOB_ISOLATE && !unit->allow_isolate) {
691                 dbus_set_error(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
692                 return -EPERM;
693         }
694
695         log_debug("Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
696
697         job_type_collapse(&type, unit);
698
699         tr = transaction_new();
700         if (!tr)
701                 return -ENOMEM;
702
703         r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, override, false,
704                                                  mode == JOB_IGNORE_DEPENDENCIES || mode == JOB_IGNORE_REQUIREMENTS,
705                                                  mode == JOB_IGNORE_DEPENDENCIES, e);
706         if (r < 0)
707                 goto tr_abort;
708
709         if (mode == JOB_ISOLATE) {
710                 r = transaction_add_isolate_jobs(tr, m);
711                 if (r < 0)
712                         goto tr_abort;
713         }
714
715         r = transaction_activate(tr, m, mode, e);
716         if (r < 0)
717                 goto tr_abort;
718
719         log_debug("Enqueued job %s/%s as %u", unit->id, job_type_to_string(type), (unsigned) tr->anchor_job->id);
720
721         if (_ret)
722                 *_ret = tr->anchor_job;
723
724         transaction_free(tr);
725         return 0;
726
727 tr_abort:
728         transaction_abort(tr);
729         transaction_free(tr);
730         return r;
731 }
732
733 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, bool override, DBusError *e, Job **_ret) {
734         Unit *unit;
735         int r;
736
737         assert(m);
738         assert(type < _JOB_TYPE_MAX);
739         assert(name);
740         assert(mode < _JOB_MODE_MAX);
741
742         if ((r = manager_load_unit(m, name, NULL, NULL, &unit)) < 0)
743                 return r;
744
745         return manager_add_job(m, type, unit, mode, override, e, _ret);
746 }
747
748 Job *manager_get_job(Manager *m, uint32_t id) {
749         assert(m);
750
751         return hashmap_get(m->jobs, UINT32_TO_PTR(id));
752 }
753
754 Unit *manager_get_unit(Manager *m, const char *name) {
755         assert(m);
756         assert(name);
757
758         return hashmap_get(m->units, name);
759 }
760
761 unsigned manager_dispatch_load_queue(Manager *m) {
762         Unit *u;
763         unsigned n = 0;
764
765         assert(m);
766
767         /* Make sure we are not run recursively */
768         if (m->dispatching_load_queue)
769                 return 0;
770
771         m->dispatching_load_queue = true;
772
773         /* Dispatches the load queue. Takes a unit from the queue and
774          * tries to load its data until the queue is empty */
775
776         while ((u = m->load_queue)) {
777                 assert(u->in_load_queue);
778
779                 unit_load(u);
780                 n++;
781         }
782
783         m->dispatching_load_queue = false;
784         return n;
785 }
786
787 int manager_load_unit_prepare(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
788         Unit *ret;
789         UnitType t;
790         int r;
791
792         assert(m);
793         assert(name || path);
794
795         /* This will prepare the unit for loading, but not actually
796          * load anything from disk. */
797
798         if (path && !is_path(path)) {
799                 dbus_set_error(e, BUS_ERROR_INVALID_PATH, "Path %s is not absolute.", path);
800                 return -EINVAL;
801         }
802
803         if (!name)
804                 name = path_get_file_name(path);
805
806         t = unit_name_to_type(name);
807
808         if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid_no_type(name, false)) {
809                 dbus_set_error(e, BUS_ERROR_INVALID_NAME, "Unit name %s is not valid.", name);
810                 return -EINVAL;
811         }
812
813         ret = manager_get_unit(m, name);
814         if (ret) {
815                 *_ret = ret;
816                 return 1;
817         }
818
819         ret = unit_new(m, unit_vtable[t]->object_size);
820         if (!ret)
821                 return -ENOMEM;
822
823         if (path) {
824                 ret->fragment_path = strdup(path);
825                 if (!ret->fragment_path) {
826                         unit_free(ret);
827                         return -ENOMEM;
828                 }
829         }
830
831         if ((r = unit_add_name(ret, name)) < 0) {
832                 unit_free(ret);
833                 return r;
834         }
835
836         unit_add_to_load_queue(ret);
837         unit_add_to_dbus_queue(ret);
838         unit_add_to_gc_queue(ret);
839
840         if (_ret)
841                 *_ret = ret;
842
843         return 0;
844 }
845
846 int manager_load_unit(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
847         int r;
848
849         assert(m);
850
851         /* This will load the service information files, but not actually
852          * start any services or anything. */
853
854         if ((r = manager_load_unit_prepare(m, name, path, e, _ret)) != 0)
855                 return r;
856
857         manager_dispatch_load_queue(m);
858
859         if (_ret)
860                 *_ret = unit_follow_merge(*_ret);
861
862         return 0;
863 }
864
865 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
866         Iterator i;
867         Job *j;
868
869         assert(s);
870         assert(f);
871
872         HASHMAP_FOREACH(j, s->jobs, i)
873                 job_dump(j, f, prefix);
874 }
875
876 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
877         Iterator i;
878         Unit *u;
879         const char *t;
880
881         assert(s);
882         assert(f);
883
884         HASHMAP_FOREACH_KEY(u, t, s->units, i)
885                 if (u->id == t)
886                         unit_dump(u, f, prefix);
887 }
888
889 void manager_clear_jobs(Manager *m) {
890         Job *j;
891
892         assert(m);
893
894         while ((j = hashmap_first(m->jobs)))
895                 /* No need to recurse. We're cancelling all jobs. */
896                 job_finish_and_invalidate(j, JOB_CANCELED, false);
897 }
898
899 unsigned manager_dispatch_run_queue(Manager *m) {
900         Job *j;
901         unsigned n = 0;
902
903         if (m->dispatching_run_queue)
904                 return 0;
905
906         m->dispatching_run_queue = true;
907
908         while ((j = m->run_queue)) {
909                 assert(j->installed);
910                 assert(j->in_run_queue);
911
912                 job_run_and_invalidate(j);
913                 n++;
914         }
915
916         m->dispatching_run_queue = false;
917         return n;
918 }
919
920 unsigned manager_dispatch_dbus_queue(Manager *m) {
921         Job *j;
922         Unit *u;
923         unsigned n = 0;
924
925         assert(m);
926
927         if (m->dispatching_dbus_queue)
928                 return 0;
929
930         m->dispatching_dbus_queue = true;
931
932         while ((u = m->dbus_unit_queue)) {
933                 assert(u->in_dbus_queue);
934
935                 bus_unit_send_change_signal(u);
936                 n++;
937         }
938
939         while ((j = m->dbus_job_queue)) {
940                 assert(j->in_dbus_queue);
941
942                 bus_job_send_change_signal(j);
943                 n++;
944         }
945
946         m->dispatching_dbus_queue = false;
947         return n;
948 }
949
950 static int manager_process_notify_fd(Manager *m) {
951         ssize_t n;
952
953         assert(m);
954
955         for (;;) {
956                 char buf[4096];
957                 struct msghdr msghdr;
958                 struct iovec iovec;
959                 struct ucred *ucred;
960                 union {
961                         struct cmsghdr cmsghdr;
962                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
963                 } control;
964                 Unit *u;
965                 char **tags;
966
967                 zero(iovec);
968                 iovec.iov_base = buf;
969                 iovec.iov_len = sizeof(buf)-1;
970
971                 zero(control);
972                 zero(msghdr);
973                 msghdr.msg_iov = &iovec;
974                 msghdr.msg_iovlen = 1;
975                 msghdr.msg_control = &control;
976                 msghdr.msg_controllen = sizeof(control);
977
978                 if ((n = recvmsg(m->notify_watch.fd, &msghdr, MSG_DONTWAIT)) <= 0) {
979                         if (n >= 0)
980                                 return -EIO;
981
982                         if (errno == EAGAIN || errno == EINTR)
983                                 break;
984
985                         return -errno;
986                 }
987
988                 if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
989                     control.cmsghdr.cmsg_level != SOL_SOCKET ||
990                     control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
991                     control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
992                         log_warning("Received notify message without credentials. Ignoring.");
993                         continue;
994                 }
995
996                 ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
997
998                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(ucred->pid))))
999                         if (!(u = cgroup_unit_by_pid(m, ucred->pid))) {
1000                                 log_warning("Cannot find unit for notify message of PID %lu.", (unsigned long) ucred->pid);
1001                                 continue;
1002                         }
1003
1004                 assert((size_t) n < sizeof(buf));
1005                 buf[n] = 0;
1006                 if (!(tags = strv_split(buf, "\n\r")))
1007                         return -ENOMEM;
1008
1009                 log_debug("Got notification message for unit %s", u->id);
1010
1011                 if (UNIT_VTABLE(u)->notify_message)
1012                         UNIT_VTABLE(u)->notify_message(u, ucred->pid, tags);
1013
1014                 strv_free(tags);
1015         }
1016
1017         return 0;
1018 }
1019
1020 static int manager_dispatch_sigchld(Manager *m) {
1021         assert(m);
1022
1023         for (;;) {
1024                 siginfo_t si;
1025                 Unit *u;
1026                 int r;
1027
1028                 zero(si);
1029
1030                 /* First we call waitd() for a PID and do not reap the
1031                  * zombie. That way we can still access /proc/$PID for
1032                  * it while it is a zombie. */
1033                 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
1034
1035                         if (errno == ECHILD)
1036                                 break;
1037
1038                         if (errno == EINTR)
1039                                 continue;
1040
1041                         return -errno;
1042                 }
1043
1044                 if (si.si_pid <= 0)
1045                         break;
1046
1047                 if (si.si_code == CLD_EXITED || si.si_code == CLD_KILLED || si.si_code == CLD_DUMPED) {
1048                         char *name = NULL;
1049
1050                         get_process_comm(si.si_pid, &name);
1051                         log_debug("Got SIGCHLD for process %lu (%s)", (unsigned long) si.si_pid, strna(name));
1052                         free(name);
1053                 }
1054
1055                 /* Let's flush any message the dying child might still
1056                  * have queued for us. This ensures that the process
1057                  * still exists in /proc so that we can figure out
1058                  * which cgroup and hence unit it belongs to. */
1059                 if ((r = manager_process_notify_fd(m)) < 0)
1060                         return r;
1061
1062                 /* And now figure out the unit this belongs to */
1063                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(si.si_pid))))
1064                         u = cgroup_unit_by_pid(m, si.si_pid);
1065
1066                 /* And now, we actually reap the zombie. */
1067                 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
1068                         if (errno == EINTR)
1069                                 continue;
1070
1071                         return -errno;
1072                 }
1073
1074                 if (si.si_code != CLD_EXITED && si.si_code != CLD_KILLED && si.si_code != CLD_DUMPED)
1075                         continue;
1076
1077                 log_debug("Child %lu died (code=%s, status=%i/%s)",
1078                           (long unsigned) si.si_pid,
1079                           sigchld_code_to_string(si.si_code),
1080                           si.si_status,
1081                           strna(si.si_code == CLD_EXITED
1082                                 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
1083                                 : signal_to_string(si.si_status)));
1084
1085                 if (!u)
1086                         continue;
1087
1088                 log_debug("Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
1089
1090                 hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
1091                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
1092         }
1093
1094         return 0;
1095 }
1096
1097 static int manager_start_target(Manager *m, const char *name, JobMode mode) {
1098         int r;
1099         DBusError error;
1100
1101         dbus_error_init(&error);
1102
1103         log_debug("Activating special unit %s", name);
1104
1105         if ((r = manager_add_job_by_name(m, JOB_START, name, mode, true, &error, NULL)) < 0)
1106                 log_error("Failed to enqueue %s job: %s", name, bus_error(&error, r));
1107
1108         dbus_error_free(&error);
1109
1110         return r;
1111 }
1112
1113 static int manager_process_signal_fd(Manager *m) {
1114         ssize_t n;
1115         struct signalfd_siginfo sfsi;
1116         bool sigchld = false;
1117
1118         assert(m);
1119
1120         for (;;) {
1121                 if ((n = read(m->signal_watch.fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
1122
1123                         if (n >= 0)
1124                                 return -EIO;
1125
1126                         if (errno == EINTR || errno == EAGAIN)
1127                                 break;
1128
1129                         return -errno;
1130                 }
1131
1132                 if (sfsi.ssi_pid > 0) {
1133                         char *p = NULL;
1134
1135                         get_process_comm(sfsi.ssi_pid, &p);
1136
1137                         log_debug("Received SIG%s from PID %lu (%s).",
1138                                   signal_to_string(sfsi.ssi_signo),
1139                                   (unsigned long) sfsi.ssi_pid, strna(p));
1140                         free(p);
1141                 } else
1142                         log_debug("Received SIG%s.", signal_to_string(sfsi.ssi_signo));
1143
1144                 switch (sfsi.ssi_signo) {
1145
1146                 case SIGCHLD:
1147                         sigchld = true;
1148                         break;
1149
1150                 case SIGTERM:
1151                         if (m->running_as == MANAGER_SYSTEM) {
1152                                 /* This is for compatibility with the
1153                                  * original sysvinit */
1154                                 m->exit_code = MANAGER_REEXECUTE;
1155                                 break;
1156                         }
1157
1158                         /* Fall through */
1159
1160                 case SIGINT:
1161                         if (m->running_as == MANAGER_SYSTEM) {
1162                                 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE);
1163                                 break;
1164                         }
1165
1166                         /* Run the exit target if there is one, if not, just exit. */
1167                         if (manager_start_target(m, SPECIAL_EXIT_TARGET, JOB_REPLACE) < 0) {
1168                                 m->exit_code = MANAGER_EXIT;
1169                                 return 0;
1170                         }
1171
1172                         break;
1173
1174                 case SIGWINCH:
1175                         if (m->running_as == MANAGER_SYSTEM)
1176                                 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
1177
1178                         /* This is a nop on non-init */
1179                         break;
1180
1181                 case SIGPWR:
1182                         if (m->running_as == MANAGER_SYSTEM)
1183                                 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
1184
1185                         /* This is a nop on non-init */
1186                         break;
1187
1188                 case SIGUSR1: {
1189                         Unit *u;
1190
1191                         u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
1192
1193                         if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
1194                                 log_info("Trying to reconnect to bus...");
1195                                 bus_init(m, true);
1196                         }
1197
1198                         if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
1199                                 log_info("Loading D-Bus service...");
1200                                 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
1201                         }
1202
1203                         break;
1204                 }
1205
1206                 case SIGUSR2: {
1207                         FILE *f;
1208                         char *dump = NULL;
1209                         size_t size;
1210
1211                         if (!(f = open_memstream(&dump, &size))) {
1212                                 log_warning("Failed to allocate memory stream.");
1213                                 break;
1214                         }
1215
1216                         manager_dump_units(m, f, "\t");
1217                         manager_dump_jobs(m, f, "\t");
1218
1219                         if (ferror(f)) {
1220                                 fclose(f);
1221                                 free(dump);
1222                                 log_warning("Failed to write status stream");
1223                                 break;
1224                         }
1225
1226                         fclose(f);
1227                         log_dump(LOG_INFO, dump);
1228                         free(dump);
1229
1230                         break;
1231                 }
1232
1233                 case SIGHUP:
1234                         m->exit_code = MANAGER_RELOAD;
1235                         break;
1236
1237                 default: {
1238
1239                         /* Starting SIGRTMIN+0 */
1240                         static const char * const target_table[] = {
1241                                 [0] = SPECIAL_DEFAULT_TARGET,
1242                                 [1] = SPECIAL_RESCUE_TARGET,
1243                                 [2] = SPECIAL_EMERGENCY_TARGET,
1244                                 [3] = SPECIAL_HALT_TARGET,
1245                                 [4] = SPECIAL_POWEROFF_TARGET,
1246                                 [5] = SPECIAL_REBOOT_TARGET,
1247                                 [6] = SPECIAL_KEXEC_TARGET
1248                         };
1249
1250                         /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
1251                         static const ManagerExitCode code_table[] = {
1252                                 [0] = MANAGER_HALT,
1253                                 [1] = MANAGER_POWEROFF,
1254                                 [2] = MANAGER_REBOOT,
1255                                 [3] = MANAGER_KEXEC
1256                         };
1257
1258                         if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
1259                             (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
1260                                 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
1261                                 manager_start_target(m, target_table[idx],
1262                                                      (idx == 1 || idx == 2) ? JOB_ISOLATE : JOB_REPLACE);
1263                                 break;
1264                         }
1265
1266                         if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
1267                             (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
1268                                 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
1269                                 break;
1270                         }
1271
1272                         switch (sfsi.ssi_signo - SIGRTMIN) {
1273
1274                         case 20:
1275                                 log_debug("Enabling showing of status.");
1276                                 manager_set_show_status(m, true);
1277                                 break;
1278
1279                         case 21:
1280                                 log_debug("Disabling showing of status.");
1281                                 manager_set_show_status(m, false);
1282                                 break;
1283
1284                         case 22:
1285                                 log_set_max_level(LOG_DEBUG);
1286                                 log_notice("Setting log level to debug.");
1287                                 break;
1288
1289                         case 23:
1290                                 log_set_max_level(LOG_INFO);
1291                                 log_notice("Setting log level to info.");
1292                                 break;
1293
1294                         case 26:
1295                                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1296                                 log_notice("Setting log target to journal-or-kmsg.");
1297                                 break;
1298
1299                         case 27:
1300                                 log_set_target(LOG_TARGET_CONSOLE);
1301                                 log_notice("Setting log target to console.");
1302                                 break;
1303
1304                         case 28:
1305                                 log_set_target(LOG_TARGET_KMSG);
1306                                 log_notice("Setting log target to kmsg.");
1307                                 break;
1308
1309                         case 29:
1310                                 log_set_target(LOG_TARGET_SYSLOG_OR_KMSG);
1311                                 log_notice("Setting log target to syslog-or-kmsg.");
1312                                 break;
1313
1314                         default:
1315                                 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
1316                         }
1317                 }
1318                 }
1319         }
1320
1321         if (sigchld)
1322                 return manager_dispatch_sigchld(m);
1323
1324         return 0;
1325 }
1326
1327 static int process_event(Manager *m, struct epoll_event *ev) {
1328         int r;
1329         Watch *w;
1330
1331         assert(m);
1332         assert(ev);
1333
1334         assert_se(w = ev->data.ptr);
1335
1336         if (w->type == WATCH_INVALID)
1337                 return 0;
1338
1339         switch (w->type) {
1340
1341         case WATCH_SIGNAL:
1342
1343                 /* An incoming signal? */
1344                 if (ev->events != EPOLLIN)
1345                         return -EINVAL;
1346
1347                 if ((r = manager_process_signal_fd(m)) < 0)
1348                         return r;
1349
1350                 break;
1351
1352         case WATCH_NOTIFY:
1353
1354                 /* An incoming daemon notification event? */
1355                 if (ev->events != EPOLLIN)
1356                         return -EINVAL;
1357
1358                 if ((r = manager_process_notify_fd(m)) < 0)
1359                         return r;
1360
1361                 break;
1362
1363         case WATCH_FD:
1364
1365                 /* Some fd event, to be dispatched to the units */
1366                 UNIT_VTABLE(w->data.unit)->fd_event(w->data.unit, w->fd, ev->events, w);
1367                 break;
1368
1369         case WATCH_UNIT_TIMER:
1370         case WATCH_JOB_TIMER: {
1371                 uint64_t v;
1372                 ssize_t k;
1373
1374                 /* Some timer event, to be dispatched to the units */
1375                 if ((k = read(w->fd, &v, sizeof(v))) != sizeof(v)) {
1376
1377                         if (k < 0 && (errno == EINTR || errno == EAGAIN))
1378                                 break;
1379
1380                         return k < 0 ? -errno : -EIO;
1381                 }
1382
1383                 if (w->type == WATCH_UNIT_TIMER)
1384                         UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
1385                 else
1386                         job_timer_event(w->data.job, v, w);
1387                 break;
1388         }
1389
1390         case WATCH_MOUNT:
1391                 /* Some mount table change, intended for the mount subsystem */
1392                 mount_fd_event(m, ev->events);
1393                 break;
1394
1395         case WATCH_SWAP:
1396                 /* Some swap table change, intended for the swap subsystem */
1397                 swap_fd_event(m, ev->events);
1398                 break;
1399
1400         case WATCH_UDEV:
1401                 /* Some notification from udev, intended for the device subsystem */
1402                 device_fd_event(m, ev->events);
1403                 break;
1404
1405         case WATCH_DBUS_WATCH:
1406                 bus_watch_event(m, w, ev->events);
1407                 break;
1408
1409         case WATCH_DBUS_TIMEOUT:
1410                 bus_timeout_event(m, w, ev->events);
1411                 break;
1412
1413         default:
1414                 log_error("event type=%i", w->type);
1415                 assert_not_reached("Unknown epoll event type.");
1416         }
1417
1418         return 0;
1419 }
1420
1421 int manager_loop(Manager *m) {
1422         int r;
1423
1424         RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
1425
1426         assert(m);
1427         m->exit_code = MANAGER_RUNNING;
1428
1429         /* Release the path cache */
1430         set_free_free(m->unit_path_cache);
1431         m->unit_path_cache = NULL;
1432
1433         manager_check_finished(m);
1434
1435         /* There might still be some zombies hanging around from
1436          * before we were exec()'ed. Leat's reap them */
1437         r = manager_dispatch_sigchld(m);
1438         if (r < 0)
1439                 return r;
1440
1441         while (m->exit_code == MANAGER_RUNNING) {
1442                 struct epoll_event event;
1443                 int n;
1444                 int wait_msec = -1;
1445
1446                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM)
1447                         watchdog_ping();
1448
1449                 if (!ratelimit_test(&rl)) {
1450                         /* Yay, something is going seriously wrong, pause a little */
1451                         log_warning("Looping too fast. Throttling execution a little.");
1452                         sleep(1);
1453                         continue;
1454                 }
1455
1456                 if (manager_dispatch_load_queue(m) > 0)
1457                         continue;
1458
1459                 if (manager_dispatch_run_queue(m) > 0)
1460                         continue;
1461
1462                 if (bus_dispatch(m) > 0)
1463                         continue;
1464
1465                 if (manager_dispatch_cleanup_queue(m) > 0)
1466                         continue;
1467
1468                 if (manager_dispatch_gc_queue(m) > 0)
1469                         continue;
1470
1471                 if (manager_dispatch_dbus_queue(m) > 0)
1472                         continue;
1473
1474                 if (swap_dispatch_reload(m) > 0)
1475                         continue;
1476
1477                 /* Sleep for half the watchdog time */
1478                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM) {
1479                         wait_msec = (int) (m->runtime_watchdog / 2 / USEC_PER_MSEC);
1480                         if (wait_msec <= 0)
1481                                 wait_msec = 1;
1482                 } else
1483                         wait_msec = -1;
1484
1485                 n = epoll_wait(m->epoll_fd, &event, 1, wait_msec);
1486                 if (n < 0) {
1487
1488                         if (errno == EINTR)
1489                                 continue;
1490
1491                         return -errno;
1492                 } else if (n == 0)
1493                         continue;
1494
1495                 assert(n == 1);
1496
1497                 r = process_event(m, &event);
1498                 if (r < 0)
1499                         return r;
1500         }
1501
1502         return m->exit_code;
1503 }
1504
1505 int manager_load_unit_from_dbus_path(Manager *m, const char *s, DBusError *e, Unit **_u) {
1506         char *n;
1507         Unit *u;
1508         int r;
1509
1510         assert(m);
1511         assert(s);
1512         assert(_u);
1513
1514         if (!startswith(s, "/org/freedesktop/systemd1/unit/"))
1515                 return -EINVAL;
1516
1517         n = bus_path_unescape(s+31);
1518         if (!n)
1519                 return -ENOMEM;
1520
1521         r = manager_load_unit(m, n, NULL, e, &u);
1522         free(n);
1523
1524         if (r < 0)
1525                 return r;
1526
1527         *_u = u;
1528
1529         return 0;
1530 }
1531
1532 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
1533         Job *j;
1534         unsigned id;
1535         int r;
1536
1537         assert(m);
1538         assert(s);
1539         assert(_j);
1540
1541         if (!startswith(s, "/org/freedesktop/systemd1/job/"))
1542                 return -EINVAL;
1543
1544         if ((r = safe_atou(s + 30, &id)) < 0)
1545                 return r;
1546
1547         if (!(j = manager_get_job(m, id)))
1548                 return -ENOENT;
1549
1550         *_j = j;
1551
1552         return 0;
1553 }
1554
1555 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
1556
1557 #ifdef HAVE_AUDIT
1558         char *p;
1559
1560         if (m->audit_fd < 0)
1561                 return;
1562
1563         /* Don't generate audit events if the service was already
1564          * started and we're just deserializing */
1565         if (m->n_reloading > 0)
1566                 return;
1567
1568         if (m->running_as != MANAGER_SYSTEM)
1569                 return;
1570
1571         if (u->type != UNIT_SERVICE)
1572                 return;
1573
1574         if (!(p = unit_name_to_prefix_and_instance(u->id))) {
1575                 log_error("Failed to allocate unit name for audit message: %s", strerror(ENOMEM));
1576                 return;
1577         }
1578
1579         if (audit_log_user_comm_message(m->audit_fd, type, "", p, NULL, NULL, NULL, success) < 0) {
1580                 if (errno == EPERM) {
1581                         /* We aren't allowed to send audit messages?
1582                          * Then let's not retry again. */
1583                         audit_close(m->audit_fd);
1584                         m->audit_fd = -1;
1585                 } else
1586                         log_warning("Failed to send audit message: %m");
1587         }
1588
1589         free(p);
1590 #endif
1591
1592 }
1593
1594 void manager_send_unit_plymouth(Manager *m, Unit *u) {
1595         int fd = -1;
1596         union sockaddr_union sa;
1597         int n = 0;
1598         char *message = NULL;
1599
1600         /* Don't generate plymouth events if the service was already
1601          * started and we're just deserializing */
1602         if (m->n_reloading > 0)
1603                 return;
1604
1605         if (m->running_as != MANAGER_SYSTEM)
1606                 return;
1607
1608         if (u->type != UNIT_SERVICE &&
1609             u->type != UNIT_MOUNT &&
1610             u->type != UNIT_SWAP)
1611                 return;
1612
1613         /* We set SOCK_NONBLOCK here so that we rather drop the
1614          * message then wait for plymouth */
1615         if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
1616                 log_error("socket() failed: %m");
1617                 return;
1618         }
1619
1620         zero(sa);
1621         sa.sa.sa_family = AF_UNIX;
1622         strncpy(sa.un.sun_path+1, "/org/freedesktop/plymouthd", sizeof(sa.un.sun_path)-1);
1623         if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) {
1624
1625                 if (errno != EPIPE &&
1626                     errno != EAGAIN &&
1627                     errno != ENOENT &&
1628                     errno != ECONNREFUSED &&
1629                     errno != ECONNRESET &&
1630                     errno != ECONNABORTED)
1631                         log_error("connect() failed: %m");
1632
1633                 goto finish;
1634         }
1635
1636         if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
1637                 log_error("Out of memory");
1638                 goto finish;
1639         }
1640
1641         errno = 0;
1642         if (write(fd, message, n + 1) != n + 1) {
1643
1644                 if (errno != EPIPE &&
1645                     errno != EAGAIN &&
1646                     errno != ENOENT &&
1647                     errno != ECONNREFUSED &&
1648                     errno != ECONNRESET &&
1649                     errno != ECONNABORTED)
1650                         log_error("Failed to write Plymouth message: %m");
1651
1652                 goto finish;
1653         }
1654
1655 finish:
1656         if (fd >= 0)
1657                 close_nointr_nofail(fd);
1658
1659         free(message);
1660 }
1661
1662 void manager_dispatch_bus_name_owner_changed(
1663                 Manager *m,
1664                 const char *name,
1665                 const char* old_owner,
1666                 const char *new_owner) {
1667
1668         Unit *u;
1669
1670         assert(m);
1671         assert(name);
1672
1673         if (!(u = hashmap_get(m->watch_bus, name)))
1674                 return;
1675
1676         UNIT_VTABLE(u)->bus_name_owner_change(u, name, old_owner, new_owner);
1677 }
1678
1679 void manager_dispatch_bus_query_pid_done(
1680                 Manager *m,
1681                 const char *name,
1682                 pid_t pid) {
1683
1684         Unit *u;
1685
1686         assert(m);
1687         assert(name);
1688         assert(pid >= 1);
1689
1690         if (!(u = hashmap_get(m->watch_bus, name)))
1691                 return;
1692
1693         UNIT_VTABLE(u)->bus_query_pid_done(u, name, pid);
1694 }
1695
1696 int manager_open_serialization(Manager *m, FILE **_f) {
1697         char *path = NULL;
1698         mode_t saved_umask;
1699         int fd;
1700         FILE *f;
1701
1702         assert(_f);
1703
1704         if (m->running_as == MANAGER_SYSTEM)
1705                 asprintf(&path, "/run/systemd/dump-%lu-XXXXXX", (unsigned long) getpid());
1706         else
1707                 asprintf(&path, "/tmp/systemd-dump-%lu-XXXXXX", (unsigned long) getpid());
1708
1709         if (!path)
1710                 return -ENOMEM;
1711
1712         saved_umask = umask(0077);
1713         fd = mkostemp(path, O_RDWR|O_CLOEXEC);
1714         umask(saved_umask);
1715
1716         if (fd < 0) {
1717                 free(path);
1718                 return -errno;
1719         }
1720
1721         unlink(path);
1722
1723         log_debug("Serializing state to %s", path);
1724         free(path);
1725
1726         if (!(f = fdopen(fd, "w+")))
1727                 return -errno;
1728
1729         *_f = f;
1730
1731         return 0;
1732 }
1733
1734 int manager_serialize(Manager *m, FILE *f, FDSet *fds) {
1735         Iterator i;
1736         Unit *u;
1737         const char *t;
1738         int r;
1739
1740         assert(m);
1741         assert(f);
1742         assert(fds);
1743
1744         m->n_reloading ++;
1745
1746         fprintf(f, "current-job-id=%i\n", m->current_job_id);
1747         fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
1748
1749         dual_timestamp_serialize(f, "initrd-timestamp", &m->initrd_timestamp);
1750
1751         if (! in_initrd()) {
1752                 dual_timestamp_serialize(f, "startup-timestamp", &m->startup_timestamp);
1753                 dual_timestamp_serialize(f, "finish-timestamp", &m->finish_timestamp);
1754         }
1755
1756         fputc('\n', f);
1757
1758         HASHMAP_FOREACH_KEY(u, t, m->units, i) {
1759                 if (u->id != t)
1760                         continue;
1761
1762                 if (!unit_can_serialize(u))
1763                         continue;
1764
1765                 /* Start marker */
1766                 fputs(u->id, f);
1767                 fputc('\n', f);
1768
1769                 if ((r = unit_serialize(u, f, fds)) < 0) {
1770                         m->n_reloading --;
1771                         return r;
1772                 }
1773         }
1774
1775         assert(m->n_reloading > 0);
1776         m->n_reloading --;
1777
1778         if (ferror(f))
1779                 return -EIO;
1780
1781         r = bus_fdset_add_all(m, fds);
1782         if (r < 0)
1783                 return r;
1784
1785         return 0;
1786 }
1787
1788 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
1789         int r = 0;
1790
1791         assert(m);
1792         assert(f);
1793
1794         log_debug("Deserializing state...");
1795
1796         m->n_reloading ++;
1797
1798         for (;;) {
1799                 char line[LINE_MAX], *l;
1800
1801                 if (!fgets(line, sizeof(line), f)) {
1802                         if (feof(f))
1803                                 r = 0;
1804                         else
1805                                 r = -errno;
1806
1807                         goto finish;
1808                 }
1809
1810                 char_array_0(line);
1811                 l = strstrip(line);
1812
1813                 if (l[0] == 0)
1814                         break;
1815
1816                 if (startswith(l, "current-job-id=")) {
1817                         uint32_t id;
1818
1819                         if (safe_atou32(l+15, &id) < 0)
1820                                 log_debug("Failed to parse current job id value %s", l+15);
1821                         else
1822                                 m->current_job_id = MAX(m->current_job_id, id);
1823                 } else if (startswith(l, "taint-usr=")) {
1824                         int b;
1825
1826                         if ((b = parse_boolean(l+10)) < 0)
1827                                 log_debug("Failed to parse taint /usr flag %s", l+10);
1828                         else
1829                                 m->taint_usr = m->taint_usr || b;
1830                 } else if (startswith(l, "initrd-timestamp="))
1831                         dual_timestamp_deserialize(l+17, &m->initrd_timestamp);
1832                 else if (startswith(l, "startup-timestamp="))
1833                         dual_timestamp_deserialize(l+18, &m->startup_timestamp);
1834                 else if (startswith(l, "finish-timestamp="))
1835                         dual_timestamp_deserialize(l+17, &m->finish_timestamp);
1836                 else
1837                         log_debug("Unknown serialization item '%s'", l);
1838         }
1839
1840         for (;;) {
1841                 Unit *u;
1842                 char name[UNIT_NAME_MAX+2];
1843
1844                 /* Start marker */
1845                 if (!fgets(name, sizeof(name), f)) {
1846                         if (feof(f))
1847                                 r = 0;
1848                         else
1849                                 r = -errno;
1850
1851                         goto finish;
1852                 }
1853
1854                 char_array_0(name);
1855
1856                 if ((r = manager_load_unit(m, strstrip(name), NULL, NULL, &u)) < 0)
1857                         goto finish;
1858
1859                 if ((r = unit_deserialize(u, f, fds)) < 0)
1860                         goto finish;
1861         }
1862
1863 finish:
1864         if (ferror(f)) {
1865                 r = -EIO;
1866                 goto finish;
1867         }
1868
1869         assert(m->n_reloading > 0);
1870         m->n_reloading --;
1871
1872         return r;
1873 }
1874
1875 int manager_reload(Manager *m) {
1876         int r, q;
1877         FILE *f;
1878         FDSet *fds;
1879
1880         assert(m);
1881
1882         r = manager_open_serialization(m, &f);
1883         if (r < 0)
1884                 return r;
1885
1886         m->n_reloading ++;
1887
1888         fds = fdset_new();
1889         if (!fds) {
1890                 m->n_reloading --;
1891                 r = -ENOMEM;
1892                 goto finish;
1893         }
1894
1895         r = manager_serialize(m, f, fds);
1896         if (r < 0) {
1897                 m->n_reloading --;
1898                 goto finish;
1899         }
1900
1901         if (fseeko(f, 0, SEEK_SET) < 0) {
1902                 m->n_reloading --;
1903                 r = -errno;
1904                 goto finish;
1905         }
1906
1907         /* From here on there is no way back. */
1908         manager_clear_jobs_and_units(m);
1909         manager_undo_generators(m);
1910         lookup_paths_free(&m->lookup_paths);
1911
1912         /* Find new unit paths */
1913         manager_run_generators(m);
1914
1915         q = lookup_paths_init(
1916                         &m->lookup_paths, m->running_as, true,
1917                         m->generator_unit_path,
1918                         m->generator_unit_path_early,
1919                         m->generator_unit_path_late);
1920         if (q < 0)
1921                 r = q;
1922
1923         manager_build_unit_path_cache(m);
1924
1925         /* First, enumerate what we can from all config files */
1926         q = manager_enumerate(m);
1927         if (q < 0)
1928                 r = q;
1929
1930         /* Second, deserialize our stored data */
1931         q = manager_deserialize(m, f, fds);
1932         if (q < 0)
1933                 r = q;
1934
1935         fclose(f);
1936         f = NULL;
1937
1938         /* Third, fire things up! */
1939         q = manager_coldplug(m);
1940         if (q < 0)
1941                 r = q;
1942
1943         assert(m->n_reloading > 0);
1944         m->n_reloading--;
1945
1946 finish:
1947         if (f)
1948                 fclose(f);
1949
1950         if (fds)
1951                 fdset_free(fds);
1952
1953         return r;
1954 }
1955
1956 bool manager_is_booting_or_shutting_down(Manager *m) {
1957         Unit *u;
1958
1959         assert(m);
1960
1961         /* Is the initial job still around? */
1962         if (manager_get_job(m, m->default_unit_job_id))
1963                 return true;
1964
1965         /* Is there a job for the shutdown target? */
1966         u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
1967         if (u)
1968                 return !!u->job;
1969
1970         return false;
1971 }
1972
1973 void manager_reset_failed(Manager *m) {
1974         Unit *u;
1975         Iterator i;
1976
1977         assert(m);
1978
1979         HASHMAP_FOREACH(u, m->units, i)
1980                 unit_reset_failed(u);
1981 }
1982
1983 bool manager_unit_pending_inactive(Manager *m, const char *name) {
1984         Unit *u;
1985
1986         assert(m);
1987         assert(name);
1988
1989         /* Returns true if the unit is inactive or going down */
1990         if (!(u = manager_get_unit(m, name)))
1991                 return true;
1992
1993         return unit_pending_inactive(u);
1994 }
1995
1996 void manager_check_finished(Manager *m) {
1997         char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
1998         usec_t kernel_usec, initrd_usec, userspace_usec, total_usec;
1999
2000         assert(m);
2001
2002         if (hashmap_size(m->jobs) > 0)
2003                 return;
2004
2005         /* Notify Type=idle units that we are done now */
2006         close_pipe(m->idle_pipe);
2007
2008         /* Turn off confirm spawn now */
2009         m->confirm_spawn = false;
2010
2011         if (dual_timestamp_is_set(&m->finish_timestamp))
2012                 return;
2013
2014         dual_timestamp_get(&m->finish_timestamp);
2015
2016         if (m->running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0) {
2017
2018                 userspace_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
2019                 total_usec = m->finish_timestamp.monotonic;
2020
2021                 if (dual_timestamp_is_set(&m->initrd_timestamp)) {
2022
2023                         kernel_usec = m->initrd_timestamp.monotonic;
2024                         initrd_usec = m->startup_timestamp.monotonic - m->initrd_timestamp.monotonic;
2025
2026                         log_info("Startup finished in %s (kernel) + %s (initrd) + %s (userspace) = %s.",
2027                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
2028                                  format_timespan(initrd, sizeof(initrd), initrd_usec),
2029                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
2030                                  format_timespan(sum, sizeof(sum), total_usec));
2031                 } else {
2032                         kernel_usec = m->startup_timestamp.monotonic;
2033                         initrd_usec = 0;
2034
2035                         log_info("Startup finished in %s (kernel) + %s (userspace) = %s.",
2036                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
2037                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
2038                                  format_timespan(sum, sizeof(sum), total_usec));
2039                 }
2040         } else {
2041                 userspace_usec = initrd_usec = kernel_usec = 0;
2042                 total_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
2043
2044                 log_debug("Startup finished in %s.",
2045                           format_timespan(sum, sizeof(sum), total_usec));
2046         }
2047
2048         bus_broadcast_finished(m, kernel_usec, initrd_usec, userspace_usec, total_usec);
2049
2050         sd_notifyf(false,
2051                    "READY=1\nSTATUS=Startup finished in %s.",
2052                    format_timespan(sum, sizeof(sum), total_usec));
2053 }
2054
2055 static int create_generator_dir(Manager *m, char **generator, const char *name) {
2056         char *p;
2057         int r;
2058
2059         assert(m);
2060         assert(generator);
2061         assert(name);
2062
2063         if (*generator)
2064                 return 0;
2065
2066         if (m->running_as == MANAGER_SYSTEM && getpid() == 1) {
2067
2068                 p = strappend("/run/systemd/", name);
2069                 if (!p) {
2070                         log_error("Out of memory");
2071                         return -ENOMEM;
2072                 }
2073
2074                 r = mkdir_p_label(p, 0755);
2075                 if (r < 0) {
2076                         log_error("Failed to create generator directory: %s", strerror(-r));
2077                         free(p);
2078                         return r;
2079                 }
2080         } else {
2081                 p = join("/tmp/systemd-", name, ".XXXXXX", NULL);
2082                 if (!p) {
2083                         log_error("Out of memory");
2084                         return -ENOMEM;
2085                 }
2086
2087                 if (!mkdtemp(p)) {
2088                         free(p);
2089                         log_error("Failed to create generator directory: %m");
2090                         return -errno;
2091                 }
2092         }
2093
2094         *generator = p;
2095         return 0;
2096 }
2097
2098 static void trim_generator_dir(Manager *m, char **generator) {
2099         assert(m);
2100         assert(generator);
2101
2102         if (!*generator)
2103                 return;
2104
2105         if (rmdir(*generator) >= 0) {
2106                 free(*generator);
2107                 *generator = NULL;
2108         }
2109
2110         return;
2111 }
2112
2113 void manager_run_generators(Manager *m) {
2114         DIR *d = NULL;
2115         const char *generator_path;
2116         const char *argv[5];
2117         mode_t u;
2118         int r;
2119
2120         assert(m);
2121
2122         generator_path = m->running_as == MANAGER_SYSTEM ? SYSTEM_GENERATOR_PATH : USER_GENERATOR_PATH;
2123         d = opendir(generator_path);
2124         if (!d) {
2125                 if (errno == ENOENT)
2126                         return;
2127
2128                 log_error("Failed to enumerate generator directory: %m");
2129                 return;
2130         }
2131
2132         r = create_generator_dir(m, &m->generator_unit_path, "generator");
2133         if (r < 0)
2134                 goto finish;
2135
2136         r = create_generator_dir(m, &m->generator_unit_path_early, "generator.early");
2137         if (r < 0)
2138                 goto finish;
2139
2140         r = create_generator_dir(m, &m->generator_unit_path_late, "generator.late");
2141         if (r < 0)
2142                 goto finish;
2143
2144         argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
2145         argv[1] = m->generator_unit_path;
2146         argv[2] = m->generator_unit_path_early;
2147         argv[3] = m->generator_unit_path_late;
2148         argv[4] = NULL;
2149
2150         u = umask(0022);
2151         execute_directory(generator_path, d, (char**) argv);
2152         umask(u);
2153
2154         trim_generator_dir(m, &m->generator_unit_path);
2155         trim_generator_dir(m, &m->generator_unit_path_early);
2156         trim_generator_dir(m, &m->generator_unit_path_late);
2157
2158 finish:
2159         if (d)
2160                 closedir(d);
2161 }
2162
2163 static void remove_generator_dir(Manager *m, char **generator) {
2164         assert(m);
2165         assert(generator);
2166
2167         if (!*generator)
2168                 return;
2169
2170         strv_remove(m->lookup_paths.unit_path, *generator);
2171         rm_rf(*generator, false, true, false);
2172
2173         free(*generator);
2174         *generator = NULL;
2175 }
2176
2177 void manager_undo_generators(Manager *m) {
2178         assert(m);
2179
2180         remove_generator_dir(m, &m->generator_unit_path);
2181         remove_generator_dir(m, &m->generator_unit_path_early);
2182         remove_generator_dir(m, &m->generator_unit_path_late);
2183 }
2184
2185 int manager_set_default_controllers(Manager *m, char **controllers) {
2186         char **l;
2187
2188         assert(m);
2189
2190         l = strv_copy(controllers);
2191         if (!l)
2192                 return -ENOMEM;
2193
2194         strv_free(m->default_controllers);
2195         m->default_controllers = l;
2196
2197         cg_shorten_controllers(m->default_controllers);
2198
2199         return 0;
2200 }
2201
2202 int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
2203         int i;
2204
2205         assert(m);
2206
2207         for (i = 0; i < RLIMIT_NLIMITS; i++) {
2208                 if (!default_rlimit[i])
2209                         continue;
2210
2211                 m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
2212                 if (!m->rlimit[i])
2213                         return -ENOMEM;
2214         }
2215
2216         return 0;
2217 }
2218
2219 void manager_recheck_journal(Manager *m) {
2220         Unit *u;
2221
2222         assert(m);
2223
2224         if (m->running_as != MANAGER_SYSTEM)
2225                 return;
2226
2227         u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
2228         if (u && SOCKET(u)->state != SOCKET_RUNNING) {
2229                 log_close_journal();
2230                 return;
2231         }
2232
2233         u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
2234         if (u && SERVICE(u)->state != SERVICE_RUNNING) {
2235                 log_close_journal();
2236                 return;
2237         }
2238
2239         /* Hmm, OK, so the socket is fully up and the service is up
2240          * too, then let's make use of the thing. */
2241         log_open();
2242 }
2243
2244 void manager_set_show_status(Manager *m, bool b) {
2245         assert(m);
2246
2247         if (m->running_as != MANAGER_SYSTEM)
2248                 return;
2249
2250         m->show_status = b;
2251
2252         if (b)
2253                 touch("/run/systemd/show-status");
2254         else
2255                 unlink("/run/systemd/show-status");
2256 }
2257
2258 bool manager_get_show_status(Manager *m) {
2259         assert(m);
2260
2261         if (m->running_as != MANAGER_SYSTEM)
2262                 return false;
2263
2264         if (m->show_status)
2265                 return true;
2266
2267         /* If Plymouth is running make sure we show the status, so
2268          * that there's something nice to see when people press Esc */
2269
2270         return plymouth_running();
2271 }
2272
2273 static const char* const manager_running_as_table[_MANAGER_RUNNING_AS_MAX] = {
2274         [MANAGER_SYSTEM] = "system",
2275         [MANAGER_USER] = "user"
2276 };
2277
2278 DEFINE_STRING_TABLE_LOOKUP(manager_running_as, ManagerRunningAs);