chiark / gitweb /
manager: introduce SwitchRoot bus call for initrd/main transition
[elogind.git] / src / core / manager.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <sys/epoll.h>
26 #include <signal.h>
27 #include <sys/signalfd.h>
28 #include <sys/wait.h>
29 #include <unistd.h>
30 #include <sys/poll.h>
31 #include <sys/reboot.h>
32 #include <sys/ioctl.h>
33 #include <linux/kd.h>
34 #include <termios.h>
35 #include <fcntl.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39
40 #ifdef HAVE_AUDIT
41 #include <libaudit.h>
42 #endif
43
44 #include <systemd/sd-daemon.h>
45
46 #include "manager.h"
47 #include "transaction.h"
48 #include "hashmap.h"
49 #include "macro.h"
50 #include "strv.h"
51 #include "log.h"
52 #include "util.h"
53 #include "mkdir.h"
54 #include "ratelimit.h"
55 #include "cgroup.h"
56 #include "mount-setup.h"
57 #include "unit-name.h"
58 #include "dbus-unit.h"
59 #include "dbus-job.h"
60 #include "missing.h"
61 #include "path-lookup.h"
62 #include "special.h"
63 #include "bus-errors.h"
64 #include "exit-status.h"
65 #include "virt.h"
66 #include "watchdog.h"
67 #include "cgroup-util.h"
68 #include "path-util.h"
69
70 /* As soon as 16 units are in our GC queue, make sure to run a gc sweep */
71 #define GC_QUEUE_ENTRIES_MAX 16
72
73 /* As soon as 10s have passed since a unit was added to our GC queue, make sure to run a gc sweep */
74 #define GC_QUEUE_USEC_MAX (10*USEC_PER_SEC)
75
76 /* Where clients shall send notification messages to */
77 #define NOTIFY_SOCKET_SYSTEM "/run/systemd/notify"
78 #define NOTIFY_SOCKET_USER "@/org/freedesktop/systemd1/notify"
79
80 static int manager_setup_notify(Manager *m) {
81         union {
82                 struct sockaddr sa;
83                 struct sockaddr_un un;
84         } sa;
85         struct epoll_event ev;
86         int one = 1, r;
87         mode_t u;
88
89         assert(m);
90
91         m->notify_watch.type = WATCH_NOTIFY;
92         if ((m->notify_watch.fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
93                 log_error("Failed to allocate notification socket: %m");
94                 return -errno;
95         }
96
97         zero(sa);
98         sa.sa.sa_family = AF_UNIX;
99
100         if (getpid() != 1)
101                 snprintf(sa.un.sun_path, sizeof(sa.un.sun_path), NOTIFY_SOCKET_USER "/%llu", random_ull());
102         else {
103                 unlink(NOTIFY_SOCKET_SYSTEM);
104                 strncpy(sa.un.sun_path, NOTIFY_SOCKET_SYSTEM, sizeof(sa.un.sun_path));
105         }
106
107         if (sa.un.sun_path[0] == '@')
108                 sa.un.sun_path[0] = 0;
109
110         u = umask(0111);
111         r = bind(m->notify_watch.fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1));
112         umask(u);
113
114         if (r < 0) {
115                 log_error("bind() failed: %m");
116                 return -errno;
117         }
118
119         if (setsockopt(m->notify_watch.fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) {
120                 log_error("SO_PASSCRED failed: %m");
121                 return -errno;
122         }
123
124         zero(ev);
125         ev.events = EPOLLIN;
126         ev.data.ptr = &m->notify_watch;
127
128         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->notify_watch.fd, &ev) < 0)
129                 return -errno;
130
131         if (sa.un.sun_path[0] == 0)
132                 sa.un.sun_path[0] = '@';
133
134         if (!(m->notify_socket = strdup(sa.un.sun_path)))
135                 return -ENOMEM;
136
137         log_debug("Using notification socket %s", m->notify_socket);
138
139         return 0;
140 }
141
142 static int enable_special_signals(Manager *m) {
143         int fd;
144
145         assert(m);
146
147         /* Enable that we get SIGINT on control-alt-del. In containers
148          * this will fail with EPERM, so ignore that. */
149         if (reboot(RB_DISABLE_CAD) < 0 && errno != EPERM)
150                 log_warning("Failed to enable ctrl-alt-del handling: %m");
151
152         fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
153         if (fd < 0) {
154                 /* Support systems without virtual console */
155                 if (fd != -ENOENT)
156                         log_warning("Failed to open /dev/tty0: %m");
157         } else {
158                 /* Enable that we get SIGWINCH on kbrequest */
159                 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
160                         log_warning("Failed to enable kbrequest handling: %s", strerror(errno));
161
162                 close_nointr_nofail(fd);
163         }
164
165         return 0;
166 }
167
168 static int manager_setup_signals(Manager *m) {
169         sigset_t mask;
170         struct epoll_event ev;
171         struct sigaction sa;
172
173         assert(m);
174
175         /* We are not interested in SIGSTOP and friends. */
176         zero(sa);
177         sa.sa_handler = SIG_DFL;
178         sa.sa_flags = SA_NOCLDSTOP|SA_RESTART;
179         assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
180
181         assert_se(sigemptyset(&mask) == 0);
182
183         sigset_add_many(&mask,
184                         SIGCHLD,     /* Child died */
185                         SIGTERM,     /* Reexecute daemon */
186                         SIGHUP,      /* Reload configuration */
187                         SIGUSR1,     /* systemd/upstart: reconnect to D-Bus */
188                         SIGUSR2,     /* systemd: dump status */
189                         SIGINT,      /* Kernel sends us this on control-alt-del */
190                         SIGWINCH,    /* Kernel sends us this on kbrequest (alt-arrowup) */
191                         SIGPWR,      /* Some kernel drivers and upsd send us this on power failure */
192                         SIGRTMIN+0,  /* systemd: start default.target */
193                         SIGRTMIN+1,  /* systemd: isolate rescue.target */
194                         SIGRTMIN+2,  /* systemd: isolate emergency.target */
195                         SIGRTMIN+3,  /* systemd: start halt.target */
196                         SIGRTMIN+4,  /* systemd: start poweroff.target */
197                         SIGRTMIN+5,  /* systemd: start reboot.target */
198                         SIGRTMIN+6,  /* systemd: start kexec.target */
199                         SIGRTMIN+13, /* systemd: Immediate halt */
200                         SIGRTMIN+14, /* systemd: Immediate poweroff */
201                         SIGRTMIN+15, /* systemd: Immediate reboot */
202                         SIGRTMIN+16, /* systemd: Immediate kexec */
203                         SIGRTMIN+20, /* systemd: enable status messages */
204                         SIGRTMIN+21, /* systemd: disable status messages */
205                         SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
206                         SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
207                         SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
208                         SIGRTMIN+27, /* systemd: set log target to console */
209                         SIGRTMIN+28, /* systemd: set log target to kmsg */
210                         SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg */
211                         -1);
212         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
213
214         m->signal_watch.type = WATCH_SIGNAL;
215         if ((m->signal_watch.fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0)
216                 return -errno;
217
218         zero(ev);
219         ev.events = EPOLLIN;
220         ev.data.ptr = &m->signal_watch;
221
222         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->signal_watch.fd, &ev) < 0)
223                 return -errno;
224
225         if (m->running_as == MANAGER_SYSTEM)
226                 return enable_special_signals(m);
227
228         return 0;
229 }
230
231 static void manager_strip_environment(Manager *m) {
232         assert(m);
233
234         /* Remove variables from the inherited set that are part of
235          * the container interface:
236          * http://www.freedesktop.org/wiki/Software/systemd/ContainerInterface */
237         strv_remove_prefix(m->environment, "container=");
238         strv_remove_prefix(m->environment, "container_");
239
240         /* Remove variables from the inherited set that are part of
241          * the initrd interface:
242          * http://www.freedesktop.org/wiki/Software/systemd/InitrdInterface */
243         strv_remove_prefix(m->environment, "RD_");
244 }
245
246 int manager_new(ManagerRunningAs running_as, Manager **_m) {
247         Manager *m;
248         int r = -ENOMEM;
249
250         assert(_m);
251         assert(running_as >= 0);
252         assert(running_as < _MANAGER_RUNNING_AS_MAX);
253
254         if (!(m = new0(Manager, 1)))
255                 return -ENOMEM;
256
257         dual_timestamp_get(&m->startup_timestamp);
258
259         m->running_as = running_as;
260         m->name_data_slot = m->conn_data_slot = m->subscribed_data_slot = -1;
261         m->exit_code = _MANAGER_EXIT_CODE_INVALID;
262         m->pin_cgroupfs_fd = -1;
263         m->idle_pipe[0] = m->idle_pipe[1] = -1;
264
265 #ifdef HAVE_AUDIT
266         m->audit_fd = -1;
267 #endif
268
269         m->signal_watch.fd = m->mount_watch.fd = m->udev_watch.fd = m->epoll_fd = m->dev_autofs_fd = m->swap_watch.fd = -1;
270         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
271
272         m->environment = strv_copy(environ);
273         if (!m->environment)
274                 goto fail;
275
276         manager_strip_environment(m);
277
278         if (running_as == MANAGER_SYSTEM) {
279                 m->default_controllers = strv_new("cpu", NULL);
280                 if (!m->default_controllers)
281                         goto fail;
282         }
283
284         if (!(m->units = hashmap_new(string_hash_func, string_compare_func)))
285                 goto fail;
286
287         if (!(m->jobs = hashmap_new(trivial_hash_func, trivial_compare_func)))
288                 goto fail;
289
290         if (!(m->watch_pids = hashmap_new(trivial_hash_func, trivial_compare_func)))
291                 goto fail;
292
293         if (!(m->cgroup_bondings = hashmap_new(string_hash_func, string_compare_func)))
294                 goto fail;
295
296         if (!(m->watch_bus = hashmap_new(string_hash_func, string_compare_func)))
297                 goto fail;
298
299         if ((m->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
300                 goto fail;
301
302         if ((r = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
303                 goto fail;
304
305         if ((r = manager_setup_signals(m)) < 0)
306                 goto fail;
307
308         if ((r = manager_setup_cgroup(m)) < 0)
309                 goto fail;
310
311         if ((r = manager_setup_notify(m)) < 0)
312                 goto fail;
313
314         /* Try to connect to the busses, if possible. */
315         if ((r = bus_init(m, running_as != MANAGER_SYSTEM)) < 0)
316                 goto fail;
317
318 #ifdef HAVE_AUDIT
319         if ((m->audit_fd = audit_open()) < 0 &&
320             /* If the kernel lacks netlink or audit support,
321              * don't worry about it. */
322             errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
323                 log_error("Failed to connect to audit log: %m");
324 #endif
325
326         m->taint_usr = dir_is_empty("/usr") > 0;
327
328         *_m = m;
329         return 0;
330
331 fail:
332         manager_free(m);
333         return r;
334 }
335
336 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
337         Unit *u;
338         unsigned n = 0;
339
340         assert(m);
341
342         while ((u = m->cleanup_queue)) {
343                 assert(u->in_cleanup_queue);
344
345                 unit_free(u);
346                 n++;
347         }
348
349         return n;
350 }
351
/* Offsets that are added to a base GC marker value to express what a GC
 * sweep found out about a unit. */
enum {
        GC_OFFSET_IN_PATH = 0,  /* This one is on the path we were traveling */
        GC_OFFSET_UNSURE  = 1,  /* No clue */
        GC_OFFSET_GOOD    = 2,  /* We still need this unit */
        GC_OFFSET_BAD     = 3,  /* We don't need this unit anymore */
        _GC_OFFSET_MAX    = 4
};
359
360 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
361         Iterator i;
362         Unit *other;
363         bool is_bad;
364
365         assert(u);
366
367         if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
368             u->gc_marker == gc_marker + GC_OFFSET_BAD ||
369             u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
370                 return;
371
372         if (u->in_cleanup_queue)
373                 goto bad;
374
375         if (unit_check_gc(u))
376                 goto good;
377
378         u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
379
380         is_bad = true;
381
382         SET_FOREACH(other, u->dependencies[UNIT_REFERENCED_BY], i) {
383                 unit_gc_sweep(other, gc_marker);
384
385                 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
386                         goto good;
387
388                 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
389                         is_bad = false;
390         }
391
392         if (is_bad)
393                 goto bad;
394
395         /* We were unable to find anything out about this entry, so
396          * let's investigate it later */
397         u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
398         unit_add_to_gc_queue(u);
399         return;
400
401 bad:
402         /* We definitely know that this one is not useful anymore, so
403          * let's mark it for deletion */
404         u->gc_marker = gc_marker + GC_OFFSET_BAD;
405         unit_add_to_cleanup_queue(u);
406         return;
407
408 good:
409         u->gc_marker = gc_marker + GC_OFFSET_GOOD;
410 }
411
412 static unsigned manager_dispatch_gc_queue(Manager *m) {
413         Unit *u;
414         unsigned n = 0;
415         unsigned gc_marker;
416
417         assert(m);
418
419         if ((m->n_in_gc_queue < GC_QUEUE_ENTRIES_MAX) &&
420             (m->gc_queue_timestamp <= 0 ||
421              (m->gc_queue_timestamp + GC_QUEUE_USEC_MAX) > now(CLOCK_MONOTONIC)))
422                 return 0;
423
424         log_debug("Running GC...");
425
426         m->gc_marker += _GC_OFFSET_MAX;
427         if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
428                 m->gc_marker = 1;
429
430         gc_marker = m->gc_marker;
431
432         while ((u = m->gc_queue)) {
433                 assert(u->in_gc_queue);
434
435                 unit_gc_sweep(u, gc_marker);
436
437                 LIST_REMOVE(Unit, gc_queue, m->gc_queue, u);
438                 u->in_gc_queue = false;
439
440                 n++;
441
442                 if (u->gc_marker == gc_marker + GC_OFFSET_BAD ||
443                     u->gc_marker == gc_marker + GC_OFFSET_UNSURE) {
444                         log_debug("Collecting %s", u->id);
445                         u->gc_marker = gc_marker + GC_OFFSET_BAD;
446                         unit_add_to_cleanup_queue(u);
447                 }
448         }
449
450         m->n_in_gc_queue = 0;
451         m->gc_queue_timestamp = 0;
452
453         return n;
454 }
455
456 static void manager_clear_jobs_and_units(Manager *m) {
457         Unit *u;
458
459         assert(m);
460
461         while ((u = hashmap_first(m->units)))
462                 unit_free(u);
463
464         manager_dispatch_cleanup_queue(m);
465
466         assert(!m->load_queue);
467         assert(!m->run_queue);
468         assert(!m->dbus_unit_queue);
469         assert(!m->dbus_job_queue);
470         assert(!m->cleanup_queue);
471         assert(!m->gc_queue);
472
473         assert(hashmap_isempty(m->jobs));
474         assert(hashmap_isempty(m->units));
475 }
476
477 void manager_free(Manager *m) {
478         UnitType c;
479
480         assert(m);
481
482         manager_clear_jobs_and_units(m);
483
484         for (c = 0; c < _UNIT_TYPE_MAX; c++)
485                 if (unit_vtable[c]->shutdown)
486                         unit_vtable[c]->shutdown(m);
487
488         /* If we reexecute ourselves, we keep the root cgroup
489          * around */
490         manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
491
492         manager_undo_generators(m);
493
494         bus_done(m);
495
496         hashmap_free(m->units);
497         hashmap_free(m->jobs);
498         hashmap_free(m->watch_pids);
499         hashmap_free(m->watch_bus);
500
501         if (m->epoll_fd >= 0)
502                 close_nointr_nofail(m->epoll_fd);
503         if (m->signal_watch.fd >= 0)
504                 close_nointr_nofail(m->signal_watch.fd);
505         if (m->notify_watch.fd >= 0)
506                 close_nointr_nofail(m->notify_watch.fd);
507
508 #ifdef HAVE_AUDIT
509         if (m->audit_fd >= 0)
510                 audit_close(m->audit_fd);
511 #endif
512
513         free(m->notify_socket);
514
515         lookup_paths_free(&m->lookup_paths);
516         strv_free(m->environment);
517
518         strv_free(m->default_controllers);
519
520         hashmap_free(m->cgroup_bondings);
521         set_free_free(m->unit_path_cache);
522
523         close_pipe(m->idle_pipe);
524
525         free(m->switch_root);
526         free(m->switch_root_init);
527
528         free(m);
529 }
530
531 int manager_enumerate(Manager *m) {
532         int r = 0, q;
533         UnitType c;
534
535         assert(m);
536
537         /* Let's ask every type to load all units from disk/kernel
538          * that it might know */
539         for (c = 0; c < _UNIT_TYPE_MAX; c++)
540                 if (unit_vtable[c]->enumerate)
541                         if ((q = unit_vtable[c]->enumerate(m)) < 0)
542                                 r = q;
543
544         manager_dispatch_load_queue(m);
545         return r;
546 }
547
548 int manager_coldplug(Manager *m) {
549         int r = 0, q;
550         Iterator i;
551         Unit *u;
552         char *k;
553
554         assert(m);
555
556         /* Then, let's set up their initial state. */
557         HASHMAP_FOREACH_KEY(u, k, m->units, i) {
558
559                 /* ignore aliases */
560                 if (u->id != k)
561                         continue;
562
563                 if ((q = unit_coldplug(u)) < 0)
564                         r = q;
565         }
566
567         return r;
568 }
569
570 static void manager_build_unit_path_cache(Manager *m) {
571         char **i;
572         DIR *d = NULL;
573         int r;
574
575         assert(m);
576
577         set_free_free(m->unit_path_cache);
578
579         if (!(m->unit_path_cache = set_new(string_hash_func, string_compare_func))) {
580                 log_error("Failed to allocate unit path cache.");
581                 return;
582         }
583
584         /* This simply builds a list of files we know exist, so that
585          * we don't always have to go to disk */
586
587         STRV_FOREACH(i, m->lookup_paths.unit_path) {
588                 struct dirent *de;
589
590                 if (!(d = opendir(*i))) {
591                         log_error("Failed to open directory: %m");
592                         continue;
593                 }
594
595                 while ((de = readdir(d))) {
596                         char *p;
597
598                         if (ignore_file(de->d_name))
599                                 continue;
600
601                         p = join(streq(*i, "/") ? "" : *i, "/", de->d_name, NULL);
602                         if (!p) {
603                                 r = -ENOMEM;
604                                 goto fail;
605                         }
606
607                         if ((r = set_put(m->unit_path_cache, p)) < 0) {
608                                 free(p);
609                                 goto fail;
610                         }
611                 }
612
613                 closedir(d);
614                 d = NULL;
615         }
616
617         return;
618
619 fail:
620         log_error("Failed to build unit path cache: %s", strerror(-r));
621
622         set_free_free(m->unit_path_cache);
623         m->unit_path_cache = NULL;
624
625         if (d)
626                 closedir(d);
627 }
628
629 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
630         int r, q;
631
632         assert(m);
633
634         manager_run_generators(m);
635
636         manager_build_unit_path_cache(m);
637
638         /* If we will deserialize make sure that during enumeration
639          * this is already known, so we increase the counter here
640          * already */
641         if (serialization)
642                 m->n_reloading ++;
643
644         /* First, enumerate what we can from all config files */
645         r = manager_enumerate(m);
646
647         /* Second, deserialize if there is something to deserialize */
648         if (serialization)
649                 if ((q = manager_deserialize(m, serialization, fds)) < 0)
650                         r = q;
651
652         /* Third, fire things up! */
653         if ((q = manager_coldplug(m)) < 0)
654                 r = q;
655
656         if (serialization) {
657                 assert(m->n_reloading > 0);
658                 m->n_reloading --;
659         }
660
661         return r;
662 }
663
664 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, bool override, DBusError *e, Job **_ret) {
665         int r;
666         Transaction *tr;
667
668         assert(m);
669         assert(type < _JOB_TYPE_MAX);
670         assert(unit);
671         assert(mode < _JOB_MODE_MAX);
672
673         if (mode == JOB_ISOLATE && type != JOB_START) {
674                 dbus_set_error(e, BUS_ERROR_INVALID_JOB_MODE, "Isolate is only valid for start.");
675                 return -EINVAL;
676         }
677
678         if (mode == JOB_ISOLATE && !unit->allow_isolate) {
679                 dbus_set_error(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
680                 return -EPERM;
681         }
682
683         log_debug("Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
684
685         job_type_collapse(&type, unit);
686
687         tr = transaction_new();
688         if (!tr)
689                 return -ENOMEM;
690
691         r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, override, false,
692                                                  mode == JOB_IGNORE_DEPENDENCIES || mode == JOB_IGNORE_REQUIREMENTS,
693                                                  mode == JOB_IGNORE_DEPENDENCIES, e);
694         if (r < 0)
695                 goto tr_abort;
696
697         if (mode == JOB_ISOLATE) {
698                 r = transaction_add_isolate_jobs(tr, m);
699                 if (r < 0)
700                         goto tr_abort;
701         }
702
703         r = transaction_activate(tr, m, mode, e);
704         if (r < 0)
705                 goto tr_abort;
706
707         log_debug("Enqueued job %s/%s as %u", unit->id, job_type_to_string(type), (unsigned) tr->anchor_job->id);
708
709         if (_ret)
710                 *_ret = tr->anchor_job;
711
712         transaction_free(tr);
713         return 0;
714
715 tr_abort:
716         transaction_abort(tr);
717         transaction_free(tr);
718         return r;
719 }
720
721 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, bool override, DBusError *e, Job **_ret) {
722         Unit *unit;
723         int r;
724
725         assert(m);
726         assert(type < _JOB_TYPE_MAX);
727         assert(name);
728         assert(mode < _JOB_MODE_MAX);
729
730         if ((r = manager_load_unit(m, name, NULL, NULL, &unit)) < 0)
731                 return r;
732
733         return manager_add_job(m, type, unit, mode, override, e, _ret);
734 }
735
736 Job *manager_get_job(Manager *m, uint32_t id) {
737         assert(m);
738
739         return hashmap_get(m->jobs, UINT32_TO_PTR(id));
740 }
741
742 Unit *manager_get_unit(Manager *m, const char *name) {
743         assert(m);
744         assert(name);
745
746         return hashmap_get(m->units, name);
747 }
748
749 unsigned manager_dispatch_load_queue(Manager *m) {
750         Unit *u;
751         unsigned n = 0;
752
753         assert(m);
754
755         /* Make sure we are not run recursively */
756         if (m->dispatching_load_queue)
757                 return 0;
758
759         m->dispatching_load_queue = true;
760
761         /* Dispatches the load queue. Takes a unit from the queue and
762          * tries to load its data until the queue is empty */
763
764         while ((u = m->load_queue)) {
765                 assert(u->in_load_queue);
766
767                 unit_load(u);
768                 n++;
769         }
770
771         m->dispatching_load_queue = false;
772         return n;
773 }
774
775 int manager_load_unit_prepare(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
776         Unit *ret;
777         UnitType t;
778         int r;
779
780         assert(m);
781         assert(name || path);
782
783         /* This will prepare the unit for loading, but not actually
784          * load anything from disk. */
785
786         if (path && !is_path(path)) {
787                 dbus_set_error(e, BUS_ERROR_INVALID_PATH, "Path %s is not absolute.", path);
788                 return -EINVAL;
789         }
790
791         if (!name)
792                 name = path_get_file_name(path);
793
794         t = unit_name_to_type(name);
795
796         if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid_no_type(name, false)) {
797                 dbus_set_error(e, BUS_ERROR_INVALID_NAME, "Unit name %s is not valid.", name);
798                 return -EINVAL;
799         }
800
801         ret = manager_get_unit(m, name);
802         if (ret) {
803                 *_ret = ret;
804                 return 1;
805         }
806
807         ret = unit_new(m, unit_vtable[t]->object_size);
808         if (!ret)
809                 return -ENOMEM;
810
811         if (path) {
812                 ret->fragment_path = strdup(path);
813                 if (!ret->fragment_path) {
814                         unit_free(ret);
815                         return -ENOMEM;
816                 }
817         }
818
819         if ((r = unit_add_name(ret, name)) < 0) {
820                 unit_free(ret);
821                 return r;
822         }
823
824         unit_add_to_load_queue(ret);
825         unit_add_to_dbus_queue(ret);
826         unit_add_to_gc_queue(ret);
827
828         if (_ret)
829                 *_ret = ret;
830
831         return 0;
832 }
833
834 int manager_load_unit(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
835         int r;
836
837         assert(m);
838
839         /* This will load the service information files, but not actually
840          * start any services or anything. */
841
842         if ((r = manager_load_unit_prepare(m, name, path, e, _ret)) != 0)
843                 return r;
844
845         manager_dispatch_load_queue(m);
846
847         if (_ret)
848                 *_ret = unit_follow_merge(*_ret);
849
850         return 0;
851 }
852
853 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
854         Iterator i;
855         Job *j;
856
857         assert(s);
858         assert(f);
859
860         HASHMAP_FOREACH(j, s->jobs, i)
861                 job_dump(j, f, prefix);
862 }
863
864 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
865         Iterator i;
866         Unit *u;
867         const char *t;
868
869         assert(s);
870         assert(f);
871
872         HASHMAP_FOREACH_KEY(u, t, s->units, i)
873                 if (u->id == t)
874                         unit_dump(u, f, prefix);
875 }
876
877 void manager_clear_jobs(Manager *m) {
878         Job *j;
879
880         assert(m);
881
882         while ((j = hashmap_first(m->jobs)))
883                 /* No need to recurse. We're cancelling all jobs. */
884                 job_finish_and_invalidate(j, JOB_CANCELED, false);
885 }
886
887 unsigned manager_dispatch_run_queue(Manager *m) {
888         Job *j;
889         unsigned n = 0;
890
891         if (m->dispatching_run_queue)
892                 return 0;
893
894         m->dispatching_run_queue = true;
895
896         while ((j = m->run_queue)) {
897                 assert(j->installed);
898                 assert(j->in_run_queue);
899
900                 job_run_and_invalidate(j);
901                 n++;
902         }
903
904         m->dispatching_run_queue = false;
905         return n;
906 }
907
908 unsigned manager_dispatch_dbus_queue(Manager *m) {
909         Job *j;
910         Unit *u;
911         unsigned n = 0;
912
913         assert(m);
914
915         if (m->dispatching_dbus_queue)
916                 return 0;
917
918         m->dispatching_dbus_queue = true;
919
920         while ((u = m->dbus_unit_queue)) {
921                 assert(u->in_dbus_queue);
922
923                 bus_unit_send_change_signal(u);
924                 n++;
925         }
926
927         while ((j = m->dbus_job_queue)) {
928                 assert(j->in_dbus_queue);
929
930                 bus_job_send_change_signal(j);
931                 n++;
932         }
933
934         m->dispatching_dbus_queue = false;
935         return n;
936 }
937
938 static int manager_process_notify_fd(Manager *m) {
939         ssize_t n;
940
941         assert(m);
942
943         for (;;) {
944                 char buf[4096];
945                 struct msghdr msghdr;
946                 struct iovec iovec;
947                 struct ucred *ucred;
948                 union {
949                         struct cmsghdr cmsghdr;
950                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
951                 } control;
952                 Unit *u;
953                 char **tags;
954
955                 zero(iovec);
956                 iovec.iov_base = buf;
957                 iovec.iov_len = sizeof(buf)-1;
958
959                 zero(control);
960                 zero(msghdr);
961                 msghdr.msg_iov = &iovec;
962                 msghdr.msg_iovlen = 1;
963                 msghdr.msg_control = &control;
964                 msghdr.msg_controllen = sizeof(control);
965
966                 if ((n = recvmsg(m->notify_watch.fd, &msghdr, MSG_DONTWAIT)) <= 0) {
967                         if (n >= 0)
968                                 return -EIO;
969
970                         if (errno == EAGAIN || errno == EINTR)
971                                 break;
972
973                         return -errno;
974                 }
975
976                 if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
977                     control.cmsghdr.cmsg_level != SOL_SOCKET ||
978                     control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
979                     control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
980                         log_warning("Received notify message without credentials. Ignoring.");
981                         continue;
982                 }
983
984                 ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
985
986                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(ucred->pid))))
987                         if (!(u = cgroup_unit_by_pid(m, ucred->pid))) {
988                                 log_warning("Cannot find unit for notify message of PID %lu.", (unsigned long) ucred->pid);
989                                 continue;
990                         }
991
992                 assert((size_t) n < sizeof(buf));
993                 buf[n] = 0;
994                 if (!(tags = strv_split(buf, "\n\r")))
995                         return -ENOMEM;
996
997                 log_debug("Got notification message for unit %s", u->id);
998
999                 if (UNIT_VTABLE(u)->notify_message)
1000                         UNIT_VTABLE(u)->notify_message(u, ucred->pid, tags);
1001
1002                 strv_free(tags);
1003         }
1004
1005         return 0;
1006 }
1007
1008 static int manager_dispatch_sigchld(Manager *m) {
1009         assert(m);
1010
1011         for (;;) {
1012                 siginfo_t si;
1013                 Unit *u;
1014                 int r;
1015
1016                 zero(si);
1017
1018                 /* First we call waitd() for a PID and do not reap the
1019                  * zombie. That way we can still access /proc/$PID for
1020                  * it while it is a zombie. */
1021                 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
1022
1023                         if (errno == ECHILD)
1024                                 break;
1025
1026                         if (errno == EINTR)
1027                                 continue;
1028
1029                         return -errno;
1030                 }
1031
1032                 if (si.si_pid <= 0)
1033                         break;
1034
1035                 if (si.si_code == CLD_EXITED || si.si_code == CLD_KILLED || si.si_code == CLD_DUMPED) {
1036                         char *name = NULL;
1037
1038                         get_process_comm(si.si_pid, &name);
1039                         log_debug("Got SIGCHLD for process %lu (%s)", (unsigned long) si.si_pid, strna(name));
1040                         free(name);
1041                 }
1042
1043                 /* Let's flush any message the dying child might still
1044                  * have queued for us. This ensures that the process
1045                  * still exists in /proc so that we can figure out
1046                  * which cgroup and hence unit it belongs to. */
1047                 if ((r = manager_process_notify_fd(m)) < 0)
1048                         return r;
1049
1050                 /* And now figure out the unit this belongs to */
1051                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(si.si_pid))))
1052                         u = cgroup_unit_by_pid(m, si.si_pid);
1053
1054                 /* And now, we actually reap the zombie. */
1055                 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
1056                         if (errno == EINTR)
1057                                 continue;
1058
1059                         return -errno;
1060                 }
1061
1062                 if (si.si_code != CLD_EXITED && si.si_code != CLD_KILLED && si.si_code != CLD_DUMPED)
1063                         continue;
1064
1065                 log_debug("Child %lu died (code=%s, status=%i/%s)",
1066                           (long unsigned) si.si_pid,
1067                           sigchld_code_to_string(si.si_code),
1068                           si.si_status,
1069                           strna(si.si_code == CLD_EXITED
1070                                 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
1071                                 : signal_to_string(si.si_status)));
1072
1073                 if (!u)
1074                         continue;
1075
1076                 log_debug("Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
1077
1078                 hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
1079                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
1080         }
1081
1082         return 0;
1083 }
1084
1085 static int manager_start_target(Manager *m, const char *name, JobMode mode) {
1086         int r;
1087         DBusError error;
1088
1089         dbus_error_init(&error);
1090
1091         log_debug("Activating special unit %s", name);
1092
1093         if ((r = manager_add_job_by_name(m, JOB_START, name, mode, true, &error, NULL)) < 0)
1094                 log_error("Failed to enqueue %s job: %s", name, bus_error(&error, r));
1095
1096         dbus_error_free(&error);
1097
1098         return r;
1099 }
1100
1101 static int manager_process_signal_fd(Manager *m) {
1102         ssize_t n;
1103         struct signalfd_siginfo sfsi;
1104         bool sigchld = false;
1105
1106         assert(m);
1107
1108         for (;;) {
1109                 if ((n = read(m->signal_watch.fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
1110
1111                         if (n >= 0)
1112                                 return -EIO;
1113
1114                         if (errno == EINTR || errno == EAGAIN)
1115                                 break;
1116
1117                         return -errno;
1118                 }
1119
1120                 if (sfsi.ssi_pid > 0) {
1121                         char *p = NULL;
1122
1123                         get_process_comm(sfsi.ssi_pid, &p);
1124
1125                         log_debug("Received SIG%s from PID %lu (%s).",
1126                                   signal_to_string(sfsi.ssi_signo),
1127                                   (unsigned long) sfsi.ssi_pid, strna(p));
1128                         free(p);
1129                 } else
1130                         log_debug("Received SIG%s.", signal_to_string(sfsi.ssi_signo));
1131
1132                 switch (sfsi.ssi_signo) {
1133
1134                 case SIGCHLD:
1135                         sigchld = true;
1136                         break;
1137
1138                 case SIGTERM:
1139                         if (m->running_as == MANAGER_SYSTEM) {
1140                                 /* This is for compatibility with the
1141                                  * original sysvinit */
1142                                 m->exit_code = MANAGER_REEXECUTE;
1143                                 break;
1144                         }
1145
1146                         /* Fall through */
1147
1148                 case SIGINT:
1149                         if (m->running_as == MANAGER_SYSTEM) {
1150                                 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE);
1151                                 break;
1152                         }
1153
1154                         /* Run the exit target if there is one, if not, just exit. */
1155                         if (manager_start_target(m, SPECIAL_EXIT_TARGET, JOB_REPLACE) < 0) {
1156                                 m->exit_code = MANAGER_EXIT;
1157                                 return 0;
1158                         }
1159
1160                         break;
1161
1162                 case SIGWINCH:
1163                         if (m->running_as == MANAGER_SYSTEM)
1164                                 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
1165
1166                         /* This is a nop on non-init */
1167                         break;
1168
1169                 case SIGPWR:
1170                         if (m->running_as == MANAGER_SYSTEM)
1171                                 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
1172
1173                         /* This is a nop on non-init */
1174                         break;
1175
1176                 case SIGUSR1: {
1177                         Unit *u;
1178
1179                         u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
1180
1181                         if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
1182                                 log_info("Trying to reconnect to bus...");
1183                                 bus_init(m, true);
1184                         }
1185
1186                         if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
1187                                 log_info("Loading D-Bus service...");
1188                                 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
1189                         }
1190
1191                         break;
1192                 }
1193
1194                 case SIGUSR2: {
1195                         FILE *f;
1196                         char *dump = NULL;
1197                         size_t size;
1198
1199                         if (!(f = open_memstream(&dump, &size))) {
1200                                 log_warning("Failed to allocate memory stream.");
1201                                 break;
1202                         }
1203
1204                         manager_dump_units(m, f, "\t");
1205                         manager_dump_jobs(m, f, "\t");
1206
1207                         if (ferror(f)) {
1208                                 fclose(f);
1209                                 free(dump);
1210                                 log_warning("Failed to write status stream");
1211                                 break;
1212                         }
1213
1214                         fclose(f);
1215                         log_dump(LOG_INFO, dump);
1216                         free(dump);
1217
1218                         break;
1219                 }
1220
1221                 case SIGHUP:
1222                         m->exit_code = MANAGER_RELOAD;
1223                         break;
1224
1225                 default: {
1226
1227                         /* Starting SIGRTMIN+0 */
1228                         static const char * const target_table[] = {
1229                                 [0] = SPECIAL_DEFAULT_TARGET,
1230                                 [1] = SPECIAL_RESCUE_TARGET,
1231                                 [2] = SPECIAL_EMERGENCY_TARGET,
1232                                 [3] = SPECIAL_HALT_TARGET,
1233                                 [4] = SPECIAL_POWEROFF_TARGET,
1234                                 [5] = SPECIAL_REBOOT_TARGET,
1235                                 [6] = SPECIAL_KEXEC_TARGET
1236                         };
1237
1238                         /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
1239                         static const ManagerExitCode code_table[] = {
1240                                 [0] = MANAGER_HALT,
1241                                 [1] = MANAGER_POWEROFF,
1242                                 [2] = MANAGER_REBOOT,
1243                                 [3] = MANAGER_KEXEC
1244                         };
1245
1246                         if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
1247                             (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
1248                                 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
1249                                 manager_start_target(m, target_table[idx],
1250                                                      (idx == 1 || idx == 2) ? JOB_ISOLATE : JOB_REPLACE);
1251                                 break;
1252                         }
1253
1254                         if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
1255                             (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
1256                                 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
1257                                 break;
1258                         }
1259
1260                         switch (sfsi.ssi_signo - SIGRTMIN) {
1261
1262                         case 20:
1263                                 log_debug("Enabling showing of status.");
1264                                 manager_set_show_status(m, true);
1265                                 break;
1266
1267                         case 21:
1268                                 log_debug("Disabling showing of status.");
1269                                 manager_set_show_status(m, false);
1270                                 break;
1271
1272                         case 22:
1273                                 log_set_max_level(LOG_DEBUG);
1274                                 log_notice("Setting log level to debug.");
1275                                 break;
1276
1277                         case 23:
1278                                 log_set_max_level(LOG_INFO);
1279                                 log_notice("Setting log level to info.");
1280                                 break;
1281
1282                         case 26:
1283                                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1284                                 log_notice("Setting log target to journal-or-kmsg.");
1285                                 break;
1286
1287                         case 27:
1288                                 log_set_target(LOG_TARGET_CONSOLE);
1289                                 log_notice("Setting log target to console.");
1290                                 break;
1291
1292                         case 28:
1293                                 log_set_target(LOG_TARGET_KMSG);
1294                                 log_notice("Setting log target to kmsg.");
1295                                 break;
1296
1297                         case 29:
1298                                 log_set_target(LOG_TARGET_SYSLOG_OR_KMSG);
1299                                 log_notice("Setting log target to syslog-or-kmsg.");
1300                                 break;
1301
1302                         default:
1303                                 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
1304                         }
1305                 }
1306                 }
1307         }
1308
1309         if (sigchld)
1310                 return manager_dispatch_sigchld(m);
1311
1312         return 0;
1313 }
1314
1315 static int process_event(Manager *m, struct epoll_event *ev) {
1316         int r;
1317         Watch *w;
1318
1319         assert(m);
1320         assert(ev);
1321
1322         assert_se(w = ev->data.ptr);
1323
1324         if (w->type == WATCH_INVALID)
1325                 return 0;
1326
1327         switch (w->type) {
1328
1329         case WATCH_SIGNAL:
1330
1331                 /* An incoming signal? */
1332                 if (ev->events != EPOLLIN)
1333                         return -EINVAL;
1334
1335                 if ((r = manager_process_signal_fd(m)) < 0)
1336                         return r;
1337
1338                 break;
1339
1340         case WATCH_NOTIFY:
1341
1342                 /* An incoming daemon notification event? */
1343                 if (ev->events != EPOLLIN)
1344                         return -EINVAL;
1345
1346                 if ((r = manager_process_notify_fd(m)) < 0)
1347                         return r;
1348
1349                 break;
1350
1351         case WATCH_FD:
1352
1353                 /* Some fd event, to be dispatched to the units */
1354                 UNIT_VTABLE(w->data.unit)->fd_event(w->data.unit, w->fd, ev->events, w);
1355                 break;
1356
1357         case WATCH_UNIT_TIMER:
1358         case WATCH_JOB_TIMER: {
1359                 uint64_t v;
1360                 ssize_t k;
1361
1362                 /* Some timer event, to be dispatched to the units */
1363                 if ((k = read(w->fd, &v, sizeof(v))) != sizeof(v)) {
1364
1365                         if (k < 0 && (errno == EINTR || errno == EAGAIN))
1366                                 break;
1367
1368                         return k < 0 ? -errno : -EIO;
1369                 }
1370
1371                 if (w->type == WATCH_UNIT_TIMER)
1372                         UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
1373                 else
1374                         job_timer_event(w->data.job, v, w);
1375                 break;
1376         }
1377
1378         case WATCH_MOUNT:
1379                 /* Some mount table change, intended for the mount subsystem */
1380                 mount_fd_event(m, ev->events);
1381                 break;
1382
1383         case WATCH_SWAP:
1384                 /* Some swap table change, intended for the swap subsystem */
1385                 swap_fd_event(m, ev->events);
1386                 break;
1387
1388         case WATCH_UDEV:
1389                 /* Some notification from udev, intended for the device subsystem */
1390                 device_fd_event(m, ev->events);
1391                 break;
1392
1393         case WATCH_DBUS_WATCH:
1394                 bus_watch_event(m, w, ev->events);
1395                 break;
1396
1397         case WATCH_DBUS_TIMEOUT:
1398                 bus_timeout_event(m, w, ev->events);
1399                 break;
1400
1401         default:
1402                 log_error("event type=%i", w->type);
1403                 assert_not_reached("Unknown epoll event type.");
1404         }
1405
1406         return 0;
1407 }
1408
1409 int manager_loop(Manager *m) {
1410         int r;
1411
1412         RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
1413
1414         assert(m);
1415         m->exit_code = MANAGER_RUNNING;
1416
1417         /* Release the path cache */
1418         set_free_free(m->unit_path_cache);
1419         m->unit_path_cache = NULL;
1420
1421         manager_check_finished(m);
1422
1423         /* There might still be some zombies hanging around from
1424          * before we were exec()'ed. Leat's reap them */
1425         r = manager_dispatch_sigchld(m);
1426         if (r < 0)
1427                 return r;
1428
1429         while (m->exit_code == MANAGER_RUNNING) {
1430                 struct epoll_event event;
1431                 int n;
1432                 int wait_msec = -1;
1433
1434                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM)
1435                         watchdog_ping();
1436
1437                 if (!ratelimit_test(&rl)) {
1438                         /* Yay, something is going seriously wrong, pause a little */
1439                         log_warning("Looping too fast. Throttling execution a little.");
1440                         sleep(1);
1441                         continue;
1442                 }
1443
1444                 if (manager_dispatch_load_queue(m) > 0)
1445                         continue;
1446
1447                 if (manager_dispatch_run_queue(m) > 0)
1448                         continue;
1449
1450                 if (bus_dispatch(m) > 0)
1451                         continue;
1452
1453                 if (manager_dispatch_cleanup_queue(m) > 0)
1454                         continue;
1455
1456                 if (manager_dispatch_gc_queue(m) > 0)
1457                         continue;
1458
1459                 if (manager_dispatch_dbus_queue(m) > 0)
1460                         continue;
1461
1462                 if (swap_dispatch_reload(m) > 0)
1463                         continue;
1464
1465                 /* Sleep for half the watchdog time */
1466                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM) {
1467                         wait_msec = (int) (m->runtime_watchdog / 2 / USEC_PER_MSEC);
1468                         if (wait_msec <= 0)
1469                                 wait_msec = 1;
1470                 } else
1471                         wait_msec = -1;
1472
1473                 n = epoll_wait(m->epoll_fd, &event, 1, wait_msec);
1474                 if (n < 0) {
1475
1476                         if (errno == EINTR)
1477                                 continue;
1478
1479                         return -errno;
1480                 } else if (n == 0)
1481                         continue;
1482
1483                 assert(n == 1);
1484
1485                 r = process_event(m, &event);
1486                 if (r < 0)
1487                         return r;
1488         }
1489
1490         return m->exit_code;
1491 }
1492
1493 int manager_get_unit_from_dbus_path(Manager *m, const char *s, Unit **_u) {
1494         char *n;
1495         Unit *u;
1496
1497         assert(m);
1498         assert(s);
1499         assert(_u);
1500
1501         if (!startswith(s, "/org/freedesktop/systemd1/unit/"))
1502                 return -EINVAL;
1503
1504         if (!(n = bus_path_unescape(s+31)))
1505                 return -ENOMEM;
1506
1507         u = manager_get_unit(m, n);
1508         free(n);
1509
1510         if (!u)
1511                 return -ENOENT;
1512
1513         *_u = u;
1514
1515         return 0;
1516 }
1517
1518 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
1519         Job *j;
1520         unsigned id;
1521         int r;
1522
1523         assert(m);
1524         assert(s);
1525         assert(_j);
1526
1527         if (!startswith(s, "/org/freedesktop/systemd1/job/"))
1528                 return -EINVAL;
1529
1530         if ((r = safe_atou(s + 30, &id)) < 0)
1531                 return r;
1532
1533         if (!(j = manager_get_job(m, id)))
1534                 return -ENOENT;
1535
1536         *_j = j;
1537
1538         return 0;
1539 }
1540
1541 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
1542
1543 #ifdef HAVE_AUDIT
1544         char *p;
1545
1546         if (m->audit_fd < 0)
1547                 return;
1548
1549         /* Don't generate audit events if the service was already
1550          * started and we're just deserializing */
1551         if (m->n_reloading > 0)
1552                 return;
1553
1554         if (m->running_as != MANAGER_SYSTEM)
1555                 return;
1556
1557         if (u->type != UNIT_SERVICE)
1558                 return;
1559
1560         if (!(p = unit_name_to_prefix_and_instance(u->id))) {
1561                 log_error("Failed to allocate unit name for audit message: %s", strerror(ENOMEM));
1562                 return;
1563         }
1564
1565         if (audit_log_user_comm_message(m->audit_fd, type, "", p, NULL, NULL, NULL, success) < 0) {
1566                 if (errno == EPERM) {
1567                         /* We aren't allowed to send audit messages?
1568                          * Then let's not retry again. */
1569                         audit_close(m->audit_fd);
1570                         m->audit_fd = -1;
1571                 } else
1572                         log_warning("Failed to send audit message: %m");
1573         }
1574
1575         free(p);
1576 #endif
1577
1578 }
1579
1580 void manager_send_unit_plymouth(Manager *m, Unit *u) {
1581         int fd = -1;
1582         union sockaddr_union sa;
1583         int n = 0;
1584         char *message = NULL;
1585
1586         /* Don't generate plymouth events if the service was already
1587          * started and we're just deserializing */
1588         if (m->n_reloading > 0)
1589                 return;
1590
1591         if (m->running_as != MANAGER_SYSTEM)
1592                 return;
1593
1594         if (u->type != UNIT_SERVICE &&
1595             u->type != UNIT_MOUNT &&
1596             u->type != UNIT_SWAP)
1597                 return;
1598
1599         /* We set SOCK_NONBLOCK here so that we rather drop the
1600          * message then wait for plymouth */
1601         if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
1602                 log_error("socket() failed: %m");
1603                 return;
1604         }
1605
1606         zero(sa);
1607         sa.sa.sa_family = AF_UNIX;
1608         strncpy(sa.un.sun_path+1, "/org/freedesktop/plymouthd", sizeof(sa.un.sun_path)-1);
1609         if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) {
1610
1611                 if (errno != EPIPE &&
1612                     errno != EAGAIN &&
1613                     errno != ENOENT &&
1614                     errno != ECONNREFUSED &&
1615                     errno != ECONNRESET &&
1616                     errno != ECONNABORTED)
1617                         log_error("connect() failed: %m");
1618
1619                 goto finish;
1620         }
1621
1622         if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
1623                 log_error("Out of memory");
1624                 goto finish;
1625         }
1626
1627         errno = 0;
1628         if (write(fd, message, n + 1) != n + 1) {
1629
1630                 if (errno != EPIPE &&
1631                     errno != EAGAIN &&
1632                     errno != ENOENT &&
1633                     errno != ECONNREFUSED &&
1634                     errno != ECONNRESET &&
1635                     errno != ECONNABORTED)
1636                         log_error("Failed to write Plymouth message: %m");
1637
1638                 goto finish;
1639         }
1640
1641 finish:
1642         if (fd >= 0)
1643                 close_nointr_nofail(fd);
1644
1645         free(message);
1646 }
1647
1648 void manager_dispatch_bus_name_owner_changed(
1649                 Manager *m,
1650                 const char *name,
1651                 const char* old_owner,
1652                 const char *new_owner) {
1653
1654         Unit *u;
1655
1656         assert(m);
1657         assert(name);
1658
1659         if (!(u = hashmap_get(m->watch_bus, name)))
1660                 return;
1661
1662         UNIT_VTABLE(u)->bus_name_owner_change(u, name, old_owner, new_owner);
1663 }
1664
1665 void manager_dispatch_bus_query_pid_done(
1666                 Manager *m,
1667                 const char *name,
1668                 pid_t pid) {
1669
1670         Unit *u;
1671
1672         assert(m);
1673         assert(name);
1674         assert(pid >= 1);
1675
1676         if (!(u = hashmap_get(m->watch_bus, name)))
1677                 return;
1678
1679         UNIT_VTABLE(u)->bus_query_pid_done(u, name, pid);
1680 }
1681
1682 int manager_open_serialization(Manager *m, FILE **_f) {
1683         char *path = NULL;
1684         mode_t saved_umask;
1685         int fd;
1686         FILE *f;
1687
1688         assert(_f);
1689
1690         if (m->running_as == MANAGER_SYSTEM)
1691                 asprintf(&path, "/run/systemd/dump-%lu-XXXXXX", (unsigned long) getpid());
1692         else
1693                 asprintf(&path, "/tmp/systemd-dump-%lu-XXXXXX", (unsigned long) getpid());
1694
1695         if (!path)
1696                 return -ENOMEM;
1697
1698         saved_umask = umask(0077);
1699         fd = mkostemp(path, O_RDWR|O_CLOEXEC);
1700         umask(saved_umask);
1701
1702         if (fd < 0) {
1703                 free(path);
1704                 return -errno;
1705         }
1706
1707         unlink(path);
1708
1709         log_debug("Serializing state to %s", path);
1710         free(path);
1711
1712         if (!(f = fdopen(fd, "w+")))
1713                 return -errno;
1714
1715         *_f = f;
1716
1717         return 0;
1718 }
1719
1720 int manager_serialize(Manager *m, FILE *f, FDSet *fds) {
1721         Iterator i;
1722         Unit *u;
1723         const char *t;
1724         int r;
1725
1726         assert(m);
1727         assert(f);
1728         assert(fds);
1729
1730         m->n_reloading ++;
1731
1732         fprintf(f, "current-job-id=%i\n", m->current_job_id);
1733         fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
1734
1735         dual_timestamp_serialize(f, "initrd-timestamp", &m->initrd_timestamp);
1736         dual_timestamp_serialize(f, "startup-timestamp", &m->startup_timestamp);
1737         dual_timestamp_serialize(f, "finish-timestamp", &m->finish_timestamp);
1738
1739         fputc('\n', f);
1740
1741         HASHMAP_FOREACH_KEY(u, t, m->units, i) {
1742                 if (u->id != t)
1743                         continue;
1744
1745                 if (!unit_can_serialize(u))
1746                         continue;
1747
1748                 /* Start marker */
1749                 fputs(u->id, f);
1750                 fputc('\n', f);
1751
1752                 if ((r = unit_serialize(u, f, fds)) < 0) {
1753                         m->n_reloading --;
1754                         return r;
1755                 }
1756         }
1757
1758         assert(m->n_reloading > 0);
1759         m->n_reloading --;
1760
1761         if (ferror(f))
1762                 return -EIO;
1763
1764         r = bus_fdset_add_all(m, fds);
1765         if (r < 0)
1766                 return r;
1767
1768         return 0;
1769 }
1770
1771 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
1772         int r = 0;
1773
1774         assert(m);
1775         assert(f);
1776
1777         log_debug("Deserializing state...");
1778
1779         m->n_reloading ++;
1780
1781         for (;;) {
1782                 char line[LINE_MAX], *l;
1783
1784                 if (!fgets(line, sizeof(line), f)) {
1785                         if (feof(f))
1786                                 r = 0;
1787                         else
1788                                 r = -errno;
1789
1790                         goto finish;
1791                 }
1792
1793                 char_array_0(line);
1794                 l = strstrip(line);
1795
1796                 if (l[0] == 0)
1797                         break;
1798
1799                 if (startswith(l, "current-job-id=")) {
1800                         uint32_t id;
1801
1802                         if (safe_atou32(l+15, &id) < 0)
1803                                 log_debug("Failed to parse current job id value %s", l+15);
1804                         else
1805                                 m->current_job_id = MAX(m->current_job_id, id);
1806                 } else if (startswith(l, "taint-usr=")) {
1807                         int b;
1808
1809                         if ((b = parse_boolean(l+10)) < 0)
1810                                 log_debug("Failed to parse taint /usr flag %s", l+10);
1811                         else
1812                                 m->taint_usr = m->taint_usr || b;
1813                 } else if (startswith(l, "initrd-timestamp="))
1814                         dual_timestamp_deserialize(l+17, &m->initrd_timestamp);
1815                 else if (startswith(l, "startup-timestamp="))
1816                         dual_timestamp_deserialize(l+18, &m->startup_timestamp);
1817                 else if (startswith(l, "finish-timestamp="))
1818                         dual_timestamp_deserialize(l+17, &m->finish_timestamp);
1819                 else
1820                         log_debug("Unknown serialization item '%s'", l);
1821         }
1822
1823         for (;;) {
1824                 Unit *u;
1825                 char name[UNIT_NAME_MAX+2];
1826
1827                 /* Start marker */
1828                 if (!fgets(name, sizeof(name), f)) {
1829                         if (feof(f))
1830                                 r = 0;
1831                         else
1832                                 r = -errno;
1833
1834                         goto finish;
1835                 }
1836
1837                 char_array_0(name);
1838
1839                 if ((r = manager_load_unit(m, strstrip(name), NULL, NULL, &u)) < 0)
1840                         goto finish;
1841
1842                 if ((r = unit_deserialize(u, f, fds)) < 0)
1843                         goto finish;
1844         }
1845
1846 finish:
1847         if (ferror(f)) {
1848                 r = -EIO;
1849                 goto finish;
1850         }
1851
1852         assert(m->n_reloading > 0);
1853         m->n_reloading --;
1854
1855         return r;
1856 }
1857
1858 int manager_reload(Manager *m) {
1859         int r, q;
1860         FILE *f;
1861         FDSet *fds;
1862
1863         assert(m);
1864
1865         if ((r = manager_open_serialization(m, &f)) < 0)
1866                 return r;
1867
1868         m->n_reloading ++;
1869
1870         if (!(fds = fdset_new())) {
1871                 m->n_reloading --;
1872                 r = -ENOMEM;
1873                 goto finish;
1874         }
1875
1876         if ((r = manager_serialize(m, f, fds)) < 0) {
1877                 m->n_reloading --;
1878                 goto finish;
1879         }
1880
1881         if (fseeko(f, 0, SEEK_SET) < 0) {
1882                 m->n_reloading --;
1883                 r = -errno;
1884                 goto finish;
1885         }
1886
1887         /* From here on there is no way back. */
1888         manager_clear_jobs_and_units(m);
1889         manager_undo_generators(m);
1890
1891         /* Find new unit paths */
1892         lookup_paths_free(&m->lookup_paths);
1893         if ((q = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
1894                 r = q;
1895
1896         manager_run_generators(m);
1897
1898         manager_build_unit_path_cache(m);
1899
1900         /* First, enumerate what we can from all config files */
1901         if ((q = manager_enumerate(m)) < 0)
1902                 r = q;
1903
1904         /* Second, deserialize our stored data */
1905         if ((q = manager_deserialize(m, f, fds)) < 0)
1906                 r = q;
1907
1908         fclose(f);
1909         f = NULL;
1910
1911         /* Third, fire things up! */
1912         if ((q = manager_coldplug(m)) < 0)
1913                 r = q;
1914
1915         assert(m->n_reloading > 0);
1916         m->n_reloading--;
1917
1918 finish:
1919         if (f)
1920                 fclose(f);
1921
1922         if (fds)
1923                 fdset_free(fds);
1924
1925         return r;
1926 }
1927
1928 bool manager_is_booting_or_shutting_down(Manager *m) {
1929         Unit *u;
1930
1931         assert(m);
1932
1933         /* Is the initial job still around? */
1934         if (manager_get_job(m, m->default_unit_job_id))
1935                 return true;
1936
1937         /* Is there a job for the shutdown target? */
1938         u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
1939         if (u)
1940                 return !!u->job;
1941
1942         return false;
1943 }
1944
1945 void manager_reset_failed(Manager *m) {
1946         Unit *u;
1947         Iterator i;
1948
1949         assert(m);
1950
1951         HASHMAP_FOREACH(u, m->units, i)
1952                 unit_reset_failed(u);
1953 }
1954
1955 bool manager_unit_pending_inactive(Manager *m, const char *name) {
1956         Unit *u;
1957
1958         assert(m);
1959         assert(name);
1960
1961         /* Returns true if the unit is inactive or going down */
1962         if (!(u = manager_get_unit(m, name)))
1963                 return true;
1964
1965         return unit_pending_inactive(u);
1966 }
1967
1968 void manager_check_finished(Manager *m) {
1969         char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
1970         usec_t kernel_usec, initrd_usec, userspace_usec, total_usec;
1971
1972         assert(m);
1973
1974         if (hashmap_size(m->jobs) > 0)
1975                 return;
1976
1977         /* Notify Type=idle units that we are done now */
1978         close_pipe(m->idle_pipe);
1979
1980         if (dual_timestamp_is_set(&m->finish_timestamp))
1981                 return;
1982
1983         dual_timestamp_get(&m->finish_timestamp);
1984
1985         if (m->running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0) {
1986
1987                 userspace_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
1988                 total_usec = m->finish_timestamp.monotonic;
1989
1990                 if (dual_timestamp_is_set(&m->initrd_timestamp)) {
1991
1992                         kernel_usec = m->initrd_timestamp.monotonic;
1993                         initrd_usec = m->startup_timestamp.monotonic - m->initrd_timestamp.monotonic;
1994
1995                         log_info("Startup finished in %s (kernel) + %s (initrd) + %s (userspace) = %s.",
1996                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
1997                                  format_timespan(initrd, sizeof(initrd), initrd_usec),
1998                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
1999                                  format_timespan(sum, sizeof(sum), total_usec));
2000                 } else {
2001                         kernel_usec = m->startup_timestamp.monotonic;
2002                         initrd_usec = 0;
2003
2004                         log_info("Startup finished in %s (kernel) + %s (userspace) = %s.",
2005                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
2006                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
2007                                  format_timespan(sum, sizeof(sum), total_usec));
2008                 }
2009         } else {
2010                 userspace_usec = initrd_usec = kernel_usec = 0;
2011                 total_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
2012
2013                 log_debug("Startup finished in %s.",
2014                           format_timespan(sum, sizeof(sum), total_usec));
2015         }
2016
2017         bus_broadcast_finished(m, kernel_usec, initrd_usec, userspace_usec, total_usec);
2018
2019         sd_notifyf(false,
2020                    "READY=1\nSTATUS=Startup finished in %s.",
2021                    format_timespan(sum, sizeof(sum), total_usec));
2022 }
2023
2024 void manager_run_generators(Manager *m) {
2025         DIR *d = NULL;
2026         const char *generator_path;
2027         const char *argv[3];
2028         mode_t u;
2029
2030         assert(m);
2031
2032         generator_path = m->running_as == MANAGER_SYSTEM ? SYSTEM_GENERATOR_PATH : USER_GENERATOR_PATH;
2033         if (!(d = opendir(generator_path))) {
2034
2035                 if (errno == ENOENT)
2036                         return;
2037
2038                 log_error("Failed to enumerate generator directory: %m");
2039                 return;
2040         }
2041
2042         if (!m->generator_unit_path) {
2043                 const char *p;
2044                 char user_path[] = "/tmp/systemd-generator-XXXXXX";
2045
2046                 if (m->running_as == MANAGER_SYSTEM && getpid() == 1) {
2047                         p = "/run/systemd/generator";
2048
2049                         if (mkdir_p(p, 0755) < 0) {
2050                                 log_error("Failed to create generator directory: %m");
2051                                 goto finish;
2052                         }
2053
2054                 } else {
2055                         if (!(p = mkdtemp(user_path))) {
2056                                 log_error("Failed to create generator directory: %m");
2057                                 goto finish;
2058                         }
2059                 }
2060
2061                 if (!(m->generator_unit_path = strdup(p))) {
2062                         log_error("Failed to allocate generator unit path.");
2063                         goto finish;
2064                 }
2065         }
2066
2067         argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
2068         argv[1] = m->generator_unit_path;
2069         argv[2] = NULL;
2070
2071         u = umask(0022);
2072         execute_directory(generator_path, d, (char**) argv);
2073         umask(u);
2074
2075         if (rmdir(m->generator_unit_path) >= 0) {
2076                 /* Uh? we were able to remove this dir? I guess that
2077                  * means the directory was empty, hence let's shortcut
2078                  * this */
2079
2080                 free(m->generator_unit_path);
2081                 m->generator_unit_path = NULL;
2082                 goto finish;
2083         }
2084
2085         if (!strv_find(m->lookup_paths.unit_path, m->generator_unit_path)) {
2086                 char **l;
2087
2088                 if (!(l = strv_append(m->lookup_paths.unit_path, m->generator_unit_path))) {
2089                         log_error("Failed to add generator directory to unit search path: %m");
2090                         goto finish;
2091                 }
2092
2093                 strv_free(m->lookup_paths.unit_path);
2094                 m->lookup_paths.unit_path = l;
2095
2096                 log_debug("Added generator unit path %s to search path.", m->generator_unit_path);
2097         }
2098
2099 finish:
2100         if (d)
2101                 closedir(d);
2102 }
2103
2104 void manager_undo_generators(Manager *m) {
2105         assert(m);
2106
2107         if (!m->generator_unit_path)
2108                 return;
2109
2110         strv_remove(m->lookup_paths.unit_path, m->generator_unit_path);
2111         rm_rf(m->generator_unit_path, false, true, false);
2112
2113         free(m->generator_unit_path);
2114         m->generator_unit_path = NULL;
2115 }
2116
2117 int manager_set_default_controllers(Manager *m, char **controllers) {
2118         char **l;
2119
2120         assert(m);
2121
2122         l = strv_copy(controllers);
2123         if (!l)
2124                 return -ENOMEM;
2125
2126         strv_free(m->default_controllers);
2127         m->default_controllers = l;
2128
2129         cg_shorten_controllers(m->default_controllers);
2130
2131         return 0;
2132 }
2133
2134 void manager_recheck_journal(Manager *m) {
2135         Unit *u;
2136
2137         assert(m);
2138
2139         if (m->running_as != MANAGER_SYSTEM)
2140                 return;
2141
2142         u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
2143         if (u && SOCKET(u)->state != SOCKET_RUNNING) {
2144                 log_close_journal();
2145                 return;
2146         }
2147
2148         u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
2149         if (u && SERVICE(u)->state != SERVICE_RUNNING) {
2150                 log_close_journal();
2151                 return;
2152         }
2153
2154         /* Hmm, OK, so the socket is fully up and the service is up
2155          * too, then let's make use of the thing. */
2156         log_open();
2157 }
2158
2159 void manager_set_show_status(Manager *m, bool b) {
2160         assert(m);
2161
2162         if (m->running_as != MANAGER_SYSTEM)
2163                 return;
2164
2165         m->show_status = b;
2166
2167         if (b)
2168                 touch("/run/systemd/show-status");
2169         else
2170                 unlink("/run/systemd/show-status");
2171 }
2172
2173 bool manager_get_show_status(Manager *m) {
2174         assert(m);
2175
2176         if (m->running_as != MANAGER_SYSTEM)
2177                 return false;
2178
2179         if (m->show_status)
2180                 return true;
2181
2182         /* If Plymouth is running make sure we show the status, so
2183          * that there's something nice to see when people press Esc */
2184
2185         return plymouth_running();
2186 }
2187
2188 static const char* const manager_running_as_table[_MANAGER_RUNNING_AS_MAX] = {
2189         [MANAGER_SYSTEM] = "system",
2190         [MANAGER_USER] = "user"
2191 };
2192
2193 DEFINE_STRING_TABLE_LOOKUP(manager_running_as, ManagerRunningAs);