1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
25 #include <sys/epoll.h>
27 #include <sys/signalfd.h>
31 #include <sys/reboot.h>
32 #include <sys/ioctl.h>
36 #include <sys/types.h>
44 #include <systemd/sd-daemon.h>
47 #include "transaction.h"
54 #include "ratelimit.h"
56 #include "mount-setup.h"
57 #include "unit-name.h"
58 #include "dbus-unit.h"
61 #include "path-lookup.h"
63 #include "bus-errors.h"
64 #include "exit-status.h"
67 #include "cgroup-util.h"
69 /* As soon as 16 units are in our GC queue, make sure to run a gc sweep */
70 #define GC_QUEUE_ENTRIES_MAX 16
72 /* As soon as 10s passed since a unit was added to our GC queue, make sure to run a gc sweep */
73 #define GC_QUEUE_USEC_MAX (10*USEC_PER_SEC)
75 /* Where clients shall send notification messages to. The leading '@' of
 * the user variant is replaced by a NUL byte in manager_setup_notify()
 * before the address is passed to bind(). */
76 #define NOTIFY_SOCKET_SYSTEM "/run/systemd/notify"
77 #define NOTIFY_SOCKET_USER "@/org/freedesktop/systemd1/notify"
/* Sets up the socket on which the manager receives notification messages:
 * creates a non-blocking, close-on-exec AF_UNIX datagram socket, binds it,
 * enables SO_PASSCRED, registers it with the manager's epoll instance and
 * stores a copy of the socket path in m->notify_socket. Failures of
 * socket(), bind() and setsockopt() are logged. */
79 static int manager_setup_notify(Manager *m) {
82 struct sockaddr_un un;
84 struct epoll_event ev;
90 m->notify_watch.type = WATCH_NOTIFY;
91 if ((m->notify_watch.fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
92 log_error("Failed to allocate notification socket: %m");
97 sa.sa.sa_family = AF_UNIX;
/* Two candidate addresses follow: NOTIFY_SOCKET_USER with a random suffix,
 * or the fixed NOTIFY_SOCKET_SYSTEM path (unlink() is called on it first).
 * NOTE(review): the condition choosing between them is not visible in this
 * excerpt. */
100 snprintf(sa.un.sun_path, sizeof(sa.un.sun_path), NOTIFY_SOCKET_USER "/%llu", random_ull());
102 unlink(NOTIFY_SOCKET_SYSTEM);
103 strncpy(sa.un.sun_path, NOTIFY_SOCKET_SYSTEM, sizeof(sa.un.sun_path));
/* A leading '@' is replaced by a NUL byte for bind(); the address length
 * below is computed from the second byte on so it is valid either way. */
106 if (sa.un.sun_path[0] == '@')
107 sa.un.sun_path[0] = 0;
110 r = bind(m->notify_watch.fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1));
114 log_error("bind() failed: %m");
118 if (setsockopt(m->notify_watch.fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) {
119 log_error("SO_PASSCRED failed: %m");
125 ev.data.ptr = &m->notify_watch;
127 if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->notify_watch.fd, &ev) < 0)
/* Turn the NUL byte back into '@' before the path is stored as a string. */
130 if (sa.un.sun_path[0] == 0)
131 sa.un.sun_path[0] = '@';
133 if (!(m->notify_socket = strdup(sa.un.sun_path)))
136 log_debug("Using notification socket %s", m->notify_socket);
/* Asks the kernel to deliver SIGINT on ctrl-alt-del (reboot(RB_DISABLE_CAD))
 * and SIGWINCH on kbrequest (KDSIGACCEPT ioctl on /dev/tty0). Problems are
 * only logged as warnings; the terminal fd is closed again afterwards. */
141 static int enable_special_signals(Manager *m) {
146 /* Enable that we get SIGINT on control-alt-del. In containers
147 * this will fail with EPERM, so ignore that. */
148 if (reboot(RB_DISABLE_CAD) < 0 && errno != EPERM)
149 log_warning("Failed to enable ctrl-alt-del handling: %m");
151 fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
153 /* Support systems without virtual console */
155 log_warning("Failed to open /dev/tty0: %m");
157 /* Enable that we get SIGWINCH on kbrequest */
158 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
159 log_warning("Failed to enable kbrequest handling: %s", strerror(errno));
161 close_nointr_nofail(fd);
/* Installs the manager's signal handling: sets SIGCHLD to SIG_DFL with
 * SA_NOCLDSTOP|SA_RESTART, builds the set of signals the manager reacts to,
 * installs that set as the process signal mask, creates a non-blocking,
 * close-on-exec signalfd for it and registers the fd with the epoll
 * instance. For a system manager the result of enable_special_signals()
 * is returned. */
167 static int manager_setup_signals(Manager *m) {
169 struct epoll_event ev;
174 /* We are not interested in SIGSTOP and friends. */
176 sa.sa_handler = SIG_DFL;
177 sa.sa_flags = SA_NOCLDSTOP|SA_RESTART;
178 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
180 assert_se(sigemptyset(&mask) == 0);
182 sigset_add_many(&mask,
183 SIGCHLD, /* Child died */
184 SIGTERM, /* Reexecute daemon */
185 SIGHUP, /* Reload configuration */
186 SIGUSR1, /* systemd/upstart: reconnect to D-Bus */
187 SIGUSR2, /* systemd: dump status */
188 SIGINT, /* Kernel sends us this on control-alt-del */
189 SIGWINCH, /* Kernel sends us this on kbrequest (alt-arrowup) */
190 SIGPWR, /* Some kernel drivers and upsd send us this on power failure */
191 SIGRTMIN+0, /* systemd: start default.target */
192 SIGRTMIN+1, /* systemd: isolate rescue.target */
193 SIGRTMIN+2, /* systemd: isolate emergency.target */
194 SIGRTMIN+3, /* systemd: start halt.target */
195 SIGRTMIN+4, /* systemd: start poweroff.target */
196 SIGRTMIN+5, /* systemd: start reboot.target */
197 SIGRTMIN+6, /* systemd: start kexec.target */
198 SIGRTMIN+13, /* systemd: Immediate halt */
199 SIGRTMIN+14, /* systemd: Immediate poweroff */
200 SIGRTMIN+15, /* systemd: Immediate reboot */
201 SIGRTMIN+16, /* systemd: Immediate kexec */
202 SIGRTMIN+20, /* systemd: enable status messages */
203 SIGRTMIN+21, /* systemd: disable status messages */
204 SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
205 SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
206 SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
207 SIGRTMIN+27, /* systemd: set log target to console */
208 SIGRTMIN+28, /* systemd: set log target to kmsg */
209 SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg */
/* Block the signals so that they are queued and can be read from the
 * signalfd created below instead of being delivered asynchronously. */
211 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
213 m->signal_watch.type = WATCH_SIGNAL;
214 if ((m->signal_watch.fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0)
219 ev.data.ptr = &m->signal_watch;
221 if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->signal_watch.fd, &ev) < 0)
224 if (m->running_as == MANAGER_SYSTEM)
225 return enable_special_signals(m);
/* Removes from m->environment every variable whose name starts with one of
 * the prefixes reserved for the container and initrd interfaces. */
230 static void manager_strip_environment(Manager *m) {
233 /* Remove variables from the inherited set that are part of
234 * the container interface:
235 * http://www.freedesktop.org/wiki/Software/systemd/ContainerInterface */
236 strv_remove_prefix(m->environment, "container=");
237 strv_remove_prefix(m->environment, "container_");
239 /* Remove variables from the inherited set that are part of
240 * the initrd interface:
241 * http://www.freedesktop.org/wiki/Software/systemd/InitrdInterface */
242 strv_remove_prefix(m->environment, "RD_");
/* Allocates and initializes a Manager for the given running_as mode.
 * Visible steps, in order: record the startup timestamp, set ids and file
 * descriptors to their "unset" values, copy and strip the process
 * environment, create the lookup hashmaps and the epoll instance, set up
 * lookup paths, signals, cgroups and the notify socket, connect to the bus
 * and to the audit log, and record whether /usr is empty in m->taint_usr.
 * NOTE(review): the error paths and the assignment to *_m are not visible in
 * this excerpt. */
245 int manager_new(ManagerRunningAs running_as, Manager **_m) {
250 assert(running_as >= 0);
251 assert(running_as < _MANAGER_RUNNING_AS_MAX);
253 if (!(m = new0(Manager, 1)))
256 dual_timestamp_get(&m->startup_timestamp);
258 m->running_as = running_as;
259 m->name_data_slot = m->conn_data_slot = m->subscribed_data_slot = -1;
260 m->exit_code = _MANAGER_EXIT_CODE_INVALID;
261 m->pin_cgroupfs_fd = -1;
/* Mark all watch fds as not open so that teardown can test them with >= 0. */
267 m->signal_watch.fd = m->mount_watch.fd = m->udev_watch.fd = m->epoll_fd = m->dev_autofs_fd = m->swap_watch.fd = -1;
268 m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
270 m->environment = strv_copy(environ);
274 manager_strip_environment(m);
/* Only the system manager gets a default controller list ("cpu"). */
276 if (running_as == MANAGER_SYSTEM) {
277 m->default_controllers = strv_new("cpu", NULL);
278 if (!m->default_controllers)
282 if (!(m->units = hashmap_new(string_hash_func, string_compare_func)))
285 if (!(m->jobs = hashmap_new(trivial_hash_func, trivial_compare_func)))
288 if (!(m->watch_pids = hashmap_new(trivial_hash_func, trivial_compare_func)))
291 if (!(m->cgroup_bondings = hashmap_new(string_hash_func, string_compare_func)))
294 if (!(m->watch_bus = hashmap_new(string_hash_func, string_compare_func)))
297 if ((m->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
300 if ((r = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
303 if ((r = manager_setup_signals(m)) < 0)
306 if ((r = manager_setup_cgroup(m)) < 0)
309 if ((r = manager_setup_notify(m)) < 0)
312 /* Try to connect to the busses, if possible. */
313 if ((r = bus_init(m, running_as != MANAGER_SYSTEM)) < 0)
317 if ((m->audit_fd = audit_open()) < 0 &&
318 /* If the kernel lacks netlink or audit support,
319 * don't worry about it. */
320 errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
321 log_error("Failed to connect to audit log: %m");
324 m->taint_usr = dir_is_empty("/usr") > 0;
/* Processes units from the head of m->cleanup_queue until the queue is
 * empty. NOTE(review): the loop body and the return value are not visible
 * in this excerpt. */
334 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
340 while ((u = m->cleanup_queue)) {
341 assert(u->in_cleanup_queue);
/* Offsets that unit_gc_sweep() adds to the base marker of the current GC
 * run and stores in a unit's gc_marker to record what is known about it. */
351 GC_OFFSET_IN_PATH, /* This one is on the path we were traveling */
352 GC_OFFSET_UNSURE, /* No clue */
353 GC_OFFSET_GOOD, /* We still need this unit */
354 GC_OFFSET_BAD, /* We don't need this unit anymore */
/* Classifies unit u for the GC run identified by gc_marker, recursing into
 * the units that reference it. A unit already marked GOOD, BAD or IN_PATH
 * for this run is recognized by the first check. While its referencing
 * units are being swept the unit is marked IN_PATH; afterwards it ends up
 * UNSURE (and is re-queued for GC), BAD (and is queued for cleanup) or GOOD.
 * NOTE(review): the statements controlled by the individual checks, and
 * hence the exact path to each outcome, are not visible in this excerpt. */
358 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
365 if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
366 u->gc_marker == gc_marker + GC_OFFSET_BAD ||
367 u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
370 if (u->in_cleanup_queue)
373 if (unit_check_gc(u))
376 u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
380 SET_FOREACH(other, u->dependencies[UNIT_REFERENCED_BY], i) {
381 unit_gc_sweep(other, gc_marker);
383 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
386 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
393 /* We were unable to find anything out about this entry, so
394 * let's investigate it later */
395 u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
396 unit_add_to_gc_queue(u);
400 /* We definitely know that this one is not useful anymore, so
401 * let's mark it for deletion */
402 u->gc_marker = gc_marker + GC_OFFSET_BAD;
403 unit_add_to_cleanup_queue(u);
407 u->gc_marker = gc_marker + GC_OFFSET_GOOD;
/* Runs a GC sweep over m->gc_queue. The first check matches while fewer
 * than GC_QUEUE_ENTRIES_MAX units are queued and either no queue timestamp
 * is set or GC_QUEUE_USEC_MAX has not yet elapsed since it (presumably the
 * sweep is skipped then; the controlled statement is not visible here).
 * Each run advances m->gc_marker by _GC_OFFSET_MAX to get a fresh marker
 * base. Every queued unit is swept and dequeued; units left BAD or UNSURE
 * are marked BAD and added to the cleanup queue. Finally the queue counter
 * and timestamp are reset. */
410 static unsigned manager_dispatch_gc_queue(Manager *m) {
417 if ((m->n_in_gc_queue < GC_QUEUE_ENTRIES_MAX) &&
418 (m->gc_queue_timestamp <= 0 ||
419 (m->gc_queue_timestamp + GC_QUEUE_USEC_MAX) > now(CLOCK_MONOTONIC)))
422 log_debug("Running GC...");
424 m->gc_marker += _GC_OFFSET_MAX;
/* Detects the marker wrapping around. NOTE(review): the statement this
 * check controls is not visible in this excerpt. */
425 if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
428 gc_marker = m->gc_marker;
430 while ((u = m->gc_queue)) {
431 assert(u->in_gc_queue);
433 unit_gc_sweep(u, gc_marker);
435 LIST_REMOVE(Unit, gc_queue, m->gc_queue, u);
436 u->in_gc_queue = false;
440 if (u->gc_marker == gc_marker + GC_OFFSET_BAD ||
441 u->gc_marker == gc_marker + GC_OFFSET_UNSURE) {
442 log_debug("Collecting %s", u->id);
443 u->gc_marker = gc_marker + GC_OFFSET_BAD;
444 unit_add_to_cleanup_queue(u);
448 m->n_in_gc_queue = 0;
449 m->gc_queue_timestamp = 0;
/* Drains m->units (the loop body is not visible in this excerpt), flushes
 * the cleanup queue and then asserts that every work queue as well as the
 * job and unit hashmaps are empty. */
454 static void manager_clear_jobs_and_units(Manager *m) {
459 while ((u = hashmap_first(m->units)))
462 manager_dispatch_cleanup_queue(m);
464 assert(!m->load_queue);
465 assert(!m->run_queue);
466 assert(!m->dbus_unit_queue);
467 assert(!m->dbus_job_queue);
468 assert(!m->cleanup_queue);
469 assert(!m->gc_queue);
471 assert(hashmap_isempty(m->jobs));
472 assert(hashmap_isempty(m->units));
/* Tears down a Manager: clears all jobs and units, calls the shutdown hook
 * of every unit type that has one, calls manager_shutdown_cgroup() (its
 * second argument is true unless the exit code is MANAGER_REEXECUTE) and
 * manager_undo_generators(), then frees the hashmaps, closes the epoll,
 * signal and notify fds and the audit connection if they are open, and
 * releases the notify socket path, lookup paths, environment, default
 * controllers and unit path cache. */
475 void manager_free(Manager *m) {
480 manager_clear_jobs_and_units(m);
482 for (c = 0; c < _UNIT_TYPE_MAX; c++)
483 if (unit_vtable[c]->shutdown)
484 unit_vtable[c]->shutdown(m);
486 /* If we reexecute ourselves, we keep the root cgroup
488 manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
490 manager_undo_generators(m);
494 hashmap_free(m->units);
495 hashmap_free(m->jobs);
496 hashmap_free(m->watch_pids);
497 hashmap_free(m->watch_bus);
499 if (m->epoll_fd >= 0)
500 close_nointr_nofail(m->epoll_fd);
501 if (m->signal_watch.fd >= 0)
502 close_nointr_nofail(m->signal_watch.fd);
503 if (m->notify_watch.fd >= 0)
504 close_nointr_nofail(m->notify_watch.fd);
507 if (m->audit_fd >= 0)
508 audit_close(m->audit_fd);
511 free(m->notify_socket);
513 lookup_paths_free(&m->lookup_paths);
514 strv_free(m->environment);
516 strv_free(m->default_controllers);
518 hashmap_free(m->cgroup_bondings);
519 set_free_free(m->unit_path_cache);
/* Calls the enumerate hook of every unit type that provides one and then
 * dispatches the load queue. NOTE(review): how a negative hook result q is
 * folded into the return value is not visible in this excerpt. */
524 int manager_enumerate(Manager *m) {
530 /* Let's ask every type to load all units from disk/kernel
531 * that it might know */
532 for (c = 0; c < _UNIT_TYPE_MAX; c++)
533 if (unit_vtable[c]->enumerate)
534 if ((q = unit_vtable[c]->enumerate(m)) < 0)
537 manager_dispatch_load_queue(m);
/* Iterates over m->units (value u, key k) and calls unit_coldplug() on the
 * units. NOTE(review): the lines between the loop head and the call are not
 * visible here, so some entries may be skipped; the handling of a negative
 * result q is not visible either. */
541 int manager_coldplug(Manager *m) {
549 /* Then, let's set up their initial state. */
550 HASHMAP_FOREACH_KEY(u, k, m->units, i) {
556 if ((q = unit_coldplug(u)) < 0)
/* Rebuilds m->unit_path_cache from scratch: the old set is freed, a new
 * string set is allocated, and for every directory in
 * m->lookup_paths.unit_path the directory entries are read, joined with the
 * directory name into a path and put into the set. Entries are first tested
 * with ignore_file() (presumably to skip them; the controlled statement is
 * not visible in this excerpt). The visible failure handling logs an error,
 * frees the set and resets the pointer to NULL. */
563 static void manager_build_unit_path_cache(Manager *m) {
570 set_free_free(m->unit_path_cache);
572 if (!(m->unit_path_cache = set_new(string_hash_func, string_compare_func))) {
573 log_error("Failed to allocate unit path cache.");
577 /* This simply builds a list of files we know exist, so that
578 * we don't always have to go to disk */
580 STRV_FOREACH(i, m->lookup_paths.unit_path) {
583 if (!(d = opendir(*i))) {
584 log_error("Failed to open directory: %m");
588 while ((de = readdir(d))) {
591 if (ignore_file(de->d_name))
/* For the root directory use an empty prefix so the result is "/name"
 * rather than "//name". */
594 p = join(streq(*i, "/") ? "" : *i, "/", de->d_name, NULL);
600 if ((r = set_put(m->unit_path_cache, p)) < 0) {
613 log_error("Failed to build unit path cache: %s", strerror(-r));
615 set_free_free(m->unit_path_cache);
616 m->unit_path_cache = NULL;
/* Brings the manager up: runs the generators, builds the unit path cache,
 * enumerates units, deserializes previously saved state from serialization
 * and fds, and finally coldplugs the units. NOTE(review): the condition
 * guarding the deserialization step, the m->n_reloading bookkeeping around
 * the assert and the way r and q are combined into the return value are not
 * visible in this excerpt. */
622 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
627 manager_run_generators(m);
629 manager_build_unit_path_cache(m);
631 /* If we will deserialize make sure that during enumeration
632 * this is already known, so we increase the counter here
637 /* First, enumerate what we can from all config files */
638 r = manager_enumerate(m);
640 /* Second, deserialize if there is something to deserialize */
642 if ((q = manager_deserialize(m, serialization, fds)) < 0)
645 /* Third, fire things up! */
646 if ((q = manager_coldplug(m)) < 0)
650 assert(m->n_reloading > 0);
/* Enqueues a job of the given type for unit. A transaction is created, the
 * job and its dependencies are added (for JOB_ISOLATE the isolate jobs are
 * added as well) and the transaction is activated. JOB_ISOLATE is refused,
 * with an error set in e, unless type is JOB_START and the unit has
 * allow_isolate set. After successful activation the anchor job is stored
 * through _ret and the transaction is freed; the other visible exit aborts
 * the transaction before freeing it. NOTE(review): the return values and
 * the NULL check on _ret are not visible in this excerpt. */
657 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, bool override, DBusError *e, Job **_ret) {
662 assert(type < _JOB_TYPE_MAX);
664 assert(mode < _JOB_MODE_MAX);
666 if (mode == JOB_ISOLATE && type != JOB_START) {
667 dbus_set_error(e, BUS_ERROR_INVALID_JOB_MODE, "Isolate is only valid for start.");
671 if (mode == JOB_ISOLATE && !unit->allow_isolate) {
672 dbus_set_error(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
676 log_debug("Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
678 tr = transaction_new();
682 r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, override, false,
683 mode == JOB_IGNORE_DEPENDENCIES || mode == JOB_IGNORE_REQUIREMENTS,
684 mode == JOB_IGNORE_DEPENDENCIES, e);
688 if (mode == JOB_ISOLATE) {
689 r = transaction_add_isolate_jobs(tr, m);
694 r = transaction_activate(tr, m, mode, e);
698 log_debug("Enqueued job %s/%s as %u", unit->id, job_type_to_string(type), (unsigned) tr->anchor_job->id);
701 *_ret = tr->anchor_job;
703 transaction_free(tr);
707 transaction_abort(tr);
708 transaction_free(tr);
/* Convenience wrapper: loads the unit called name with manager_load_unit()
 * and, if that did not fail, returns the result of manager_add_job() for
 * it with the same type, mode, override, e and _ret. */
712 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, bool override, DBusError *e, Job **_ret) {
717 assert(type < _JOB_TYPE_MAX);
719 assert(mode < _JOB_MODE_MAX);
721 if ((r = manager_load_unit(m, name, NULL, NULL, &unit)) < 0)
724 return manager_add_job(m, type, unit, mode, override, e, _ret);
/* Looks up a job by its numeric id in m->jobs and returns whatever
 * hashmap_get() yields for that key. */
727 Job *manager_get_job(Manager *m, uint32_t id) {
730 return hashmap_get(m->jobs, UINT32_TO_PTR(id));
/* Looks up a unit by name in m->units and returns whatever hashmap_get()
 * yields for that key. */
733 Unit *manager_get_unit(Manager *m, const char *name) {
737 return hashmap_get(m->units, name);
/* Processes m->load_queue until it is empty. The dispatching_load_queue
 * flag is set for the duration of the loop and checked on entry so that a
 * nested call does not dispatch again. NOTE(review): the loop body and the
 * return values are not visible in this excerpt. */
740 unsigned manager_dispatch_load_queue(Manager *m) {
746 /* Make sure we are not run recursively */
747 if (m->dispatching_load_queue)
750 m->dispatching_load_queue = true;
752 /* Dispatches the load queue. Takes a unit from the queue and
753 * tries to load its data until the queue is empty */
755 while ((u = m->load_queue)) {
756 assert(u->in_load_queue);
762 m->dispatching_load_queue = false;
/* Resolves name and/or path to a unit object without reading anything from
 * disk. At least one of name and path must be given. A path that fails
 * is_path() is rejected with BUS_ERROR_INVALID_PATH; the name can be taken
 * from the file name component of path (the condition for that is not
 * visible in this excerpt). A name with an unknown unit type or one that
 * fails unit_name_is_valid_no_type() is rejected with
 * BUS_ERROR_INVALID_NAME. An existing unit of that name is looked up;
 * otherwise a new unit of the matching type is allocated, gets a copy of
 * path as fragment_path and the name, and is added to the load, D-Bus and
 * GC queues. NOTE(review): return values and the assignment to *_ret are
 * not visible here. */
766 int manager_load_unit_prepare(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
772 assert(name || path);
774 /* This will prepare the unit for loading, but not actually
775 * load anything from disk. */
777 if (path && !is_path(path)) {
778 dbus_set_error(e, BUS_ERROR_INVALID_PATH, "Path %s is not absolute.", path);
783 name = file_name_from_path(path);
785 t = unit_name_to_type(name);
787 if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid_no_type(name, false)) {
788 dbus_set_error(e, BUS_ERROR_INVALID_NAME, "Unit name %s is not valid.", name);
792 ret = manager_get_unit(m, name);
798 ret = unit_new(m, unit_vtable[t]->object_size);
803 ret->fragment_path = strdup(path);
804 if (!ret->fragment_path) {
810 if ((r = unit_add_name(ret, name)) < 0) {
815 unit_add_to_load_queue(ret);
816 unit_add_to_dbus_queue(ret);
817 unit_add_to_gc_queue(ret);
/* Prepares the unit with manager_load_unit_prepare(), dispatches the load
 * queue and then replaces *_ret with the unit it was merged into, if any
 * (unit_follow_merge()). Any non-zero result of the prepare step, not only
 * a negative one, takes the branch at the first check. NOTE(review): the
 * statement that branch executes is not visible in this excerpt. */
825 int manager_load_unit(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
830 /* This will load the service information files, but not actually
831 * start any services or anything. */
833 if ((r = manager_load_unit_prepare(m, name, path, e, _ret)) != 0)
836 manager_dispatch_load_queue(m);
839 *_ret = unit_follow_merge(*_ret);
/* Writes a dump of every job in s->jobs to f by calling job_dump() with the
 * given prefix. */
844 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
851 HASHMAP_FOREACH(j, s->jobs, i)
852 job_dump(j, f, prefix);
/* Writes a dump of the units in s->units to f by calling unit_dump() with
 * the given prefix. NOTE(review): a line between the loop head and the call
 * is not visible here, so some entries may be filtered out. */
855 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
863 HASHMAP_FOREACH_KEY(u, t, s->units, i)
865 unit_dump(u, f, prefix);
/* Cancels every job: repeatedly takes the first entry of m->jobs and
 * finishes it with result JOB_CANCELED until the hashmap is empty. */
868 void manager_clear_jobs(Manager *m) {
873 while ((j = hashmap_first(m->jobs)))
874 /* No need to recurse. We're cancelling all jobs. */
875 job_finish_and_invalidate(j, JOB_CANCELED, false);
/* Runs the jobs in m->run_queue until the queue is empty, calling
 * job_run_and_invalidate() on each. The dispatching_run_queue flag is set
 * for the duration of the loop and checked on entry. NOTE(review): the
 * return values are not visible in this excerpt. */
878 unsigned manager_dispatch_run_queue(Manager *m) {
882 if (m->dispatching_run_queue)
885 m->dispatching_run_queue = true;
887 while ((j = m->run_queue)) {
888 assert(j->installed);
889 assert(j->in_run_queue);
891 job_run_and_invalidate(j);
895 m->dispatching_run_queue = false;
/* Sends change signals for everything queued for D-Bus: first for the units
 * in m->dbus_unit_queue, then for the jobs in m->dbus_job_queue. The
 * dispatching_dbus_queue flag is set for the duration and checked on entry.
 * NOTE(review): the return values are not visible in this excerpt. */
899 unsigned manager_dispatch_dbus_queue(Manager *m) {
906 if (m->dispatching_dbus_queue)
909 m->dispatching_dbus_queue = true;
911 while ((u = m->dbus_unit_queue)) {
912 assert(u->in_dbus_queue);
914 bus_unit_send_change_signal(u);
918 while ((j = m->dbus_job_queue)) {
919 assert(j->in_dbus_queue);
921 bus_job_send_change_signal(j);
925 m->dispatching_dbus_queue = false;
/* Receives a notification datagram from m->notify_watch.fd without blocking
 * and hands it to the unit it belongs to. The message must carry exactly
 * one SCM_CREDENTIALS control message, otherwise it is ignored with a
 * warning. The sending unit is looked up by the credential's PID, first in
 * m->watch_pids and then via cgroup_unit_by_pid(). The payload is split at
 * newline and carriage-return characters into tags, which are passed to the
 * unit type's notify_message hook if it has one. NOTE(review): looping,
 * cleanup and return values are not visible in this excerpt. */
929 static int manager_process_notify_fd(Manager *m) {
936 struct msghdr msghdr;
940 struct cmsghdr cmsghdr;
941 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
947 iovec.iov_base = buf;
/* One byte less than the buffer size is offered to recvmsg(), so the
 * assert on n further down holds. */
948 iovec.iov_len = sizeof(buf)-1;
952 msghdr.msg_iov = &iovec;
953 msghdr.msg_iovlen = 1;
954 msghdr.msg_control = &control;
955 msghdr.msg_controllen = sizeof(control);
957 if ((n = recvmsg(m->notify_watch.fd, &msghdr, MSG_DONTWAIT)) <= 0) {
961 if (errno == EAGAIN || errno == EINTR)
967 if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
968 control.cmsghdr.cmsg_level != SOL_SOCKET ||
969 control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
970 control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
971 log_warning("Received notify message without credentials. Ignoring.");
975 ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
977 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(ucred->pid))))
978 if (!(u = cgroup_unit_by_pid(m, ucred->pid))) {
979 log_warning("Cannot find unit for notify message of PID %lu.", (unsigned long) ucred->pid);
983 assert((size_t) n < sizeof(buf));
985 if (!(tags = strv_split(buf, "\n\r")))
988 log_debug("Got notification message for unit %s", u->id);
990 if (UNIT_VTABLE(u)->notify_message)
991 UNIT_VTABLE(u)->notify_message(u, ucred->pid, tags);
/* Handles terminated children. A child is first inspected with
 * waitid(..., WNOHANG|WNOWAIT) so that it stays a zombie; pending notify
 * messages are flushed and the owning unit is looked up by PID (m->watch_pids
 * first, then cgroup_unit_by_pid()). Only then is the child reaped with a
 * second waitid(). For exited, killed or dumped children the PID is removed
 * from m->watch_pids and the unit type's sigchld_event hook is called.
 * NOTE(review): the enclosing loop, the NULL check on u and the return
 * values are not visible in this excerpt. */
999 static int manager_dispatch_sigchld(Manager *m) {
1009 /* First we call waitid() for a PID and do not reap the
1010 * zombie. That way we can still access /proc/$PID for
1011 * it while it is a zombie. */
1012 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
1014 if (errno == ECHILD)
1026 if (si.si_code == CLD_EXITED || si.si_code == CLD_KILLED || si.si_code == CLD_DUMPED) {
1029 get_process_comm(si.si_pid, &name);
1030 log_debug("Got SIGCHLD for process %lu (%s)", (unsigned long) si.si_pid, strna(name));
1034 /* Let's flush any message the dying child might still
1035 * have queued for us. This ensures that the process
1036 * still exists in /proc so that we can figure out
1037 * which cgroup and hence unit it belongs to. */
1038 if ((r = manager_process_notify_fd(m)) < 0)
1041 /* And now figure out the unit this belongs to */
1042 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(si.si_pid))))
1043 u = cgroup_unit_by_pid(m, si.si_pid);
1045 /* And now, we actually reap the zombie. */
1046 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
1053 if (si.si_code != CLD_EXITED && si.si_code != CLD_KILLED && si.si_code != CLD_DUMPED)
1056 log_debug("Child %lu died (code=%s, status=%i/%s)",
1057 (long unsigned) si.si_pid,
1058 sigchld_code_to_string(si.si_code),
1060 strna(si.si_code == CLD_EXITED
1061 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
1062 : signal_to_string(si.si_status)));
1067 log_debug("Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
1069 hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
1070 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
/* Enqueues a JOB_START job for the unit called name in the given mode, with
 * override set to true. A failure to enqueue is logged together with the
 * D-Bus error text. The local DBusError is freed before returning. */
1076 static int manager_start_target(Manager *m, const char *name, JobMode mode) {
1080 dbus_error_init(&error);
1082 log_debug("Activating special unit %s", name);
1084 if ((r = manager_add_job_by_name(m, JOB_START, name, mode, true, &error, NULL)) < 0)
1085 log_error("Failed to enqueue %s job: %s", name, bus_error(&error, r));
1087 dbus_error_free(&error);
/* Reads signalfd_siginfo records from m->signal_watch.fd and reacts to the
 * signal named in ssi_signo. The visible reactions are: setting m->exit_code
 * (re-execute, exit, reload, or an entry of code_table), starting special
 * targets through manager_start_target(), reconnecting to or starting the
 * D-Bus service, dumping units and jobs to the log, and changing status
 * output, log level and log target. Real-time signals from SIGRTMIN+0 index
 * target_table; those from SIGRTMIN+13 index code_table. The function
 * returns through manager_dispatch_sigchld() at the end (presumably only
 * when the sigchld flag was set). NOTE(review): the case labels of both
 * switch statements, the read loop and the remaining return statements are
 * not visible in this excerpt, so the signal-to-action mapping cannot be
 * confirmed from here. */
1092 static int manager_process_signal_fd(Manager *m) {
1094 struct signalfd_siginfo sfsi;
1095 bool sigchld = false;
1100 if ((n = read(m->signal_watch.fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
1105 if (errno == EINTR || errno == EAGAIN)
1111 if (sfsi.ssi_pid > 0) {
1114 get_process_comm(sfsi.ssi_pid, &p);
1116 log_debug("Received SIG%s from PID %lu (%s).",
1117 signal_to_string(sfsi.ssi_signo),
1118 (unsigned long) sfsi.ssi_pid, strna(p));
1121 log_debug("Received SIG%s.", signal_to_string(sfsi.ssi_signo));
1123 switch (sfsi.ssi_signo) {
1130 if (m->running_as == MANAGER_SYSTEM) {
1131 /* This is for compatibility with the
1132 * original sysvinit */
1133 m->exit_code = MANAGER_REEXECUTE;
1140 if (m->running_as == MANAGER_SYSTEM) {
1141 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE);
1145 /* Run the exit target if there is one, if not, just exit. */
1146 if (manager_start_target(m, SPECIAL_EXIT_TARGET, JOB_REPLACE) < 0) {
1147 m->exit_code = MANAGER_EXIT;
1154 if (m->running_as == MANAGER_SYSTEM)
1155 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
1157 /* This is a nop on non-init */
1161 if (m->running_as == MANAGER_SYSTEM)
1162 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
1164 /* This is a nop on non-init */
1170 u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
1172 if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
1173 log_info("Trying to reconnect to bus...");
1177 if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
1178 log_info("Loading D-Bus service...");
1179 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
1190 if (!(f = open_memstream(&dump, &size))) {
1191 log_warning("Failed to allocate memory stream.");
1195 manager_dump_units(m, f, "\t");
1196 manager_dump_jobs(m, f, "\t");
1201 log_warning("Failed to write status stream");
1206 log_dump(LOG_INFO, dump);
1213 m->exit_code = MANAGER_RELOAD;
1218 /* Starting SIGRTMIN+0 */
1219 static const char * const target_table[] = {
1220 [0] = SPECIAL_DEFAULT_TARGET,
1221 [1] = SPECIAL_RESCUE_TARGET,
1222 [2] = SPECIAL_EMERGENCY_TARGET,
1223 [3] = SPECIAL_HALT_TARGET,
1224 [4] = SPECIAL_POWEROFF_TARGET,
1225 [5] = SPECIAL_REBOOT_TARGET,
1226 [6] = SPECIAL_KEXEC_TARGET
1229 /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
1230 static const ManagerExitCode code_table[] = {
1232 [1] = MANAGER_POWEROFF,
1233 [2] = MANAGER_REBOOT,
1237 if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
1238 (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
1239 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
1240 manager_start_target(m, target_table[idx],
1241 (idx == 1 || idx == 2) ? JOB_ISOLATE : JOB_REPLACE);
1245 if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
1246 (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
1247 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
1251 switch (sfsi.ssi_signo - SIGRTMIN) {
1254 log_debug("Enabling showing of status.");
1255 manager_set_show_status(m, true);
1259 log_debug("Disabling showing of status.");
1260 manager_set_show_status(m, false);
1264 log_set_max_level(LOG_DEBUG);
1265 log_notice("Setting log level to debug.");
1269 log_set_max_level(LOG_INFO);
1270 log_notice("Setting log level to info.");
1274 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1275 log_notice("Setting log target to journal-or-kmsg.");
1279 log_set_target(LOG_TARGET_CONSOLE);
1280 log_notice("Setting log target to console.");
1284 log_set_target(LOG_TARGET_KMSG);
1285 log_notice("Setting log target to kmsg.");
1289 log_set_target(LOG_TARGET_SYSLOG_OR_KMSG);
1290 log_notice("Setting log target to syslog-or-kmsg.");
1294 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
1301 return manager_dispatch_sigchld(m);
/* Dispatches one epoll event to its handler. The Watch object stored in
 * ev->data.ptr identifies the source: signal fd, notify socket, a unit's fd,
 * a unit or job timer, the mount, swap or udev watch, or a D-Bus watch or
 * timeout. For the signal and notify sources any event mask other than
 * exactly EPOLLIN takes the first check's branch (the controlled statement
 * is not visible here). For timers the expiration counter is read from the
 * timer fd before the timer handler is called; a short read yields -EIO and
 * a failed read -errno, with EINTR and EAGAIN handled separately by a check
 * whose controlled statement is not visible. An unknown watch type is
 * logged and trips an assertion. NOTE(review): most case labels and the
 * return statements are not visible in this excerpt. */
1306 static int process_event(Manager *m, struct epoll_event *ev) {
1313 assert_se(w = ev->data.ptr);
1315 if (w->type == WATCH_INVALID)
1322 /* An incoming signal? */
1323 if (ev->events != EPOLLIN)
1326 if ((r = manager_process_signal_fd(m)) < 0)
1333 /* An incoming daemon notification event? */
1334 if (ev->events != EPOLLIN)
1337 if ((r = manager_process_notify_fd(m)) < 0)
1344 /* Some fd event, to be dispatched to the units */
1345 UNIT_VTABLE(w->data.unit)->fd_event(w->data.unit, w->fd, ev->events, w);
1348 case WATCH_UNIT_TIMER:
1349 case WATCH_JOB_TIMER: {
1353 /* Some timer event, to be dispatched to the units */
1354 if ((k = read(w->fd, &v, sizeof(v))) != sizeof(v)) {
1356 if (k < 0 && (errno == EINTR || errno == EAGAIN))
1359 return k < 0 ? -errno : -EIO;
1362 if (w->type == WATCH_UNIT_TIMER)
1363 UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
1365 job_timer_event(w->data.job, v, w);
1370 /* Some mount table change, intended for the mount subsystem */
1371 mount_fd_event(m, ev->events);
1375 /* Some swap table change, intended for the swap subsystem */
1376 swap_fd_event(m, ev->events);
1380 /* Some notification from udev, intended for the device subsystem */
1381 device_fd_event(m, ev->events);
1384 case WATCH_DBUS_WATCH:
1385 bus_watch_event(m, w, ev->events);
1388 case WATCH_DBUS_TIMEOUT:
1389 bus_timeout_event(m, w, ev->events);
1393 log_error("event type=%i", w->type);
1394 assert_not_reached("Unknown epoll event type.");
/* The manager's main loop. Sets m->exit_code to MANAGER_RUNNING, releases
 * the unit path cache, checks whether startup has finished and reaps
 * leftover zombies. Then, for as long as the exit code stays
 * MANAGER_RUNNING, it tests a rate limit (logging a warning when it is
 * exceeded), gives each work queue dispatcher a chance to run, and waits
 * for one epoll event, which is handed to process_event(). When a runtime
 * watchdog is configured for a system manager, the epoll wait is bounded to
 * half the watchdog interval. Returns m->exit_code. NOTE(review): the
 * statements controlled by the dispatcher checks, the watchdog ping and the
 * error handling are not visible in this excerpt. */
1400 int manager_loop(Manager *m) {
1403 RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
1406 m->exit_code = MANAGER_RUNNING;
1408 /* Release the path cache */
1409 set_free_free(m->unit_path_cache);
1410 m->unit_path_cache = NULL;
1412 manager_check_finished(m);
1414 /* There might still be some zombies hanging around from
1415 * before we were exec()'ed. Let's reap them */
1416 r = manager_dispatch_sigchld(m);
1420 while (m->exit_code == MANAGER_RUNNING) {
1421 struct epoll_event event;
1425 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM)
1428 if (!ratelimit_test(&rl)) {
1429 /* Yay, something is going seriously wrong, pause a little */
1430 log_warning("Looping too fast. Throttling execution a little.");
1435 if (manager_dispatch_load_queue(m) > 0)
1438 if (manager_dispatch_run_queue(m) > 0)
1441 if (bus_dispatch(m) > 0)
1444 if (manager_dispatch_cleanup_queue(m) > 0)
1447 if (manager_dispatch_gc_queue(m) > 0)
1450 if (manager_dispatch_dbus_queue(m) > 0)
1453 if (swap_dispatch_reload(m) > 0)
1456 /* Sleep for half the watchdog time */
1457 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM) {
1458 wait_msec = (int) (m->runtime_watchdog / 2 / USEC_PER_MSEC);
1464 n = epoll_wait(m->epoll_fd, &event, 1, wait_msec);
1476 r = process_event(m, &event);
1481 return m->exit_code;
/* Maps a D-Bus object path of the form "/org/freedesktop/systemd1/unit/..."
 * to a unit: the part after the prefix is unescaped and looked up with
 * manager_get_unit(). NOTE(review): return values and the assignment to *_u
 * are not visible in this excerpt. */
1484 int manager_get_unit_from_dbus_path(Manager *m, const char *s, Unit **_u) {
1492 if (!startswith(s, "/org/freedesktop/systemd1/unit/"))
/* 31 is the length of the prefix checked above. */
1495 if (!(n = bus_path_unescape(s+31)))
1498 u = manager_get_unit(m, n);
/* Maps a D-Bus object path of the form "/org/freedesktop/systemd1/job/<id>"
 * to a job: the part after the prefix is parsed as an unsigned number and
 * looked up with manager_get_job(). NOTE(review): return values and the
 * assignment to *_j are not visible in this excerpt. */
1509 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
1518 if (!startswith(s, "/org/freedesktop/systemd1/job/"))
/* 30 is the length of the prefix checked above. */
1521 if ((r = safe_atou(s + 30, &id)) < 0)
1524 if (!(j = manager_get_job(m, id)))
/* Sends an audit record of the given type and success state for unit u,
 * identified by the prefix-and-instance form of its id. The visible checks
 * test for: no audit connection, a reload in progress, a manager that is
 * not the system manager, and a unit that is not a service (presumably each
 * returns early; the controlled statements are not visible in this
 * excerpt). If sending fails with EPERM the audit connection is closed;
 * a warning is also logged on failure (whether for every errno or only
 * non-EPERM ones is not visible here). */
1532 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
1537 if (m->audit_fd < 0)
1540 /* Don't generate audit events if the service was already
1541 * started and we're just deserializing */
1542 if (m->n_reloading > 0)
1545 if (m->running_as != MANAGER_SYSTEM)
1548 if (u->type != UNIT_SERVICE)
1551 if (!(p = unit_name_to_prefix_and_instance(u->id))) {
1552 log_error("Failed to allocate unit name for audit message: %s", strerror(ENOMEM));
1556 if (audit_log_user_comm_message(m->audit_fd, type, "", p, NULL, NULL, NULL, success) < 0) {
1557 if (errno == EPERM) {
1558 /* We aren't allowed to send audit messages?
1559 * Then let's not retry again. */
1560 audit_close(m->audit_fd);
1563 log_warning("Failed to send audit message: %m");
/* Tells Plymouth about unit u. The visible checks test for a reload in
 * progress, a manager that is not the system manager, and a unit that is
 * not a service, mount or swap unit (presumably each returns early; the
 * controlled statements are not visible in this excerpt). A non-blocking
 * AF_UNIX stream socket is connected to an address whose name
 * "/org/freedesktop/plymouthd" is written starting at the second byte of
 * sun_path. The message is "U", byte 0x02, one byte holding the id length
 * plus one, and the unit id; it is written including its trailing NUL byte.
 * connect() and write() failures are logged unless errno is one of the
 * listed "peer not there" codes. The socket is closed at the end. */
1571 void manager_send_unit_plymouth(Manager *m, Unit *u) {
1573 union sockaddr_union sa;
1575 char *message = NULL;
1577 /* Don't generate plymouth events if the service was already
1578 * started and we're just deserializing */
1579 if (m->n_reloading > 0)
1582 if (m->running_as != MANAGER_SYSTEM)
1585 if (u->type != UNIT_SERVICE &&
1586 u->type != UNIT_MOUNT &&
1587 u->type != UNIT_SWAP)
1590 /* We set SOCK_NONBLOCK here so that we rather drop the
1591 * message than wait for plymouth */
1592 if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
1593 log_error("socket() failed: %m");
1598 sa.sa.sa_family = AF_UNIX;
1599 strncpy(sa.un.sun_path+1, "/org/freedesktop/plymouthd", sizeof(sa.un.sun_path)-1);
1600 if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) {
1602 if (errno != EPIPE &&
1605 errno != ECONNREFUSED &&
1606 errno != ECONNRESET &&
1607 errno != ECONNABORTED)
1608 log_error("connect() failed: %m");
/* %n stores the number of characters produced so far, i.e. the message
 * length without the trailing NUL, in n. */
1613 if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
1614 log_error("Out of memory");
1619 if (write(fd, message, n + 1) != n + 1) {
1621 if (errno != EPIPE &&
1624 errno != ECONNREFUSED &&
1625 errno != ECONNRESET &&
1626 errno != ECONNABORTED)
1627 log_error("Failed to write Plymouth message: %m");
1634 close_nointr_nofail(fd);
/* Forwards a change of owner of the bus name to the unit registered for that
 * name in m->watch_bus, via the unit type's bus_name_owner_change hook.
 * NOTE(review): part of the parameter list and the branch taken when no
 * unit is registered are not visible in this excerpt. */
1639 void manager_dispatch_bus_name_owner_changed(
1642 const char* old_owner,
1643 const char *new_owner) {
1650 if (!(u = hashmap_get(m->watch_bus, name)))
1653 UNIT_VTABLE(u)->bus_name_owner_change(u, name, old_owner, new_owner);
/* Forwards the result of a PID query for the bus name to the unit registered
 * for that name in m->watch_bus, via the unit type's bus_query_pid_done
 * hook. NOTE(review): the parameter list and the branch taken when no unit
 * is registered are not visible in this excerpt. */
1656 void manager_dispatch_bus_query_pid_done(
1667 if (!(u = hashmap_get(m->watch_bus, name)))
1670 UNIT_VTABLE(u)->bus_query_pid_done(u, name, pid);
/* Creates a temporary file for serializing manager state. The file name
 * template contains the manager's PID and lives under /run/systemd for a
 * system manager; the other template lives under /tmp. The file is created
 * with mkostemp() while the umask is set to 0077 and is then wrapped in a
 * FILE stream opened in "w+" mode. NOTE(review): the else branch, the check
 * of the asprintf() result, the restoring of saved_umask, the error paths
 * and the assignment to *_f are not visible in this excerpt. */
1673 int manager_open_serialization(Manager *m, FILE **_f) {
1681 if (m->running_as == MANAGER_SYSTEM)
1682 asprintf(&path, "/run/systemd/dump-%lu-XXXXXX", (unsigned long) getpid());
1684 asprintf(&path, "/tmp/systemd-dump-%lu-XXXXXX", (unsigned long) getpid());
1689 saved_umask = umask(0077);
1690 fd = mkostemp(path, O_RDWR|O_CLOEXEC);
1700 log_debug("Serializing state to %s", path);
1703 if (!(f = fdopen(fd, "w+")))
/* Writes the manager's state to f as "key=value" lines: the current job id,
 * the taint-usr flag and the initrd, startup and finish timestamps. It then
 * iterates over m->units, testing each with unit_can_serialize() and
 * calling unit_serialize(), and finally calls bus_fdset_add_all() with fds.
 * NOTE(review): the per-unit framing, the branch taken for units that
 * cannot be serialized, the m->n_reloading bookkeeping around the assert
 * and the return values are not visible in this excerpt. */
1711 int manager_serialize(Manager *m, FILE *f, FDSet *fds) {
1723 fprintf(f, "current-job-id=%i\n", m->current_job_id);
1724 fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
1726 dual_timestamp_serialize(f, "initrd-timestamp", &m->initrd_timestamp);
1727 dual_timestamp_serialize(f, "startup-timestamp", &m->startup_timestamp);
1728 dual_timestamp_serialize(f, "finish-timestamp", &m->finish_timestamp);
1732 HASHMAP_FOREACH_KEY(u, t, m->units, i) {
1736 if (!unit_can_serialize(u))
1743 if ((r = unit_serialize(u, f, fds)) < 0) {
1749 assert(m->n_reloading > 0);
1755 r = bus_fdset_add_all(m, fds);
/* Reads back state written by manager_serialize(). Manager-level lines are
 * matched by their "key=" prefix; the numeric offsets added to l are the
 * lengths of those prefixes. The job id only ever grows (MAX with the
 * current value) and the taint flag is only ever set, never cleared.
 * Unparsable values and unknown keys are logged at debug level. Afterwards
 * unit names are read line by line; each unit is loaded with
 * manager_load_unit() and given the stream via unit_deserialize().
 * NOTE(review): the loop structure, end-of-section detection, error paths
 * and m->n_reloading bookkeeping are not visible in this excerpt. */
1762 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
1768 log_debug("Deserializing state...");
1773 char line[LINE_MAX], *l;
1775 if (!fgets(line, sizeof(line), f)) {
1790 if (startswith(l, "current-job-id=")) {
1793 if (safe_atou32(l+15, &id) < 0)
1794 log_debug("Failed to parse current job id value %s", l+15);
1796 m->current_job_id = MAX(m->current_job_id, id);
1797 } else if (startswith(l, "taint-usr=")) {
1800 if ((b = parse_boolean(l+10)) < 0)
1801 log_debug("Failed to parse taint /usr flag %s", l+10);
1803 m->taint_usr = m->taint_usr || b;
1804 } else if (startswith(l, "initrd-timestamp="))
1805 dual_timestamp_deserialize(l+17, &m->initrd_timestamp);
1806 else if (startswith(l, "startup-timestamp="))
1807 dual_timestamp_deserialize(l+18, &m->startup_timestamp);
1808 else if (startswith(l, "finish-timestamp="))
1809 dual_timestamp_deserialize(l+17, &m->finish_timestamp);
1811 log_debug("Unknown serialization item '%s'", l);
1816 char name[UNIT_NAME_MAX+2];
1819 if (!fgets(name, sizeof(name), f)) {
1830 if ((r = manager_load_unit(m, strstrip(name), NULL, NULL, &u)) < 0)
1833 if ((r = unit_deserialize(u, f, fds)) < 0)
1843 assert(m->n_reloading > 0);
/* Reloads the manager configuration while carrying the runtime state over.
 *
 * The state is first serialized into a temporary stream. Jobs and units are
 * then dropped, generators are re-run, the unit search paths are rebuilt,
 * units are enumerated again, and finally the saved state is deserialized
 * and coldplugged.
 *
 * Errors before the point of no return are stored in r; later steps store
 * their result in q. NOTE(review): presumably so that the remaining steps
 * still run after a failure -- confirm how q is folded into the result. */
1849 int manager_reload(Manager *m) {
1856 if ((r = manager_open_serialization(m, &f)) < 0)
1861 if (!(fds = fdset_new())) {
1867 if ((r = manager_serialize(m, f, fds)) < 0) {
/* Rewind the stream so that the deserialization step reads the data from
 * the beginning. */
1872 if (fseeko(f, 0, SEEK_SET) < 0) {
1878 /* From here on there is no way back. */
1879 manager_clear_jobs_and_units(m);
1880 manager_undo_generators(m);
1882 /* Find new unit paths */
1883 lookup_paths_free(&m->lookup_paths);
1884 if ((q = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
/* Generators run after the lookup paths were rebuilt, since
 * manager_run_generators() appends its output directory to them. */
1887 manager_run_generators(m);
1889 manager_build_unit_path_cache(m);
1891 /* First, enumerate what we can from all config files */
1892 if ((q = manager_enumerate(m)) < 0)
1895 /* Second, deserialize our stored data */
1896 if ((q = manager_deserialize(m, f, fds)) < 0)
1902 /* Third, fire things up! */
1903 if ((q = manager_coldplug(m)) < 0)
1906 assert(m->n_reloading > 0);
/* Tells whether the manager is still booting or is already shutting down.
 *
 * Two conditions are checked: whether the job recorded in
 * m->default_unit_job_id still exists, and the state of the shutdown
 * target unit. NOTE(review): the return statements and the exact test
 * applied to the shutdown target follow each check -- confirm. */
1919 bool manager_is_booting_or_shutting_down(Manager *m) {
1924 /* Is the initial job still around? */
1925 if (manager_get_job(m, m->default_unit_job_id))
1928 /* Is there a job for the shutdown target? */
1929 u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
/* Calls unit_reset_failed() on every unit stored in m->units. */
1936 void manager_reset_failed(Manager *m) {
1942 HASHMAP_FOREACH(u, m->units, i)
1943 unit_reset_failed(u);
/* Looks up the unit called name and reports the result of
 * unit_pending_inactive() for it.
 *
 * NOTE(review): the value returned when no such unit is loaded follows the
 * lookup; presumably true, matching the comment below -- confirm. */
1946 bool manager_unit_pending_inactive(Manager *m, const char *name) {
1952 /* Returns true if the unit is inactive or going down */
1953 if (!(u = manager_get_unit(m, name)))
1956 return unit_pending_inactive(u);
/* Records the moment startup finished and reports how long it took.
 *
 * The function first checks whether a finish timestamp is already set and
 * whether jobs are still queued in m->jobs. NOTE(review): both checks
 * presumably return early -- confirm. Otherwise the finish timestamp is
 * taken, the durations are logged, broadcast with bus_broadcast_finished()
 * and included in a READY=1 status notification.
 *
 * All durations are computed from the monotonic timestamps and formatted
 * into the local buffers with format_timespan(). */
1959 void manager_check_finished(Manager *m) {
1960 char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
1961 usec_t kernel_usec, initrd_usec, userspace_usec, total_usec;
1965 if (dual_timestamp_is_set(&m->finish_timestamp))
1968 if (hashmap_size(m->jobs) > 0)
1971 dual_timestamp_get(&m->finish_timestamp);
/* The kernel/initrd/userspace breakdown is only produced for the system
 * manager when detect_container() does not report a container. */
1973 if (m->running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0) {
/* Userspace time runs from the startup timestamp to the finish timestamp;
 * the total is the raw monotonic finish time. NOTE(review): this assumes
 * the monotonic clock starts counting at kernel start -- confirm. */
1975 userspace_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
1976 total_usec = m->finish_timestamp.monotonic;
/* With an initrd timestamp the time before it counts as kernel time and
 * the span up to the startup timestamp as initrd time. */
1978 if (dual_timestamp_is_set(&m->initrd_timestamp)) {
1980 kernel_usec = m->initrd_timestamp.monotonic;
1981 initrd_usec = m->startup_timestamp.monotonic - m->initrd_timestamp.monotonic;
1983 log_info("Startup finished in %s (kernel) + %s (initrd) + %s (userspace) = %s.",
1984 format_timespan(kernel, sizeof(kernel), kernel_usec),
1985 format_timespan(initrd, sizeof(initrd), initrd_usec),
1986 format_timespan(userspace, sizeof(userspace), userspace_usec),
1987 format_timespan(sum, sizeof(sum), total_usec));
/* Without an initrd timestamp everything before the startup timestamp is
 * attributed to the kernel. */
1989 kernel_usec = m->startup_timestamp.monotonic;
1992 log_info("Startup finished in %s (kernel) + %s (userspace) = %s.",
1993 format_timespan(kernel, sizeof(kernel), kernel_usec),
1994 format_timespan(userspace, sizeof(userspace), userspace_usec),
1995 format_timespan(sum, sizeof(sum), total_usec));
/* Remaining case: no breakdown is available, so only the span from the
 * startup timestamp to the finish timestamp is reported, at debug level. */
1998 userspace_usec = initrd_usec = kernel_usec = 0;
1999 total_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
2001 log_debug("Startup finished in %s.",
2002 format_timespan(sum, sizeof(sum), total_usec));
2005 bus_broadcast_finished(m, kernel_usec, initrd_usec, userspace_usec, total_usec);
2008 "READY=1\nSTATUS=Startup finished in %s.",
2009 format_timespan(sum, sizeof(sum), total_usec));
/* Runs the executables found in the generator directory and makes the
 * directory they write to part of the unit search path.
 *
 * The generator directory is SYSTEM_GENERATOR_PATH for the system manager
 * and USER_GENERATOR_PATH otherwise. A missing directory (ENOENT) is
 * handled separately from other opendir() failures, which are logged as
 * errors.
 *
 * Once the generators have run, rmdir() is attempted on the output
 * directory. If that succeeds the directory was empty, and the stored path
 * is freed and reset to NULL. Otherwise the path is appended to
 * m->lookup_paths.unit_path unless it is already listed there. */
2012 void manager_run_generators(Manager *m) {
2014 const char *generator_path;
2015 const char *argv[3];
2020 generator_path = m->running_as == MANAGER_SYSTEM ? SYSTEM_GENERATOR_PATH : USER_GENERATOR_PATH;
2021 if (!(d = opendir(generator_path))) {
2023 if (errno == ENOENT)
2026 log_error("Failed to enumerate generator directory: %m");
/* Pick the output directory on first use: the fixed path
 * /run/systemd/generator when running as system manager with PID 1,
 * otherwise a fresh mkdtemp() directory below /tmp. The chosen path is
 * duplicated into m->generator_unit_path. */
2030 if (!m->generator_unit_path) {
2032 char user_path[] = "/tmp/systemd-generator-XXXXXX";
2034 if (m->running_as == MANAGER_SYSTEM && getpid() == 1) {
2035 p = "/run/systemd/generator";
2037 if (mkdir_p(p, 0755) < 0) {
2038 log_error("Failed to create generator directory: %m");
2043 if (!(p = mkdtemp(user_path))) {
2044 log_error("Failed to create generator directory: %m");
2049 if (!(m->generator_unit_path = strdup(p))) {
2050 log_error("Failed to allocate generator unit path.");
/* Each generator receives the output directory as its first argument. */
2055 argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
2056 argv[1] = m->generator_unit_path;
2060 execute_directory(generator_path, d, (char**) argv);
2063 if (rmdir(m->generator_unit_path) >= 0) {
2064 /* Uh? we were able to remove this dir? I guess that
2065 * means the directory was empty, hence let's shortcut
2068 free(m->generator_unit_path);
2069 m->generator_unit_path = NULL;
2073 if (!strv_find(m->lookup_paths.unit_path, m->generator_unit_path)) {
2076 if (!(l = strv_append(m->lookup_paths.unit_path, m->generator_unit_path))) {
2077 log_error("Failed to add generator directory to unit search path: %m");
2081 strv_free(m->lookup_paths.unit_path);
2082 m->lookup_paths.unit_path = l;
2084 log_debug("Added generator unit path %s to search path.", m->generator_unit_path);
/* Reverts what manager_run_generators() set up: the generator output
 * directory is taken out of the unit search path and removed with rm_rf(),
 * and the stored path is freed and reset to NULL.
 *
 * The check at the top covers the case where no generator directory is
 * recorded (presumably an early return -- confirm). */
2092 void manager_undo_generators(Manager *m) {
2095 if (!m->generator_unit_path)
2098 strv_remove(m->lookup_paths.unit_path, m->generator_unit_path);
2099 rm_rf(m->generator_unit_path, false, true, false);
2101 free(m->generator_unit_path);
2102 m->generator_unit_path = NULL;
/* Replaces the list of default controllers with a copy of controllers.
 *
 * The caller keeps ownership of controllers; the manager stores its own
 * strv_copy() and frees the previous list.
 * NOTE(review): the check for a failed strv_copy() follows the copy;
 * confirm that the old list is left untouched in that case. */
2105 int manager_set_default_controllers(Manager *m, char **controllers) {
2110 l = strv_copy(controllers);
2114 strv_free(m->default_controllers);
2115 m->default_controllers = l;
/* Post-process the stored list with cg_shorten_controllers(). */
2117 cg_shorten_controllers(m->default_controllers);
/* Re-evaluates whether logging may go to the journal.
 *
 * Only relevant for the system manager. The journal log target is closed
 * when the journald socket unit exists but is not in SOCKET_RUNNING state,
 * or when the journald service unit exists but is not in SERVICE_RUNNING
 * state. A unit that is not loaded at all does not trigger the close. */
2122 void manager_recheck_journal(Manager *m) {
2127 if (m->running_as != MANAGER_SYSTEM)
2130 u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
2131 if (u && SOCKET(u)->state != SOCKET_RUNNING) {
2132 log_close_journal();
2136 u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
2137 if (u && SERVICE(u)->state != SERVICE_RUNNING) {
2138 log_close_journal();
2142 /* Hmm, OK, so the socket is fully up and the service is up
2143 * too, then let's make use of the thing. */
/* Switches status output on or off for the system manager.
 *
 * The setting is mirrored in the flag file /run/systemd/show-status, which
 * is created with touch() or removed with unlink(). NOTE(review): which of
 * the two runs presumably depends on b -- confirm. Managers not running as
 * system are handled by the check at the top. */
2147 void manager_set_show_status(Manager *m, bool b) {
2150 if (m->running_as != MANAGER_SYSTEM)
2156 touch("/run/systemd/show-status");
2158 unlink("/run/systemd/show-status");
/* Reports whether status output should be shown.
 *
 * Managers not running as system are handled by the first check
 * (presumably returning false -- confirm). When no earlier check decides,
 * the answer is whatever plymouth_running() reports. */
2161 bool manager_get_show_status(Manager *m) {
2164 if (m->running_as != MANAGER_SYSTEM)
2170 /* If Plymouth is running make sure we show the status, so
2171 * that there's something nice to see when people press Esc */
2173 return plymouth_running();
/* String names of the ManagerRunningAs values, indexed by the enum value. */
2176 static const char* const manager_running_as_table[_MANAGER_RUNNING_AS_MAX] = {
2177 [MANAGER_SYSTEM] = "system",
2178 [MANAGER_USER] = "user"
/* NOTE(review): presumably expands to the string <-> enum lookup helpers
 * for manager_running_as, backed by the table above -- confirm against the
 * macro definition. */
2181 DEFINE_STRING_TABLE_LOOKUP(manager_running_as, ManagerRunningAs);