X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fcore%2Fshutdown.c;h=9189cfb1ade3eed754249518501bb374be09c075;hp=cd478b0349901e6a8762bea0adcc6f8adc190f73;hb=b1e90ec515408aec2702522f6f68c4920b56375b;hpb=146de8175e11c877908f20f8ed1b5a3805455c0a diff --git a/src/core/shutdown.c b/src/core/shutdown.c index cd478b034..9189cfb1a 100644 --- a/src/core/shutdown.c +++ b/src/core/shutdown.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -36,185 +35,107 @@ #include #include #include +#include #include "missing.h" #include "log.h" +#include "fileio.h" #include "umount.h" #include "util.h" #include "mkdir.h" #include "virt.h" #include "watchdog.h" +#include "killall.h" +#include "cgroup-util.h" +#include "def.h" -#define TIMEOUT_USEC (5 * USEC_PER_SEC) #define FINALIZE_ATTEMPTS 50 -static bool ignore_proc(pid_t pid) { - char buf[PATH_MAX]; - FILE *f; - char c; - size_t count; - uid_t uid; - int r; - - /* We are PID 1, let's not commit suicide */ - if (pid == 1) - return true; - - r = get_process_uid(pid, &uid); - if (r < 0) - return true; /* not really, but better safe than sorry */ - - /* Non-root processes otherwise are always subject to be killed */ - if (uid != 0) - return false; - - snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid); - char_array_0(buf); +static char* arg_verb; - f = fopen(buf, "re"); - if (!f) - return true; /* not really, but has the desired effect */ +static int parse_argv(int argc, char *argv[]) { + enum { + ARG_LOG_LEVEL = 0x100, + ARG_LOG_TARGET, + ARG_LOG_COLOR, + ARG_LOG_LOCATION, + }; - count = fread(&c, 1, 1, f); - fclose(f); + static const struct option options[] = { + { "log-level", required_argument, NULL, ARG_LOG_LEVEL }, + { "log-target", required_argument, NULL, ARG_LOG_TARGET }, + { "log-color", optional_argument, NULL, ARG_LOG_COLOR }, + { "log-location", optional_argument, NULL, ARG_LOG_LOCATION }, + {} + }; - /* Kernel threads 
have an empty cmdline */ - if (count <= 0) - return true; + int c, r; - /* Processes with argv[0][0] = '@' we ignore from the killing - * spree. - * - * http://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons */ - if (count == 1 && c == '@') - return true; - - return false; -} + assert(argc >= 1); + assert(argv); -static int killall(int sign) { - DIR *dir; - struct dirent *d; - unsigned int n_processes = 0; + opterr = 0; - dir = opendir("/proc"); - if (!dir) - return -errno; - - while ((d = readdir(dir))) { - pid_t pid; + while ((c = getopt_long(argc, argv, ":", options, NULL)) >= 0) + switch (c) { - if (parse_pid(d->d_name, &pid) < 0) - continue; + case ARG_LOG_LEVEL: + r = log_set_max_level_from_string(optarg); + if (r < 0) + log_error("Failed to parse log level %s, ignoring.", optarg); - if (ignore_proc(pid)) - continue; + break; - if (kill(pid, sign) == 0) - n_processes++; - else - log_warning("Could not kill %d: %m", pid); - } + case ARG_LOG_TARGET: + r = log_set_target_from_string(optarg); + if (r < 0) + log_error("Failed to parse log target %s, ignoring", optarg); - closedir(dir); + break; - return n_processes; -} + case ARG_LOG_COLOR: -static void wait_for_children(int n_processes, sigset_t *mask) { - usec_t until; + if (optarg) { + r = log_show_color_from_string(optarg); + if (r < 0) + log_error("Failed to parse log color setting %s, ignoring", optarg); + } else + log_show_color(true); - assert(mask); + break; - until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC; - for (;;) { - struct timespec ts; - int k; - usec_t n; + case ARG_LOG_LOCATION: + if (optarg) { + r = log_show_location_from_string(optarg); + if (r < 0) + log_error("Failed to parse log location setting %s, ignoring", optarg); + } else + log_show_location(true); - for (;;) { - pid_t pid = waitpid(-1, NULL, WNOHANG); + break; - if (pid == 0) - break; + case '?': + log_error("Unknown option %s.", argv[optind-1]); + return -EINVAL; - if (pid < 0 && errno == ECHILD) - return; + case ':': + 
log_error("Missing argument to %s.", argv[optind-1]); + return -EINVAL; - if (n_processes > 0) - if (--n_processes == 0) - return; + default: + assert_not_reached("Unhandled option code."); } - n = now(CLOCK_MONOTONIC); - if (n >= until) - return; - - timespec_store(&ts, until - n); - - if ((k = sigtimedwait(mask, NULL, &ts)) != SIGCHLD) { - - if (k < 0 && errno != EAGAIN) { - log_error("sigtimedwait() failed: %m"); - return; - } - - if (k >= 0) - log_warning("sigtimedwait() returned unexpected signal."); - } + if (optind >= argc) { + log_error("Verb argument missing."); + return -EINVAL; } -} -static void send_signal(int sign) { - sigset_t mask, oldmask; - int n_processes; + arg_verb = argv[optind]; - assert_se(sigemptyset(&mask) == 0); - assert_se(sigaddset(&mask, SIGCHLD) == 0); - assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0); - - if (kill(-1, SIGSTOP) < 0 && errno != ESRCH) - log_warning("kill(-1, SIGSTOP) failed: %m"); - - n_processes = killall(sign); - - if (kill(-1, SIGCONT) < 0 && errno != ESRCH) - log_warning("kill(-1, SIGCONT) failed: %m"); - - if (n_processes <= 0) - goto finish; - - wait_for_children(n_processes, &mask); - -finish: - sigprocmask(SIG_SETMASK, &oldmask, NULL); -} - -static void ultimate_send_signal(int sign) { - sigset_t mask, oldmask; - int r; - - assert_se(sigemptyset(&mask) == 0); - assert_se(sigaddset(&mask, SIGCHLD) == 0); - assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0); - - if (kill(-1, SIGSTOP) < 0 && errno != ESRCH) - log_warning("kill(-1, SIGSTOP) failed: %m"); - - r = kill(-1, sign); - if (r < 0 && errno != ESRCH) - log_warning("kill(-1, %s) failed: %m", signal_to_string(sign)); - - if (kill(-1, SIGCONT) < 0 && errno != ESRCH) - log_warning("kill(-1, SIGCONT) failed: %m"); - - if (r < 0) - goto finish; - - wait_for_children(0, &mask); - -finish: - sigprocmask(SIG_SETMASK, &oldmask, NULL); + if (optind + 1 < argc) + log_error("Excess arguments, ignoring"); + return 0; } static int prepare_new_root(void) { @@ 
-238,7 +159,7 @@ static int prepare_new_root(void) { } NULSTR_FOREACH(dir, dirs) - if (mkdir_p(dir, 0755) < 0 && errno != EEXIST) { + if (mkdir_p_label(dir, 0755) < 0 && errno != EEXIST) { log_error("Failed to mkdir %s: %m", dir); return -errno; } @@ -267,97 +188,98 @@ static int prepare_new_root(void) { } static int pivot_to_new_root(void) { - int fd; - - chdir("/run/initramfs"); - /* - In case some evil process made "/" MS_SHARED - It works for pivot_root, but the ref count for the root device - is not decreasing :-/ - */ - if (mount(NULL, "/", NULL, MS_PRIVATE, NULL) < 0) { - log_error("Failed to make \"/\" private mount %m"); + if (chdir("/run/initramfs") < 0) { + log_error("Failed to change directory to /run/initramfs: %m"); return -errno; } + /* Work-around for a kernel bug: for some reason the kernel + * refuses switching root if any file systems are mounted + * MS_SHARED. Hence remount them MS_PRIVATE here as a + * work-around. + * + * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ + if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) + log_warning("Failed to make \"/\" private mount: %m"); + if (pivot_root(".", "oldroot") < 0) { log_error("pivot failed: %m"); - /* only chroot if pivot root succeded */ + /* only chroot if pivot root succeeded */ return -errno; } chroot("."); - log_info("Successfully changed into root pivot."); - fd = open("/dev/console", O_RDWR); - if (fd < 0) - log_error("Failed to open /dev/console: %m"); - else { - make_stdio(fd); + setsid(); + make_console_stdio(); - /* Initialize the controlling terminal */ - setsid(); - ioctl(STDIN_FILENO, TIOCSCTTY, NULL); - } + log_info("Successfully changed into root pivot."); return 0; } int main(int argc, char *argv[]) { - int cmd, r; - unsigned retries; bool need_umount = true, need_swapoff = true, need_loop_detach = true, need_dm_detach = true; - bool killed_everbody = false, in_container, use_watchdog = false; + bool in_container, use_watchdog = false; + _cleanup_free_ char *line = 
NULL, *cgroup = NULL; + char *arguments[3]; + unsigned retries; + int cmd, r; log_parse_environment(); - log_set_target(LOG_TARGET_CONSOLE); /* syslog will die if not gone yet */ + r = parse_argv(argc, argv); + if (r < 0) + goto error; + + /* journald will die if not gone yet. The log target defaults + * to console, but may have been changed by commandline options. */ + + log_close_console(); /* force reopen of /dev/console */ log_open(); umask(0022); if (getpid() != 1) { - log_error("Not executed by init (pid 1)."); + log_error("Not executed by init (PID 1)."); r = -EPERM; goto error; } - if (argc != 2) { - log_error("Invalid number of arguments."); - r = -EINVAL; - goto error; - } - in_container = detect_container(NULL) > 0; - if (streq(argv[1], "reboot")) + if (streq(arg_verb, "reboot")) cmd = RB_AUTOBOOT; - else if (streq(argv[1], "poweroff")) + else if (streq(arg_verb, "poweroff")) cmd = RB_POWER_OFF; - else if (streq(argv[1], "halt")) + else if (streq(arg_verb, "halt")) cmd = RB_HALT_SYSTEM; - else if (streq(argv[1], "kexec")) + else if (streq(arg_verb, "kexec")) cmd = LINUX_REBOOT_CMD_KEXEC; else { - log_error("Unknown action '%s'.", argv[1]); r = -EINVAL; + log_error("Unknown action '%s'.", arg_verb); goto error; } + cg_get_root_path(&cgroup); + use_watchdog = !!getenv("WATCHDOG_USEC"); /* lock us into memory */ - if (mlockall(MCL_CURRENT|MCL_FUTURE) != 0) - log_warning("Cannot lock process memory: %m"); + mlockall(MCL_CURRENT|MCL_FUTURE); log_info("Sending SIGTERM to remaining processes..."); - send_signal(SIGTERM); + broadcast_signal(SIGTERM, true, true); log_info("Sending SIGKILL to remaining processes..."); - send_signal(SIGKILL); + broadcast_signal(SIGKILL, true, false); - if (in_container) + if (in_container) { need_swapoff = false; + need_dm_detach = false; + need_loop_detach = false; + } /* Unmount all mountpoints, swaps, and loopback devices */ for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) { @@ -366,34 +288,44 @@ int main(int argc, char 
*argv[]) { if (use_watchdog) watchdog_ping(); + /* Let's trim the cgroup tree on each iteration so + that we leave an empty cgroup tree around, so that + container managers get a nice notify event when we + are down */ + if (cgroup) + cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false); + if (need_umount) { log_info("Unmounting file systems."); r = umount_all(&changed); - if (r == 0) + if (r == 0) { need_umount = false; - else if (r > 0) + log_info("All filesystems unmounted."); + } else if (r > 0) log_info("Not all file systems unmounted, %d left.", r); else log_error("Failed to unmount file systems: %s", strerror(-r)); } if (need_swapoff) { - log_info("Disabling swaps."); + log_info("Deactivating swaps."); r = swapoff_all(&changed); - if (r == 0) + if (r == 0) { need_swapoff = false; - else if (r > 0) - log_info("Not all swaps are turned off, %d left.", r); + log_info("All swaps deactivated."); + } else if (r > 0) + log_info("Not all swaps deactivated, %d left.", r); else - log_error("Failed to turn off swaps: %s", strerror(-r)); + log_error("Failed to deactivate swaps: %s", strerror(-r)); } if (need_loop_detach) { log_info("Detaching loop devices."); r = loopback_detach_all(&changed); - if (r == 0) + if (r == 0) { need_loop_detach = false; - else if (r > 0) + log_info("All loop devices detached."); + } else if (r > 0) log_info("Not all loop devices detached, %d left.", r); else log_error("Failed to detach loop devices: %s", strerror(-r)); @@ -402,10 +334,11 @@ int main(int argc, char *argv[]) { if (need_dm_detach) { log_info("Detaching DM devices."); r = dm_detach_all(&changed); - if (r == 0) + if (r == 0) { need_dm_detach = false; - else if (r > 0) - log_warning("Not all DM devices detached, %d left.", r); + log_info("All DM devices detached."); + } else if (r > 0) + log_info("Not all DM devices detached, %d left.", r); else log_error("Failed to detach DM devices: %s", strerror(-r)); } @@ -418,21 +351,10 @@ int main(int argc, char *argv[]) { } /* If in this 
iteration we didn't manage to - * unmount/deactivate anything, we either kill more - * processes, or simply give up */ + * unmount/deactivate anything, we simply give up */ if (!changed) { - - if (killed_everbody) { - /* Hmm, we already killed everybody, - * let's just give up */ - log_error("Cannot finalize remaining file systems and devices, giving up."); - break; - } - - log_warning("Cannot finalize remaining file systems and devices, trying to kill remaining processes."); - ultimate_send_signal(SIGTERM); - ultimate_send_signal(SIGKILL); - killed_everbody = true; + log_error("Cannot finalize remaining file systems and devices, giving up."); + break; } log_debug("Couldn't finalize remaining file systems and devices after %u retries, trying again.", retries+1); @@ -440,47 +362,101 @@ int main(int argc, char *argv[]) { if (retries >= FINALIZE_ATTEMPTS) log_error("Too many iterations, giving up."); + else + log_info("Storage is finalized."); - execute_directory(SYSTEM_SHUTDOWN_PATH, NULL, NULL); + arguments[0] = NULL; + arguments[1] = arg_verb; + arguments[2] = NULL; + execute_directory(SYSTEM_SHUTDOWN_PATH, NULL, arguments); - /* If we are in a container, just exit, this will kill our - * container for good. */ - if (in_container) { - log_error("Exiting container."); - exit(0); - } - - if (access("/run/initramfs/shutdown", X_OK) == 0) { + if (!in_container && !in_initrd() && + access("/run/initramfs/shutdown", X_OK) == 0) { if (prepare_new_root() >= 0 && pivot_to_new_root() >= 0) { - execv("/shutdown", argv); + arguments[0] = (char*) "/shutdown"; + + log_info("Returning to initrd..."); + + execv("/shutdown", arguments); log_error("Failed to execute shutdown binary: %m"); } } - sync(); - - if (cmd == LINUX_REBOOT_CMD_KEXEC) { - /* We cheat and exec kexec to avoid doing all its work */ - pid_t pid = fork(); - - if (pid < 0) - log_error("Could not fork: %m. 
Falling back to normal reboot."); - else if (pid > 0) { - wait_for_terminate_and_warn("kexec", pid); - log_warning("kexec failed. Falling back to normal reboot."); - } else { - /* Child */ - const char *args[3] = { "/sbin/kexec", "-e", NULL }; - execv(args[0], (char * const *) args); - return EXIT_FAILURE; + /* The kernel will automaticall flush ATA disks and suchlike + * on reboot(), but the file systems need to be synce'd + * explicitly in advance. So let's do this here, but not + * needlessly slow down containers. */ + if (!in_container) + sync(); + + switch (cmd) { + + case LINUX_REBOOT_CMD_KEXEC: + + if (!in_container) { + /* We cheat and exec kexec to avoid doing all its work */ + pid_t pid; + + log_info("Rebooting with kexec."); + + pid = fork(); + if (pid < 0) + log_error("Failed to fork: %m"); + else if (pid == 0) { + + const char * const args[] = { + KEXEC, "-e", NULL + }; + + /* Child */ + + execv(args[0], (char * const *) args); + _exit(EXIT_FAILURE); + } else + wait_for_terminate_and_warn("kexec", pid); } cmd = RB_AUTOBOOT; + /* Fall through */ + + case RB_AUTOBOOT: + + if (!in_container) { + _cleanup_free_ char *param = NULL; + + if (read_one_line_file(REBOOT_PARAM_FILE, &param) >= 0) { + log_info("Rebooting with argument '%s'.", param); + syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, + LINUX_REBOOT_CMD_RESTART2, param); + } + } + + log_info("Rebooting."); + break; + + case RB_POWER_OFF: + log_info("Powering off."); + break; + + case RB_HALT_SYSTEM: + log_info("Halting system."); + break; + + default: + assert_not_reached("Unknown magic"); } reboot(cmd); + if (errno == EPERM && in_container) { + /* If we are in a container, and we lacked + * CAP_SYS_BOOT just exit, this will kill our + * container for good. 
*/ + log_info("Exiting container."); + exit(0); + } + log_error("Failed to invoke reboot(): %m"); r = -errno; @@ -488,5 +464,4 @@ int main(int argc, char *argv[]) { log_error("Critical error while doing system shutdown: %s", strerror(-r)); freeze(); - return EXIT_FAILURE; }