chiark / gitweb /
journal: properly implement matching with multiple matches
[elogind.git] / src / shutdown.c
index 2c14371df2692bf0b9105ccbc133cf7a7bc68309..11213f9d595a288c57e3d803d3ed8d21ff16a115 100644 (file)
 #include <sys/reboot.h>
 #include <linux/reboot.h>
 #include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
 #include <dirent.h>
 #include <errno.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
 
+#include "missing.h"
 #include "log.h"
 #include "umount.h"
 #include "util.h"
+#include "virt.h"
 
 #define TIMEOUT_USEC (5 * USEC_PER_SEC)
 #define FINALIZE_ATTEMPTS 50
-#define FINALIZE_CRITICAL_ATTEMPTS 10
 
 static bool ignore_proc(pid_t pid) {
         if (pid == 1)
@@ -69,7 +75,7 @@ static bool is_kernel_thread(pid_t pid)
 static int killall(int sign) {
         DIR *dir;
         struct dirent *d;
-        unsigned int processes = 0;
+        unsigned int n_processes = 0;
 
         if ((dir = opendir("/proc")) == NULL)
                 return -errno;
@@ -87,125 +93,211 @@ static int killall(int sign) {
                         continue;
 
                 if (kill(pid, sign) == 0)
-                        processes++;
+                        n_processes++;
                 else
                         log_warning("Could not kill %d: %m", pid);
         }
 
         closedir(dir);
 
-        return processes;
+        return n_processes;
 }
 
-static int send_signal(int sign) {
-        sigset_t mask, oldmask;
+static void wait_for_children(int n_processes, sigset_t *mask) {
         usec_t until;
-        int processes;
-        struct timespec ts;
-
-        assert_se(sigemptyset(&mask) == 0);
-        assert_se(sigaddset(&mask, SIGCHLD) == 0);
-        if (sigprocmask(SIG_BLOCK, &mask, &oldmask) != 0)
-                return -errno;
-
-        if (kill(-1, SIGSTOP) < 0)
-                log_warning("Failed kill(-1, SIGSTOP): %m");
 
-        processes = killall(sign);
-
-        if (kill(-1, SIGCONT) < 0)
-                log_warning("Failed kill(-1, SIGCONT): %m");
-
-        if (processes <= 0)
-                goto finish;
+        assert(mask);
 
         until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
         for (;;) {
-                usec_t n = now(CLOCK_MONOTONIC);
+                struct timespec ts;
+                int k;
+                usec_t n;
+
                 for (;;) {
                         pid_t pid = waitpid(-1, NULL, WNOHANG);
+
                         if (pid == 0)
                                 break;
-                        else if (pid < 0 && errno == ECHILD) {
-                                processes = 0;
-                                goto finish;
-                        }
 
-                        if (--processes == 0)
-                                goto finish;
+                        if (pid < 0 && errno == ECHILD)
+                                return;
+
+                        if (n_processes > 0)
+                                if (--n_processes == 0)
+                                        return;
                 }
 
+                n = now(CLOCK_MONOTONIC);
                 if (n >= until)
-                        goto finish;
+                        return;
 
                 timespec_store(&ts, until - n);
-                if (sigtimedwait(&mask, NULL, &ts) != SIGCHLD)
-                        log_warning("Failed: sigtimedwait did not return SIGCHLD: %m");
+
+                if ((k = sigtimedwait(mask, NULL, &ts)) != SIGCHLD) {
+
+                        if (k < 0 && errno != EAGAIN) {
+                                log_error("sigtimedwait() failed: %m");
+                                return;
+                        }
+
+                        if (k >= 0)
+                                log_warning("sigtimedwait() returned unexpected signal.");
+                }
         }
+}
+
+static void send_signal(int sign) {
+        sigset_t mask, oldmask;
+        int n_processes;
+
+        assert_se(sigemptyset(&mask) == 0);
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
+        assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0);
+
+        if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
+                log_warning("kill(-1, SIGSTOP) failed: %m");
+
+        n_processes = killall(sign);
+
+        if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
+                log_warning("kill(-1, SIGCONT) failed: %m");
+
+        if (n_processes <= 0)
+                goto finish;
+
+        wait_for_children(n_processes, &mask);
 
 finish:
         sigprocmask(SIG_SETMASK, &oldmask, NULL);
-
-        return processes;
 }
 
-static int rescue_send_signal(int sign) {
+static void ultimate_send_signal(int sign) {
         sigset_t mask, oldmask;
-        usec_t until;
-        struct timespec ts;
         int r;
 
-        sigemptyset(&mask);
-        sigaddset(&mask, SIGCHLD);
-        if (sigprocmask(SIG_BLOCK, &mask, &oldmask) != 0)
-                return -errno;
+        assert_se(sigemptyset(&mask) == 0);
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
+        assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0);
 
-        if (kill(-1, SIGSTOP) < 0)
-                log_warning("Failed kill(-1, SIGSTOP): %m");
+        if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
+                log_warning("kill(-1, SIGSTOP) failed: %m");
 
         r = kill(-1, sign);
-        if (r < 0)
-                log_warning("Failed kill(-1, %d): %m", sign);
+        if (r < 0 && errno != ESRCH)
+                log_warning("kill(-1, %s) failed: %m", signal_to_string(sign));
 
-        if (kill(-1, SIGCONT) < 0)
-                log_warning("Failed kill(-1, SIGCONT): %m");
+        if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
+                log_warning("kill(-1, SIGCONT) failed: %m");
 
         if (r < 0)
                 goto finish;
 
-        until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
-        for (;;) {
-                usec_t n = now(CLOCK_MONOTONIC);
-                for (;;) {
-                        pid_t pid = waitpid(-1, NULL, WNOHANG);
-                        if (pid == 0)
-                                break;
-                        else if (pid < 0 && errno == ECHILD)
-                                goto finish;
+        wait_for_children(0, &mask);
+
+finish:
+        sigprocmask(SIG_SETMASK, &oldmask, NULL);
+}
+
+static int prepare_new_root(void) {
+        static const char dirs[] =
+                "/run/initramfs/oldroot\0"
+                "/run/initramfs/proc\0"
+                "/run/initramfs/sys\0"
+                "/run/initramfs/dev\0"
+                "/run/initramfs/run\0";
+
+        const char *dir;
+
+        if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0) {
+                log_error("Failed to mount bind /run/initramfs on /run/initramfs: %m");
+                return -errno;
+        }
+
+        if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0) {
+                log_error("Failed to make /run/initramfs private mount: %m");
+                return -errno;
+        }
+
+        NULSTR_FOREACH(dir, dirs)
+                if (mkdir_p(dir, 0755) < 0 && errno != EEXIST) {
+                        log_error("Failed to mkdir %s: %m", dir);
+                        return -errno;
                 }
 
-                if (n >= until)
-                        goto finish;
+        if (mount("/sys", "/run/initramfs/sys", NULL, MS_BIND, NULL) < 0) {
+                log_error("Failed to mount bind /sys on /run/initramfs/sys: %m");
+                return -errno;
+        }
 
-                timespec_store(&ts, until - n);
-                if (sigtimedwait(&mask, NULL, &ts) != SIGCHLD)
-                        log_warning("Failed: sigtimedwait did not return SIGCHLD: %m");
+        if (mount("/proc", "/run/initramfs/proc", NULL, MS_BIND, NULL) < 0) {
+                log_error("Failed to mount bind /proc on /run/initramfs/proc: %m");
+                return -errno;
         }
 
-finish:
-        sigprocmask(SIG_SETMASK, &oldmask, NULL);
+        if (mount("/dev", "/run/initramfs/dev", NULL, MS_BIND, NULL) < 0) {
+                log_error("Failed to mount bind /dev on /run/initramfs/dev: %m");
+                return -errno;
+        }
+
+        if (mount("/run", "/run/initramfs/run", NULL, MS_BIND, NULL) < 0) {
+                log_error("Failed to mount bind /run on /run/initramfs/run: %m");
+                return -errno;
+        }
+
+        return 0;
+}
+
+static int pivot_to_new_root(void) {
+        int fd;
+
+        chdir("/run/initramfs");
+
+        /*
+          Make "/" a private mount, in case some evil process made it
+          MS_SHARED. pivot_root works in that case too, but the ref
+          count for the root device is not decreasing :-/
+        */
+        if (mount(NULL, "/", NULL, MS_PRIVATE, NULL) < 0) {
+                log_error("Failed to make \"/\" private mount %m");
+                return -errno;
+        }
+
+        if (pivot_root(".", "oldroot") < 0) {
+                log_error("pivot failed: %m");
+                /* only chroot if pivot root succeeded */
+                return -errno;
+        }
+
+        chroot(".");
+        log_info("Successfully changed into root pivot.");
+
+        fd = open("/dev/console", O_RDWR);
+        if (fd < 0)
+                log_error("Failed to open /dev/console: %m");
+        else {
+                make_stdio(fd);
+
+                /* Initialize the controlling terminal */
+                setsid();
+                ioctl(STDIN_FILENO, TIOCSCTTY, NULL);
+        }
 
-        return r;
+        return 0;
 }
 
 int main(int argc, char *argv[]) {
-        int cmd, r, retries;
+        int cmd, r;
+        unsigned retries;
         bool need_umount = true, need_swapoff = true, need_loop_detach = true, need_dm_detach = true;
+        bool killed_everbody = false, in_container;
 
         log_parse_environment();
         log_set_target(LOG_TARGET_CONSOLE); /* syslog will die if not gone yet */
         log_open();
 
+        umask(0022);
+
         if (getpid() != 1) {
                 log_error("Not executed by init (pid 1).");
                 r = -EPERM;
@@ -218,6 +310,8 @@ int main(int argc, char *argv[]) {
                 goto error;
         }
 
+        in_container = detect_container(NULL) > 0;
+
         if (streq(argv[1], "reboot"))
                 cmd = RB_AUTOBOOT;
         else if (streq(argv[1], "poweroff"))
@@ -236,88 +330,118 @@ int main(int argc, char *argv[]) {
         if (mlockall(MCL_CURRENT|MCL_FUTURE) != 0)
                 log_warning("Cannot lock process memory: %m");
 
-        log_info("Sending SIGTERM to processes");
-        r = send_signal(SIGTERM);
-        if (r < 0)
-                log_warning("Cannot send SIGTERM to all process: %s", strerror(r));
+        log_info("Sending SIGTERM to remaining processes...");
+        send_signal(SIGTERM);
 
-        log_info("Sending SIGKILL to processes");
-        r = send_signal(SIGKILL);
-        if (r < 0)
-                log_warning("Cannot send SIGKILL to all process: %s", strerror(r));
+        log_info("Sending SIGKILL to remaining processes...");
+        send_signal(SIGKILL);
+
+        if (in_container)
+                need_swapoff = false;
 
         /* Unmount all mountpoints, swaps, and loopback devices */
-        retries = FINALIZE_ATTEMPTS;
-        for (;;) {
+        for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
+                bool changed = false;
+
                 if (need_umount) {
-                        log_info("Unmounting filesystems.");
-                        r = umount_all();
+                        log_info("Unmounting file systems.");
+                        r = umount_all(&changed);
                         if (r == 0)
                                 need_umount = false;
                         else if (r > 0)
-                                log_warning("Not all filesystems unmounted, %d left.", r);
+                                log_info("Not all file systems unmounted, %d left.", r);
                         else
-                                log_error("Error unmounting filesystems: %s", strerror(-r));
+                                log_error("Failed to unmount file systems: %s", strerror(-r));
                 }
 
                 if (need_swapoff) {
                         log_info("Disabling swaps.");
-                        r = swapoff_all();
+                        r = swapoff_all(&changed);
                         if (r == 0)
                                 need_swapoff = false;
                         else if (r > 0)
-                                log_warning("Not all swaps are off, %d left.", r);
+                                log_info("Not all swaps are turned off, %d left.", r);
                         else
-                                log_error("Error turning off swaps: %s", strerror(-r));
+                                log_error("Failed to turn off swaps: %s", strerror(-r));
                 }
 
                 if (need_loop_detach) {
                         log_info("Detaching loop devices.");
-                        r = loopback_detach_all();
+                        r = loopback_detach_all(&changed);
                         if (r == 0)
                                 need_loop_detach = false;
                         else if (r > 0)
-                                log_warning("Not all loop devices detached, %d left.", r);
+                                log_info("Not all loop devices detached, %d left.", r);
                         else
-                                log_error("Error detaching loop devices: %s", strerror(-r));
+                                log_error("Failed to detach loop devices: %s", strerror(-r));
                 }
 
                 if (need_dm_detach) {
                         log_info("Detaching DM devices.");
-                        r = dm_detach_all();
+                        r = dm_detach_all(&changed);
                         if (r == 0)
                                 need_dm_detach = false;
                         else if (r > 0)
-                                log_warning("Not all dm devices detached, %d left.", r);
+                                log_warning("Not all DM devices detached, %d left.", r);
                         else
-                                log_error("Error detaching dm devices: %s", strerror(-r));
+                                log_error("Failed to detach DM devices: %s", strerror(-r));
                 }
 
-                if (need_umount || need_swapoff || need_loop_detach || need_dm_detach) {
-                        retries--;
+                if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
+                        if (retries > 0)
+                                log_info("All filesystems, swaps, loop devices, DM devices detached.");
+                        /* Yay, done */
+                        break;
+                }
 
-                        if (retries == FINALIZE_CRITICAL_ATTEMPTS) {
-                                log_warning("Approaching critical level to finalize filesystem and devices, try to kill all processes.");
-                                rescue_send_signal(SIGTERM);
-                                rescue_send_signal(SIGKILL);
-                        }
+                /* If in this iteration we didn't manage to
+                 * unmount/deactivate anything, we either kill more
+                 * processes, or simply give up */
+                if (!changed) {
 
-                        if (retries > 0)
-                                log_info("Action still required, %d tries left.", retries);
-                        else {
-                                log_error("Giving up. Actions left: Umount=%s, Swap off=%s, Loop detach=%s, dm detach=%s",
-                                          yes_no(need_umount), yes_no(need_swapoff), yes_no(need_loop_detach), yes_no(need_dm_detach));
+                        if (killed_everbody) {
+                                /* Hmm, we already killed everybody,
+                                 * let's just give up */
+                                log_error("Cannot finalize remaining file systems and devices, giving up.");
                                 break;
                         }
-                } else
-                        break;
+
+                        log_warning("Cannot finalize remaining file systems and devices, trying to kill remaining processes.");
+                        ultimate_send_signal(SIGTERM);
+                        ultimate_send_signal(SIGKILL);
+                        killed_everbody = true;
+                }
+
+                log_debug("Couldn't finalize remaining file systems and devices after %u retries, trying again.", retries+1);
+        }
+
+        if (retries >= FINALIZE_ATTEMPTS)
+                log_error("Too many iterations, giving up.");
+
+        execute_directory(SYSTEM_SHUTDOWN_PATH, NULL, NULL);
+
+        /* If we are in a container, just exit, this will kill our
+         * container for good. */
+        if (in_container) {
+                log_error("Exiting container.");
+                exit(0);
+        }
+
+        if (access("/run/initramfs/shutdown", X_OK) == 0) {
+
+                if (prepare_new_root() >= 0 &&
+                    pivot_to_new_root() >= 0) {
+                        execv("/shutdown", argv);
+                        log_error("Failed to execute shutdown binary: %m");
+                }
         }
 
         sync();
 
         if (cmd == LINUX_REBOOT_CMD_KEXEC) {
-                /* we cheat and exec kexec to avoid doing all its work */
+                /* We cheat and exec kexec to avoid doing all its work */
                 pid_t pid = fork();
+
                 if (pid < 0)
                         log_error("Could not fork: %m. Falling back to normal reboot.");
                 else if (pid > 0) {
@@ -325,7 +449,7 @@ int main(int argc, char *argv[]) {
                         log_warning("kexec failed. Falling back to normal reboot.");
                 } else {
                         /* Child */
-                        const char *args[5] = { KEXEC_BINARY_PATH, "-e", "-f", "-x", NULL };
+                        const char *args[3] = { "/sbin/kexec", "-e", NULL };
                         execv(args[0], (char * const *) args);
                         return EXIT_FAILURE;
                 }
@@ -338,7 +462,6 @@ int main(int argc, char *argv[]) {
         r = -errno;
 
   error:
-        sync();
         log_error("Critical error while doing system shutdown: %s", strerror(-r));
 
         freeze();