chiark / gitweb /
main: do_switch_root() do not recursively remove across device boundaries
[elogind.git] / src / core / main.c
index 9bcedbe71f6e0657f580782ac884b774537c2787..4d09cb71742a54fd20d6d5d53f83c13ccceb26fe 100644 (file)
@@ -32,6 +32,7 @@
 #include <sys/wait.h>
 #include <fcntl.h>
 #include <sys/prctl.h>
+#include <sys/mount.h>
 
 #include "manager.h"
 #include "log.h"
@@ -47,6 +48,7 @@
 #include "def.h"
 #include "virt.h"
 #include "watchdog.h"
+#include "path-util.h"
 
 #include "mount-setup.h"
 #include "loopback-setup.h"
@@ -54,6 +56,7 @@
 #include "hostname-setup.h"
 #include "machine-id-setup.h"
 #include "locale-setup.h"
+#include "hwclock.h"
 #include "selinux-setup.h"
 #include "ima-setup.h"
 
@@ -76,8 +79,6 @@ static bool arg_show_status = true;
 #ifdef HAVE_SYSV_COMPAT
 static bool arg_sysv_console = true;
 #endif
-static bool arg_mount_auto = true;
-static bool arg_swap_auto = true;
 static char **arg_default_controllers = NULL;
 static char ***arg_join_controllers = NULL;
 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
@@ -658,8 +659,6 @@ static int parse_config_file(void) {
 #endif
                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
-                { "Manager", "MountAuto",             config_parse_bool,         0, &arg_mount_auto          },
-                { "Manager", "SwapAuto",              config_parse_bool,         0, &arg_swap_auto           },
                 { "Manager", "DefaultControllers",    config_parse_strv,         0, &arg_default_controllers },
                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
@@ -747,6 +746,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_SHOW_STATUS,
                 ARG_SYSV_CONSOLE,
                 ARG_DESERIALIZE,
+                ARG_SWITCHEDROOT,
                 ARG_INTROSPECT,
                 ARG_DEFAULT_STD_OUTPUT,
                 ARG_DEFAULT_STD_ERROR
@@ -763,14 +763,15 @@ static int parse_argv(int argc, char *argv[]) {
                 { "test",                     no_argument,       NULL, ARG_TEST                     },
                 { "help",                     no_argument,       NULL, 'h'                          },
                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
-                { "dump-core",                no_argument,       NULL, ARG_DUMP_CORE                },
-                { "crash-shell",              no_argument,       NULL, ARG_CRASH_SHELL              },
-                { "confirm-spawn",            no_argument,       NULL, ARG_CONFIRM_SPAWN            },
+                { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
+                { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
+                { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
 #ifdef HAVE_SYSV_COMPAT
                 { "sysv-console",             optional_argument, NULL, ARG_SYSV_CONSOLE             },
 #endif
                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
+                { "switchedroot",             no_argument,       NULL, ARG_SWITCHEDROOT             },
                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
@@ -874,39 +875,49 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_DUMP_CORE:
-                        arg_dump_core = true;
+                        r = optarg ? parse_boolean(optarg) : 1;
+                        if (r < 0) {
+                                log_error("Failed to parse dump core boolean %s.", optarg);
+                                return r;
+                        }
+                        arg_dump_core = r;
                         break;
 
                 case ARG_CRASH_SHELL:
-                        arg_crash_shell = true;
+                        r = optarg ? parse_boolean(optarg) : 1;
+                        if (r < 0) {
+                                log_error("Failed to parse crash shell boolean %s.", optarg);
+                                return r;
+                        }
+                        arg_crash_shell = r;
                         break;
 
                 case ARG_CONFIRM_SPAWN:
-                        arg_confirm_spawn = true;
+                        r = optarg ? parse_boolean(optarg) : 1;
+                        if (r < 0) {
+                                log_error("Failed to parse confirm spawn boolean %s.", optarg);
+                                return r;
+                        }
+                        arg_confirm_spawn = r;
                         break;
 
                 case ARG_SHOW_STATUS:
-
-                        if (optarg) {
-                                if ((r = parse_boolean(optarg)) < 0) {
-                                        log_error("Failed to show status boolean %s.", optarg);
-                                        return r;
-                                }
-                                arg_show_status = r;
-                        } else
-                                arg_show_status = true;
+                        r = optarg ? parse_boolean(optarg) : 1;
+                        if (r < 0) {
+                                log_error("Failed to parse show status boolean %s.", optarg);
+                                return r;
+                        }
+                        arg_show_status = r;
                         break;
+
 #ifdef HAVE_SYSV_COMPAT
                 case ARG_SYSV_CONSOLE:
-
-                        if (optarg) {
-                                if ((r = parse_boolean(optarg)) < 0) {
-                                        log_error("Failed to SysV console boolean %s.", optarg);
-                                        return r;
-                                }
-                                arg_sysv_console = r;
-                        } else
-                                arg_sysv_console = true;
+                        r = optarg ? parse_boolean(optarg) : 1;
+                        if (r < 0) {
+                                log_error("Failed to parse SysV console boolean %s.", optarg);
+                                return r;
+                        }
+                        arg_sysv_console = r;
                         break;
 #endif
 
@@ -932,6 +943,10 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_SWITCHEDROOT:
+                        /* Nothing special yet */
+                        break;
+
                 case ARG_INTROSPECT: {
                         const char * const * i = NULL;
 
@@ -1017,9 +1032,9 @@ static int help(void) {
                "     --unit=UNIT                 Set default unit\n"
                "     --system                    Run a system instance, even if PID != 1\n"
                "     --user                      Run a user instance\n"
-               "     --dump-core                 Dump core on crash\n"
-               "     --crash-shell               Run shell on crash\n"
-               "     --confirm-spawn             Ask for confirmation when spawning processes\n"
+               "     --dump-core[=0|1]           Dump core on crash\n"
+               "     --crash-shell[=0|1]         Run shell on crash\n"
+               "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
 #ifdef HAVE_SYSV_COMPAT
                "     --sysv-console[=0|1]        Connect output of SysV scripts to console\n"
@@ -1158,6 +1173,90 @@ static void test_cgroups(void) {
         sleep(10);
 }
 
+/* Switch the root file system over to the directory switch_root.
+ *
+ * Steps, in order:
+ *   1. For each entry of umounts[], either move the mount into the new
+ *      root or detach it.
+ *   2. chdir() into the new root, move it on top of "/" and chroot(".").
+ *   3. If in_initrd() reported true, empty the old root directory via
+ *      rm_rf_children().
+ *
+ * Returns 0 on success, or immediately if path_equal() reports that
+ * switch_root already is "/". Returns a negative errno-style value on
+ * failure. */
+static int do_switch_root(const char *switch_root) {
+        int r=0;
+        /*  Don't try to unmount the old "/", there's no way to do it. */
+        const char *umounts[] = { "/dev", "/proc", "/sys", "/run", NULL };
+        int i;
+        int cfd = -1;
+        struct stat switch_root_stat, sb;
+        bool remove_old_root;
+
+        if (path_equal(switch_root, "/"))
+                return 0;
+
+        /* Remember the device of the new root, so that we can tell below
+         * whether something is already mounted on its sub-directories. */
+        if (stat(switch_root, &switch_root_stat) != 0) {
+                r = -errno;
+                log_error("failed to stat directory %s", switch_root);
+                goto fail;
+        }
+
+        remove_old_root = in_initrd();
+
+        for (i = 0; umounts[i] != NULL; i++) {
+                char newmount[PATH_MAX];
+
+                snprintf(newmount, sizeof(newmount), "%s%s", switch_root, umounts[i]);
+
+                if ((stat(newmount, &sb) != 0) || (sb.st_dev != switch_root_stat.st_dev)) {
+                        /* mount point seems to be mounted already or stat failed */
+                        umount2(umounts[i], MNT_DETACH);
+                        continue;
+                }
+
+                /* Best effort: if the mount cannot be moved, get rid of
+                 * it and carry on. */
+                if (mount(umounts[i], newmount, NULL, MS_MOVE, NULL) < 0) {
+                        log_error("failed to mount moving %s to %s",
+                                  umounts[i], newmount);
+                        log_error("forcing unmount of %s", umounts[i]);
+                        umount2(umounts[i], MNT_FORCE);
+                }
+        }
+
+        if (chdir(switch_root)) {
+                r = -errno;
+                log_error("failed to change directory to %s", switch_root);
+                goto fail;
+        }
+
+        /* Open the old root before the new one is moved on top of it, so
+         * that we still hold a reference to it afterwards. */
+        if (remove_old_root)
+                cfd = open("/", O_RDONLY);
+
+        if (mount(switch_root, "/", NULL, MS_MOVE, NULL) < 0) {
+                r = -errno;
+                log_error("failed to mount moving %s to /", switch_root);
+                goto fail;
+        }
+
+        if (chroot(".")) {
+                r = -errno;
+                log_error("failed to change root");
+                goto fail;
+        }
+
+        if (cfd >= 0) {
+                struct stat rb;
+
+                if (fstat(cfd, &rb)) {
+                        /* Record the error before logging; otherwise r stays 0
+                         * and the failure is reported as success to the caller. */
+                        r = -errno;
+                        log_error("failed to stat old root directory");
+                        goto fail;
+                }
+
+                /* rb is handed over so the removal can be limited to the
+                 * old root's own device -- presumably; verify against
+                 * rm_rf_children().
+                 * NOTE(review): confirm whether rm_rf_children() takes
+                 * ownership of cfd; if it does, the close() below is a
+                 * double close. */
+                rm_rf_children(cfd, false, false, &rb);
+                close(cfd);
+                cfd=-1;
+        }
+
+        return 0;
+
+fail:
+        if (cfd >= 0)
+                close(cfd);
+
+        log_error("Failed to switch root, ignoring: %s", strerror(-r));
+
+        return r;
+}
+
 int main(int argc, char *argv[]) {
         Manager *m = NULL;
         int r, retval = EXIT_FAILURE;
@@ -1172,6 +1271,7 @@ int main(int argc, char *argv[]) {
         int j;
         bool loaded_policy = false;
         bool arm_reboot_watchdog = false;
+        char *switch_root = NULL, *switch_root_init = NULL;
 
 #ifdef HAVE_SYSV_COMPAT
         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
@@ -1195,6 +1295,13 @@ int main(int argc, char *argv[]) {
                         break;
                 }
 
+        /* If we have switched root, do all the special things */
+        for (j = 1; j < argc; j++)
+                if (streq(argv[j], "--switchedroot")) {
+                        is_reexec = false;
+                        break;
+                }
+
         /* If we get started via the /sbin/init symlink then we are
            called 'init'. After a subsequent reexecution we are then
            called 'systemd'. That is confusing, hence let's call us
@@ -1210,8 +1317,21 @@ int main(int argc, char *argv[]) {
         log_set_max_level(LOG_INFO);
 
         if (getpid() == 1) {
+                if (in_initrd()) {
+                        char *rd_timestamp = NULL;
+
+                        dual_timestamp_get(&initrd_timestamp);
+                        asprintf(&rd_timestamp, "%llu %llu",
+                                 (unsigned long long) initrd_timestamp.realtime,
+                                 (unsigned long long) initrd_timestamp.monotonic);
+                        if (rd_timestamp) {
+                                setenv("RD_TIMESTAMP", rd_timestamp, 1);
+                                free(rd_timestamp);
+                        }
+                }
+
                 arg_running_as = MANAGER_SYSTEM;
-                log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_JOURNAL_OR_KMSG);
+                log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_JOURNAL : LOG_TARGET_JOURNAL_OR_KMSG);
 
                 if (!is_reexec) {
                         if (selinux_setup(&loaded_policy) < 0)
@@ -1222,7 +1342,7 @@ int main(int argc, char *argv[]) {
 
                 log_open();
 
-                if (label_init() < 0)
+                if (label_init(NULL) < 0)
                         goto finish;
 
                 if (!is_reexec)
@@ -1337,7 +1457,8 @@ int main(int argc, char *argv[]) {
                 /* Parse the data passed to us. We leave this
                  * variables set, but the manager later on will not
                  * pass them on to our children. */
-                parse_initrd_timestamp(&initrd_timestamp);
+                if(!in_initrd())
+                        parse_initrd_timestamp(&initrd_timestamp);
 
                 /* Unset some environment variables passed in from the
                  * kernel that don't really make sense for us. */
@@ -1427,8 +1548,6 @@ int main(int argc, char *argv[]) {
 #ifdef HAVE_SYSV_COMPAT
         m->sysv_console = arg_sysv_console;
 #endif
-        m->mount_auto = arg_mount_auto;
-        m->swap_auto = arg_swap_auto;
         m->default_std_output = arg_default_std_output;
         m->default_std_error = arg_default_std_error;
         m->runtime_watchdog = arg_runtime_watchdog;
@@ -1544,6 +1663,7 @@ int main(int argc, char *argv[]) {
                         break;
 
                 case MANAGER_REEXECUTE:
+
                         if (prepare_reexecute(m, &serialization, &fds) < 0)
                                 goto finish;
 
@@ -1551,6 +1671,20 @@ int main(int argc, char *argv[]) {
                         log_notice("Reexecuting.");
                         goto finish;
 
+                case MANAGER_SWITCH_ROOT:
+                        /* Steal the switch root parameters */
+                        switch_root = m->switch_root;
+                        switch_root_init = m->switch_root_init;
+                        m->switch_root = m->switch_root_init = NULL;
+
+                        if (!switch_root_init)
+                                if (prepare_reexecute(m, &serialization, &fds) < 0)
+                                        goto finish;
+
+                        reexecute = true;
+                        log_notice("Switching root.");
+                        goto finish;
+
                 case MANAGER_REBOOT:
                 case MANAGER_POWEROFF:
                 case MANAGER_HALT:
@@ -1583,69 +1717,86 @@ finish:
         free_join_controllers();
 
         dbus_shutdown();
-
         label_finish();
 
         if (reexecute) {
-                const char *args[15];
-                unsigned i = 0;
-                char sfd[16];
+                const char **args;
+                unsigned i, args_size;
 
-                assert(serialization);
-                assert(fds);
+                /* Close and disarm the watchdog, so that the new
+                 * instance can reinitialize it, but doesn't get
+                 * rebooted while we do that */
+                watchdog_close(true);
 
-                args[i++] = SYSTEMD_BINARY_PATH;
+                if (switch_root)
+                        do_switch_root(switch_root);
 
-                args[i++] = "--log-level";
-                args[i++] = log_level_to_string(log_get_max_level());
+                args_size = MAX(6, argc+1);
+                args = newa(const char*, args_size);
 
-                args[i++] = "--log-target";
-                args[i++] = log_target_to_string(log_get_target());
+                if (!switch_root_init) {
+                        char sfd[16];
 
-                if (arg_running_as == MANAGER_SYSTEM)
-                        args[i++] = "--system";
-                else
-                        args[i++] = "--user";
+                        /* First try to spawn ourselves with the right
+                         * path, and with full serialization. We do
+                         * this only if the user didn't specify an
+                         * explicit init to spawn. */
 
-                if (arg_dump_core)
-                        args[i++] = "--dump-core";
+                        assert(serialization);
+                        assert(fds);
 
-                if (arg_crash_shell)
-                        args[i++] = "--crash-shell";
+                        snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
+                        char_array_0(sfd);
 
-                if (arg_confirm_spawn)
-                        args[i++] = "--confirm-spawn";
+                        i = 0;
+                        args[i++] = SYSTEMD_BINARY_PATH;
+                        if (switch_root)
+                                args[i++] = "--switchedroot";
+                        args[i++] = arg_running_as == MANAGER_SYSTEM ? "--system" : "--user";
+                        args[i++] = "--deserialize";
+                        args[i++] = sfd;
+                        args[i++] = NULL;
 
-                if (arg_show_status)
-                        args[i++] = "--show-status=1";
-                else
-                        args[i++] = "--show-status=0";
+                        assert(i <= args_size);
+                        execv(args[0], (char* const*) args);
+                }
 
-#ifdef HAVE_SYSV_COMPAT
-                if (arg_sysv_console)
-                        args[i++] = "--sysv-console=1";
-                else
-                        args[i++] = "--sysv-console=0";
-#endif
+                /* Try the fallback, if there is any, without any
+                 * serialization. We pass the original argv[] and
+                 * envp[]. (Well, modulo the ordering changes due to
+                 * getopt() in argv[], and some cleanups in envp[],
+                 * but let's hope that doesn't matter.) */
 
-                snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
-                char_array_0(sfd);
+                if (serialization) {
+                        fclose(serialization);
+                        serialization = NULL;
+                }
 
-                args[i++] = "--deserialize";
-                args[i++] = sfd;
+                if (fds) {
+                        fdset_free(fds);
+                        fds = NULL;
+                }
 
+                for (j = 1, i = 1; j < argc; j++)
+                        args[i++] = argv[j];
                 args[i++] = NULL;
+                assert(i <= args_size);
 
-                assert(i <= ELEMENTSOF(args));
-
-                /* Close and disarm the watchdog, so that the new
-                 * instance can reinitialize it, but doesn't get
-                 * rebooted while we do that */
-                watchdog_close(true);
+                if (switch_root_init) {
+                        args[0] = switch_root_init;
+                        execv(args[0], (char* const*) args);
+                        log_warning("Failed to execute configured init, trying fallback: %m");
+                }
 
+                args[0] = "/sbin/init";
                 execv(args[0], (char* const*) args);
 
-                log_error("Failed to reexecute: %m");
+                log_warning("Failed to execute /sbin/init, trying fallback: %m");
+
+                args[0] = "/bin/sh";
+                args[1] = NULL;
+                execv(args[0], (char* const*) args);
+                log_error("Failed to execute /bin/sh, giving up: %m");
         }
 
         if (serialization)