chiark / gitweb /
main: do_switch_root() do not recursively remove across device boundaries
[elogind.git] / src / core / main.c
index 8c25819a120e1870cad9ea84dff23d2316487a11..4d09cb71742a54fd20d6d5d53f83c13ccceb26fe 100644 (file)
@@ -746,6 +746,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_SHOW_STATUS,
                 ARG_SYSV_CONSOLE,
                 ARG_DESERIALIZE,
+                ARG_SWITCHEDROOT,
                 ARG_INTROSPECT,
                 ARG_DEFAULT_STD_OUTPUT,
                 ARG_DEFAULT_STD_ERROR
@@ -770,6 +771,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "sysv-console",             optional_argument, NULL, ARG_SYSV_CONSOLE             },
 #endif
                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
+                { "switchedroot",             no_argument,       NULL, ARG_SWITCHEDROOT             },
                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
@@ -941,6 +943,10 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_SWITCHEDROOT:
+                        /* Nothing special yet */
+                        break;
+
                 case ARG_INTROSPECT: {
                         const char * const * i = NULL;
 
@@ -1168,30 +1174,84 @@ static void test_cgroups(void) {
 }
 
 static int do_switch_root(const char *switch_root) {
-        int r;
+        int r=0;
+        /*  Don't try to unmount the old "/", there's no way to do it. */
+        const char *umounts[] = { "/dev", "/proc", "/sys", "/run", NULL };
+        int i;
+        int cfd = -1;
+        struct stat switch_root_stat, sb;
+        bool remove_old_root;
 
         if (path_equal(switch_root, "/"))
                 return 0;
 
-        if (chdir(switch_root) < 0) {
+        if (stat(switch_root, &switch_root_stat) != 0) {
                 r = -errno;
+                log_error("failed to stat directory %s", switch_root);
                 goto fail;
         }
 
+        remove_old_root = in_initrd();
+
+        for (i = 0; umounts[i] != NULL; i++) {
+                char newmount[PATH_MAX];
+
+                snprintf(newmount, sizeof(newmount), "%s%s", switch_root, umounts[i]);
+
+                if ((stat(newmount, &sb) != 0) || (sb.st_dev != switch_root_stat.st_dev)) {
+                        /* mount point seems to be mounted already or stat failed */
+                        umount2(umounts[i], MNT_DETACH);
+                        continue;
+                }
+
+                if (mount(umounts[i], newmount, NULL, MS_MOVE, NULL) < 0) {
+                        log_error("failed to mount moving %s to %s",
+                                  umounts[i], newmount);
+                        log_error("forcing unmount of %s", umounts[i]);
+                        umount2(umounts[i], MNT_FORCE);
+                }
+        }
+
+        if (chdir(switch_root)) {
+                r = -errno;
+                log_error("failed to change directory to %s", switch_root);
+                goto fail;
+        }
+
+        if (remove_old_root)
+                cfd = open("/", O_RDONLY);
+
         if (mount(switch_root, "/", NULL, MS_MOVE, NULL) < 0) {
                 r = -errno;
-                chdir("/");
+                log_error("failed to mount moving %s to /", switch_root);
                 goto fail;
         }
 
-        if (chroot(".") < 0)
-                log_warning("Failed to change root, ignoring: %m");
+        if (chroot(".")) {
+                r = -errno;
+                log_error("failed to change root");
+                goto fail;
+        }
 
-        /* FIXME: remove old root */
+        if (cfd >= 0) {
+                struct stat rb;
+
+                if (fstat(cfd, &rb)) {
+                        log_error("failed to stat old root directory");
+                        goto fail;
+                }
+
+                rm_rf_children(cfd, false, false, &rb);
+                close(cfd);
+                cfd=-1;
+        }
 
         return 0;
 
 fail:
+        if (cfd >= 0)
+                close(cfd);
+
         log_error("Failed to switch root, ignoring: %s", strerror(-r));
 
         return r;
@@ -1235,6 +1295,13 @@ int main(int argc, char *argv[]) {
                         break;
                 }
 
+        /* If we have switched root, do all the special things */
+        for (j = 1; j < argc; j++)
+                if (streq(argv[j], "--switchedroot")) {
+                        is_reexec = false;
+                        break;
+                }
+
         /* If we get started via the /sbin/init symlink then we are
            called 'init'. After a subsequent reexecution we are then
            called 'systemd'. That is confusing, hence let's call us
@@ -1250,6 +1317,19 @@ int main(int argc, char *argv[]) {
         log_set_max_level(LOG_INFO);
 
         if (getpid() == 1) {
+                if (in_initrd()) {
+                        char *rd_timestamp = NULL;
+
+                        dual_timestamp_get(&initrd_timestamp);
+                        asprintf(&rd_timestamp, "%llu %llu",
+                                 (unsigned long long) initrd_timestamp.realtime,
+                                 (unsigned long long) initrd_timestamp.monotonic);
+                        if (rd_timestamp) {
+                                setenv("RD_TIMESTAMP", rd_timestamp, 1);
+                                free(rd_timestamp);
+                        }
+                }
+
                 arg_running_as = MANAGER_SYSTEM;
                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_JOURNAL : LOG_TARGET_JOURNAL_OR_KMSG);
 
@@ -1377,7 +1457,8 @@ int main(int argc, char *argv[]) {
                 /* Parse the data passed to us. We leave this
                  * variables set, but the manager later on will not
                  * pass them on to our children. */
-                parse_initrd_timestamp(&initrd_timestamp);
+                if(!in_initrd())
+                        parse_initrd_timestamp(&initrd_timestamp);
 
                 /* Unset some environment variables passed in from the
                  * kernel that don't really make sense for us. */
@@ -1650,7 +1731,7 @@ finish:
                 if (switch_root)
                         do_switch_root(switch_root);
 
-                args_size = MAX(5, argc+1);
+                args_size = MAX(6, argc+1);
                 args = newa(const char*, args_size);
 
                 if (!switch_root_init) {
@@ -1669,6 +1750,8 @@ finish:
 
                         i = 0;
                         args[i++] = SYSTEMD_BINARY_PATH;
+                        if (switch_root)
+                                args[i++] = "--switchedroot";
                         args[i++] = arg_running_as == MANAGER_SYSTEM ? "--system" : "--user";
                         args[i++] = "--deserialize";
                         args[i++] = sfd;
@@ -1684,22 +1767,36 @@ finish:
                  * getopt() in argv[], and some cleanups in envp[],
                  * but let's hope that doesn't matter.) */
 
-                if (serialization)
+                if (serialization) {
                         fclose(serialization);
+                        serialization = NULL;
+                }
 
-                if (fds)
+                if (fds) {
                         fdset_free(fds);
+                        fds = NULL;
+                }
 
-                i = 0;
-                args[i++] = switch_root_init ? switch_root_init : "/sbin/init";
-                for (j = 1; j < argc; j++)
+                for (j = 1, i = 1; j < argc; j++)
                         args[i++] = argv[j];
                 args[i++] = NULL;
-
                 assert(i <= args_size);
+
+                if (switch_root_init) {
+                        args[0] = switch_root_init;
+                        execv(args[0], (char* const*) args);
+                        log_warning("Failed to execute configured init, trying fallback: %m");
+                }
+
+                args[0] = "/sbin/init";
                 execv(args[0], (char* const*) args);
 
-                log_error("Failed to reexecute: %m");
+                log_warning("Failed to execute /sbin/init, trying fallback: %m");
+
+                args[0] = "/bin/sh";
+                args[1] = NULL;
+                execv(args[0], (char* const*) args);
+                log_error("Failed to execute /bin/sh, giving up: %m");
         }
 
         if (serialization)