chiark / gitweb /
nspawn: fix argv[0] for getent
[elogind.git] / src / nspawn / nspawn.c
index bef866a966d6034c7e57e33f9b69cac9c1845b2b..b2c974d97016b8334b31a9799468fa273daf041d 100644 (file)
@@ -1258,7 +1258,7 @@ static int register_machine(pid_t pid) {
                         return r;
                 }
 
-                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
+                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 10,
                                           /* Allow the container to
                                            * access and create the API
                                            * device nodes, so that
@@ -1277,7 +1277,18 @@ static int register_machine(pid_t pid) {
                                            * container to ever create
                                            * these device nodes. */
                                           "/dev/pts/ptmx", "rw",
-                                          "char-pts", "rw");
+                                          "char-pts", "rw",
+                                          /* Allow the container
+                                           * access to all kdbus
+                                           * devices. Again, the
+                                           * container cannot create
+                                           * these nodes, only use
+                                           * them. We use a pretty
+                                           * open match here, so that
+                                           * the kernel API can still
+                                           * change. */
+                                          "char-kdbus", "rw",
+                                          "char-kdbus/*", "rw");
                 if (r < 0) {
                         log_error("Failed to add device whitelist: %s", strerror(-r));
                         return r;
@@ -2219,10 +2230,276 @@ static void loop_remove(int nr, int *image_fd) {
         ioctl(control, LOOP_CTL_REMOVE, nr);
 }
 
+/* Forks off a "getent <database> <key>" child whose stdout is connected
+ * to a pipe. The child gets /dev/null as stdin and stderr and is
+ * executed with an empty environment.
+ *
+ * On success returns the read end of the pipe (owned by the caller)
+ * and stores the child's PID in *rpid; the caller is expected to wait
+ * for the child. On failure returns a negative errno-style code. */
+static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
+        int pipe_fds[2];
+        pid_t pid;
+
+        assert(database);
+        assert(key);
+        assert(rpid);
+
+        if (pipe2(pipe_fds, O_CLOEXEC) < 0) {
+                log_error("Failed to allocate pipe: %m");
+                return -errno;
+        }
+
+        pid = fork();
+        if (pid < 0) {
+                /* Remember the fork() error before doing any cleanup
+                 * that might touch errno. */
+                int saved_errno = errno;
+
+                log_error("Failed to fork getent child: %m");
+
+                /* Do not leak the pipe into the caller on failure. */
+                close_nointr_nofail(pipe_fds[0]);
+                close_nointr_nofail(pipe_fds[1]);
+
+                return -saved_errno;
+        }
+
+        if (pid == 0) {
+                /* child */
+                int nullfd;
+                char *empty_env = NULL;
+
+                /* Make the write end of the pipe our stdout */
+                if (dup3(pipe_fds[1], STDOUT_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                /* Only close the originals if they are not one of
+                 * the standard fds themselves */
+                if (pipe_fds[0] > 2)
+                        close_nointr_nofail(pipe_fds[0]);
+                if (pipe_fds[1] > 2)
+                        close_nointr_nofail(pipe_fds[1]);
+
+                /* stdin and stderr both go to /dev/null */
+                nullfd = open("/dev/null", O_RDWR);
+                if (nullfd < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (dup3(nullfd, STDIN_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (dup3(nullfd, STDERR_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (nullfd > 2)
+                        close_nointr_nofail(nullfd);
+
+                reset_all_signal_handlers();
+                close_all_fds(NULL, 0);
+
+                /* Try both common locations of the binary. The
+                 * argument list of a variadic exec must be terminated
+                 * by a properly typed null pointer. */
+                execle("/usr/bin/getent", "getent", database, key, (char*) NULL, &empty_env);
+                execle("/bin/getent", "getent", database, key, (char*) NULL, &empty_env);
+                _exit(EXIT_FAILURE);
+        }
+
+        /* parent: keep only the read end */
+        close_nointr_nofail(pipe_fds[1]);
+        pipe_fds[1] = -1;
+
+        *rpid = pid;
+
+        return pipe_fds[0];
+}
+
+/* Switches the calling process to the user configured in arg_user.
+ *
+ * If no user, "root" or "0" is configured, the supplementary groups
+ * are dropped, all GIDs/UIDs are reset to 0 and *_home is set to NULL.
+ *
+ * Otherwise the user is resolved by running "getent passwd" and
+ * "getent initgroups" as child processes, the home directory is
+ * created, stdin/stdout/stderr are chown()ed to the user, and the
+ * supplementary groups, GID and UID are applied. On success *_home
+ * receives a newly allocated copy of the home directory path, which
+ * the caller has to free.
+ *
+ * Returns 0 on success, a negative errno-style code on failure. */
+static int change_uid_gid(char **_home) {
+
+        char line[LINE_MAX], *w, *x, *state, *u, *g, *h;
+        _cleanup_free_ uid_t *uids = NULL;
+        _cleanup_free_ char *home = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_close_ int fd = -1;
+        unsigned n_uids = 0;
+        /* sz is the allocation counter of GREEDY_REALLOC() and hence
+         * must start out as 0 together with uids == NULL. */
+        size_t sz = 0, l;
+        uid_t uid;
+        gid_t gid;
+        pid_t pid;
+        int r;
+
+        assert(_home);
+
+        if (!arg_user || streq(arg_user, "root") || streq(arg_user, "0")) {
+                /* Reset everything fully to 0, just in case */
+
+                if (setgroups(0, NULL) < 0) {
+                        log_error("setgroups() failed: %m");
+                        return -errno;
+                }
+
+                if (setresgid(0, 0, 0) < 0) {
+                        log_error("setresgid() failed: %m");
+                        return -errno;
+                }
+
+                if (setresuid(0, 0, 0) < 0) {
+                        log_error("setresuid() failed: %m");
+                        return -errno;
+                }
+
+                *_home = NULL;
+                return 0;
+        }
+
+        /* First, get user credentials */
+        fd = spawn_getent("passwd", arg_user, &pid);
+        if (fd < 0)
+                return fd;
+
+        f = fdopen(fd, "r");
+        if (!f)
+                return log_oom();
+        /* The stream owns the fd now */
+        fd = -1;
+
+        if (!fgets(line, sizeof(line), f)) {
+
+                /* EOF without a read error: getent printed nothing */
+                if (!ferror(f)) {
+                        log_error("Failed to resolve user %s.", arg_user);
+                        return -ESRCH;
+                }
+
+                log_error("Failed to read from getent: %m");
+                return -errno;
+        }
+
+        truncate_nl(line);
+
+        wait_for_terminate_and_warn("getent passwd", pid);
+
+        /* Walk the colon separated fields of the entry:
+         * name:password:UID:GID:GECOS:home:shell */
+
+        /* End of the user name */
+        x = strchr(line, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid user field.");
+                return -EIO;
+        }
+
+        /* End of the password, the UID starts right after it */
+        u = strchr(x+1, ':');
+        if (!u) {
+                log_error("/etc/passwd entry has invalid password field.");
+                return -EIO;
+        }
+
+        u++;
+        g = strchr(u, ':');
+        if (!g) {
+                log_error("/etc/passwd entry has invalid UID field.");
+                return -EIO;
+        }
+
+        /* Terminate the UID, the GID starts right after it */
+        *g = 0;
+        g++;
+        x = strchr(g, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid GID field.");
+                return -EIO;
+        }
+
+        /* Terminate the GID, then skip over GECOS to the home directory */
+        *x = 0;
+        h = strchr(x+1, ':');
+        if (!h) {
+                log_error("/etc/passwd entry has invalid GECOS field.");
+                return -EIO;
+        }
+
+        h++;
+        x = strchr(h, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid home directory field.");
+                return -EIO;
+        }
+
+        /* Terminate the home directory */
+        *x = 0;
+
+        r = parse_uid(u, &uid);
+        if (r < 0) {
+                log_error("Failed to parse UID of user.");
+                return -EIO;
+        }
+
+        r = parse_gid(g, &gid);
+        if (r < 0) {
+                log_error("Failed to parse GID of user.");
+                return -EIO;
+        }
+
+        /* line is reused below, hence keep a copy of the home directory */
+        home = strdup(h);
+        if (!home)
+                return log_oom();
+
+        /* Second, get group memberships */
+        fd = spawn_getent("initgroups", arg_user, &pid);
+        if (fd < 0)
+                return fd;
+
+        fclose(f);
+        f = fdopen(fd, "r");
+        if (!f)
+                return log_oom();
+        fd = -1;
+
+        if (!fgets(line, sizeof(line), f)) {
+                if (!ferror(f)) {
+                        log_error("Failed to resolve user %s.", arg_user);
+                        return -ESRCH;
+                }
+
+                log_error("Failed to read from getent: %m");
+                return -errno;
+        }
+
+        truncate_nl(line);
+
+        wait_for_terminate_and_warn("getent initgroups", pid);
+
+        /* Skip over the username and subsequent separator whitespace */
+        x = line;
+        x += strcspn(x, WHITESPACE);
+        x += strspn(x, WHITESPACE);
+
+        /* Every remaining word is the numeric ID of one supplementary group */
+        FOREACH_WORD(w, l, x, state) {
+                char c[l+1];
+
+                memcpy(c, w, l);
+                c[l] = 0;
+
+                if (!GREEDY_REALLOC(uids, sz, n_uids+1))
+                        return log_oom();
+
+                r = parse_uid(c, &uids[n_uids++]);
+                if (r < 0) {
+                        log_error("Failed to parse group data from getent.");
+                        return -EIO;
+                }
+        }
+
+        r = mkdir_parents(home, 0775);
+        if (r < 0) {
+                log_error("Failed to make home root directory: %s", strerror(-r));
+                return r;
+        }
+
+        r = mkdir_safe(home, 0755, uid, gid);
+        if (r < 0) {
+                log_error("Failed to make home directory: %s", strerror(-r));
+                return r;
+        }
+
+        /* Hand the standard fds over to the user. This is best-effort,
+         * failures are deliberately ignored. */
+        fchown(STDIN_FILENO, uid, gid);
+        fchown(STDOUT_FILENO, uid, gid);
+        fchown(STDERR_FILENO, uid, gid);
+
+        /* Drop privileges: groups first, then the GID, and the UID last */
+        if (setgroups(n_uids, uids) < 0) {
+                log_error("Failed to set auxiliary groups: %m");
+                return -errno;
+        }
+
+        if (setresgid(gid, gid, gid) < 0) {
+                log_error("setresgid() failed: %m");
+                return -errno;
+        }
+
+        if (setresuid(uid, uid, uid) < 0) {
+                log_error("setresuid() failed: %m");
+                return -errno;
+        }
+
+        /* Pass ownership of the home directory string to the caller */
+        *_home = home;
+        home = NULL;
+
+        return 0;
+}
+
 int main(int argc, char *argv[]) {
 
         _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
-        _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, image_fd = -1;
+        _cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
         _cleanup_fdset_free_ FDSet *fds = NULL;
         int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
@@ -2396,10 +2673,18 @@ int main(int argc, char *argv[]) {
         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
 
         for (;;) {
+                int parent_ready_fd = -1, child_ready_fd = -1;
                 siginfo_t status;
+                eventfd_t x;
+
+                parent_ready_fd = eventfd(0, EFD_CLOEXEC);
+                if (parent_ready_fd < 0) {
+                        log_error("Failed to create event fd: %m");
+                        goto finish;
+                }
 
-                sync_fd = eventfd(0, EFD_CLOEXEC);
-                if (sync_fd < 0) {
+                child_ready_fd = eventfd(0, EFD_CLOEXEC);
+                if (child_ready_fd < 0) {
                         log_error("Failed to create event fd: %m");
                         goto finish;
                 }
@@ -2419,9 +2704,7 @@ int main(int argc, char *argv[]) {
 
                 if (pid == 0) {
                         /* child */
-                        const char *home = NULL;
-                        uid_t uid = (uid_t) -1;
-                        gid_t gid = (gid_t) -1;
+                        _cleanup_free_ char *home = NULL;
                         unsigned n_env = 2;
                         const char *envp[] = {
                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
@@ -2436,7 +2719,6 @@ int main(int argc, char *argv[]) {
                                 NULL
                         };
                         char **env_use;
-                        eventfd_t x;
 
                         envp[n_env] = strv_find_prefix(environ, "TERM=");
                         if (envp[n_env])
@@ -2554,6 +2836,13 @@ int main(int argc, char *argv[]) {
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
                                 goto child_fail;
 
+                        /* Tell the parent that we are ready, and that
+                         * it can cgroupify us so that we lack access
+                         * to certain devices and resources. */
+                        eventfd_write(child_ready_fd, 1);
+                        close_nointr_nofail(child_ready_fd);
+                        child_ready_fd = -1;
+
                         if (chdir(arg_directory) < 0) {
                                 log_error("chdir(%s) failed: %m", arg_directory);
                                 goto child_fail;
@@ -2584,61 +2873,9 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
-                        if (arg_user) {
-
-                                /* Note that this resolves user names
-                                 * inside the container, and hence
-                                 * accesses the NSS modules from the
-                                 * container and not the host. This is
-                                 * a bit weird... */
-
-                                if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
-                                        log_error("get_user_creds() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (mkdir_parents_label(home, 0775) < 0) {
-                                        log_error("mkdir_parents_label() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
-                                        log_error("mkdir_safe_label() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (initgroups((const char*)arg_user, gid) < 0) {
-                                        log_error("initgroups() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (setresgid(gid, gid, gid) < 0) {
-                                        log_error("setregid() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (setresuid(uid, uid, uid) < 0) {
-                                        log_error("setreuid() failed: %m");
-                                        goto child_fail;
-                                }
-                        } else {
-                                /* Reset everything fully to 0, just in case */
-
-                                if (setgroups(0, NULL) < 0) {
-                                        log_error("setgroups() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (setresgid(0, 0, 0) < 0) {
-                                        log_error("setregid() failed: %m");
-                                        goto child_fail;
-                                }
-
-                                if (setresuid(0, 0, 0) < 0) {
-                                        log_error("setreuid() failed: %m");
-                                        goto child_fail;
-                                }
-                        }
+                        r = change_uid_gid(&home);
+                        if (r < 0)
+                                goto child_fail;
 
                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
@@ -2682,9 +2919,13 @@ int main(int argc, char *argv[]) {
                                 }
                         }
 
-                        eventfd_read(sync_fd, &x);
-                        close_nointr_nofail(sync_fd);
-                        sync_fd = -1;
+#ifdef HAVE_SELINUX
+                        if (arg_selinux_context)
+                                if (setexeccon((security_context_t) arg_selinux_context) < 0) {
+                                        log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
+                                        goto child_fail;
+                                }
+#endif
 
                         if (!strv_isempty(arg_setenv)) {
                                 char **n;
@@ -2699,11 +2940,11 @@ int main(int argc, char *argv[]) {
                         } else
                                 env_use = (char**) envp;
 
-#ifdef HAVE_SELINUX
-                        if (arg_selinux_context)
-                                if (setexeccon((security_context_t) arg_selinux_context) < 0)
-                                        log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
-#endif
+                        /* Wait until the parent is ready with the setup, too... */
+                        eventfd_read(parent_ready_fd, &x);
+                        close_nointr_nofail(parent_ready_fd);
+                        parent_ready_fd = -1;
+
                         if (arg_boot) {
                                 char **a;
                                 size_t l;
@@ -2739,6 +2980,12 @@ int main(int argc, char *argv[]) {
                 fdset_free(fds);
                 fds = NULL;
 
+                /* Wait until the child reported that it is ready with
+                 * all it needs to do with privileges. After we got
+                 * the notification we can make the process join its
+                 * cgroup which might limit what it can do */
+                eventfd_read(child_ready_fd, &x);
+
                 r = register_machine(pid);
                 if (r < 0)
                         goto finish;
@@ -2759,9 +3006,10 @@ int main(int argc, char *argv[]) {
                 if (r < 0)
                         goto finish;
 
-                eventfd_write(sync_fd, 1);
-                close_nointr_nofail(sync_fd);
-                sync_fd = -1;
+                /* Notify the child that the parent is ready with all
+                 * its setup, and that the child can now hand over
+                 * control to the code to run inside the container. */
+                eventfd_write(parent_ready_fd, 1);
 
                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
                 if (k < 0) {