return r;
}
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 10,
/* Allow the container to
* access and create the API
* device nodes, so that
* container to ever create
* these device nodes. */
"/dev/pts/ptmx", "rw",
- "char-pts", "rw");
+ "char-pts", "rw",
+ /* Allow the container
+ * access to all kdbus
+ * devices. Again, the
+ * container cannot create
+ * these nodes, only use
+ * them. We use a pretty
+ * open match here, so that
+ * the kernel API can still
+ * change. */
+ "char-kdbus", "rw",
+ "char-kdbus/*", "rw");
if (r < 0) {
log_error("Failed to add device whitelist: %s", strerror(-r));
return r;
ioctl(control, LOOP_CTL_REMOVE, nr);
}
+/* Runs "getent <database> <key>" in a forked child that has an empty
+ * environment, its stdout connected to a pipe and its stdin/stderr
+ * redirected to /dev/null.
+ *
+ * On success, returns the read end of that pipe (owned by the caller,
+ * who has to close it) and stores the child's PID in *rpid so that
+ * the caller can wait for it. On failure, returns a negative
+ * errno-style value and leaves no pipe file descriptors open. */
+static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
+        int pipe_fds[2];
+        pid_t pid;
+        int r;
+
+        assert(database);
+        assert(key);
+        assert(rpid);
+
+        if (pipe2(pipe_fds, O_CLOEXEC) < 0) {
+                /* Pick up errno before logging, in case the
+                 * logging call modifies it */
+                r = -errno;
+                log_error("Failed to allocate pipe: %m");
+                return r;
+        }
+
+        pid = fork();
+        if (pid < 0) {
+                r = -errno;
+                log_error("Failed to fork getent child: %m");
+
+                /* No child exists that could use the pipe, so
+                 * don't leak either end of it */
+                close_nointr_nofail(pipe_fds[0]);
+                close_nointr_nofail(pipe_fds[1]);
+                return r;
+        } else if (pid == 0) {
+                int nullfd;
+                char *empty_env = NULL;
+
+                /* Child: make the write end of the pipe our stdout */
+                if (dup3(pipe_fds[1], STDOUT_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                /* Only close the original pipe fds if they are not
+                 * one of the standard fds themselves */
+                if (pipe_fds[0] > 2)
+                        close_nointr_nofail(pipe_fds[0]);
+                if (pipe_fds[1] > 2)
+                        close_nointr_nofail(pipe_fds[1]);
+
+                /* Connect stdin and stderr to /dev/null */
+                nullfd = open("/dev/null", O_RDWR);
+                if (nullfd < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (dup3(nullfd, STDIN_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (dup3(nullfd, STDERR_FILENO, 0) < 0)
+                        _exit(EXIT_FAILURE);
+
+                if (nullfd > 2)
+                        close_nointr_nofail(nullfd);
+
+                reset_all_signal_handlers();
+                close_all_fds(NULL, 0);
+
+                /* Try both common locations of the binary; execle()
+                 * only returns if the call failed */
+                execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
+                execle("/bin/getent", "getent", database, key, NULL, &empty_env);
+                _exit(EXIT_FAILURE);
+        }
+
+        /* Parent: the write end belongs to the child only */
+        close_nointr_nofail(pipe_fds[1]);
+        pipe_fds[1] = -1;
+
+        *rpid = pid;
+
+        return pipe_fds[0];
+}
+
+/* Switches the calling process over to the user configured in
+ * arg_user.
+ *
+ * If arg_user is unset, "root" or "0", all supplementary groups are
+ * dropped, all UIDs/GIDs are reset to 0 and *_home is set to NULL.
+ *
+ * Otherwise the user's passwd entry and supplementary groups are
+ * looked up by running getent, the home directory is created, stdin,
+ * stdout and stderr are chown()ed to the user, and finally the
+ * supplementary groups, GID and UID are applied. In this case *_home
+ * receives a newly allocated copy of the home directory path, which
+ * the caller has to free.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+static int change_uid_gid(char **_home) {
+
+        char line[LINE_MAX], *w, *x, *state, *u, *g, *h;
+        _cleanup_free_ gid_t *gids = NULL;
+        _cleanup_free_ char *home = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_close_ int fd = -1;
+        unsigned n_gids = 0;
+        /* sz is handed to GREEDY_REALLOC() below together with the
+         * still-NULL array, hence it must not start out
+         * uninitialized */
+        size_t sz = 0, l;
+        uid_t uid;
+        gid_t gid;
+        pid_t pid;
+        int r;
+
+        assert(_home);
+
+        if (!arg_user || streq(arg_user, "root") || streq(arg_user, "0")) {
+                /* Reset everything fully to 0, just in case */
+
+                if (setgroups(0, NULL) < 0) {
+                        r = -errno;
+                        log_error("setgroups() failed: %m");
+                        return r;
+                }
+
+                if (setresgid(0, 0, 0) < 0) {
+                        r = -errno;
+                        log_error("setresgid() failed: %m");
+                        return r;
+                }
+
+                if (setresuid(0, 0, 0) < 0) {
+                        r = -errno;
+                        log_error("setresuid() failed: %m");
+                        return r;
+                }
+
+                *_home = NULL;
+                return 0;
+        }
+
+        /* First, get user credentials */
+        fd = spawn_getent("passwd", arg_user, &pid);
+        if (fd < 0)
+                return fd;
+
+        f = fdopen(fd, "r");
+        if (!f)
+                return log_oom();
+        /* The stream owns the fd now */
+        fd = -1;
+
+        if (!fgets(line, sizeof(line), f)) {
+
+                /* EOF without a read error: getent printed nothing */
+                if (!ferror(f)) {
+                        log_error("Failed to resolve user %s.", arg_user);
+                        return -ESRCH;
+                }
+
+                r = -errno;
+                log_error("Failed to read from getent: %m");
+                return r;
+        }
+
+        truncate_nl(line);
+
+        wait_for_terminate_and_warn("getent passwd", pid);
+
+        /* Split up the colon-separated line: the third field is
+         * used as UID, the fourth as GID and the sixth as home
+         * directory */
+        x = strchr(line, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid user field.");
+                return -EIO;
+        }
+
+        u = strchr(x+1, ':');
+        if (!u) {
+                log_error("/etc/passwd entry has invalid password field.");
+                return -EIO;
+        }
+
+        u++;
+        g = strchr(u, ':');
+        if (!g) {
+                log_error("/etc/passwd entry has invalid UID field.");
+                return -EIO;
+        }
+
+        *g = 0;
+        g++;
+        x = strchr(g, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid GID field.");
+                return -EIO;
+        }
+
+        *x = 0;
+        h = strchr(x+1, ':');
+        if (!h) {
+                log_error("/etc/passwd entry has invalid GECOS field.");
+                return -EIO;
+        }
+
+        h++;
+        x = strchr(h, ':');
+        if (!x) {
+                log_error("/etc/passwd entry has invalid home directory field.");
+                return -EIO;
+        }
+
+        *x = 0;
+
+        r = parse_uid(u, &uid);
+        if (r < 0) {
+                log_error("Failed to parse UID of user.");
+                return -EIO;
+        }
+
+        r = parse_gid(g, &gid);
+        if (r < 0) {
+                log_error("Failed to parse GID of user.");
+                return -EIO;
+        }
+
+        /* line gets overwritten below, hence copy the home directory */
+        home = strdup(h);
+        if (!home)
+                return log_oom();
+
+        /* Second, get group memberships */
+        fd = spawn_getent("initgroups", arg_user, &pid);
+        if (fd < 0)
+                return fd;
+
+        fclose(f);
+        f = fdopen(fd, "r");
+        if (!f)
+                return log_oom();
+        fd = -1;
+
+        if (!fgets(line, sizeof(line), f)) {
+                if (!ferror(f)) {
+                        log_error("Failed to resolve user %s.", arg_user);
+                        return -ESRCH;
+                }
+
+                r = -errno;
+                log_error("Failed to read from getent: %m");
+                return r;
+        }
+
+        truncate_nl(line);
+
+        wait_for_terminate_and_warn("getent initgroups", pid);
+
+        /* Skip over the username and subsequent separator whitespace */
+        x = line;
+        x += strcspn(x, WHITESPACE);
+        x += strspn(x, WHITESPACE);
+
+        /* Every remaining word is taken as one numeric group ID */
+        FOREACH_WORD(w, l, x, state) {
+                char c[l+1];
+
+                memcpy(c, w, l);
+                c[l] = 0;
+
+                if (!GREEDY_REALLOC(gids, sz, n_gids+1))
+                        return log_oom();
+
+                r = parse_gid(c, &gids[n_gids]);
+                if (r < 0) {
+                        log_error("Failed to parse group data from getent.");
+                        return -EIO;
+                }
+
+                n_gids++;
+        }
+
+        r = mkdir_parents(home, 0775);
+        if (r < 0) {
+                log_error("Failed to make home root directory: %s", strerror(-r));
+                return r;
+        }
+
+        r = mkdir_safe(home, 0755, uid, gid);
+        if (r < 0) {
+                log_error("Failed to make home directory: %s", strerror(-r));
+                return r;
+        }
+
+        /* Hand stdin/stdout/stderr over to the user; the results
+         * are not checked, i.e. this is best-effort only */
+        fchown(STDIN_FILENO, uid, gid);
+        fchown(STDOUT_FILENO, uid, gid);
+        fchown(STDERR_FILENO, uid, gid);
+
+        /* Supplementary groups and GID are set before the UID is changed */
+        if (setgroups(n_gids, gids) < 0) {
+                r = -errno;
+                log_error("Failed to set auxiliary groups: %m");
+                return r;
+        }
+
+        if (setresgid(gid, gid, gid) < 0) {
+                r = -errno;
+                log_error("setresgid() failed: %m");
+                return r;
+        }
+
+        if (setresuid(uid, uid, uid) < 0) {
+                r = -errno;
+                log_error("setresuid() failed: %m");
+                return r;
+        }
+
+        /* Pass ownership of the string to the caller, and make sure
+         * the cleanup handler doesn't free it */
+        *_home = home;
+        home = NULL;
+
+        return 0;
+}
+
int main(int argc, char *argv[]) {
_cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
- _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, image_fd = -1;
+ _cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
_cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
_cleanup_fdset_free_ FDSet *fds = NULL;
int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
for (;;) {
+ int parent_ready_fd = -1, child_ready_fd = -1;
siginfo_t status;
+ eventfd_t x;
+
+ parent_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (parent_ready_fd < 0) {
+ log_error("Failed to create event fd: %m");
+ goto finish;
+ }
- sync_fd = eventfd(0, EFD_CLOEXEC);
- if (sync_fd < 0) {
+ child_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (child_ready_fd < 0) {
log_error("Failed to create event fd: %m");
goto finish;
}
if (pid == 0) {
/* child */
- const char *home = NULL;
- uid_t uid = (uid_t) -1;
- gid_t gid = (gid_t) -1;
+ _cleanup_free_ char *home = NULL;
unsigned n_env = 2;
const char *envp[] = {
"PATH=" DEFAULT_PATH_SPLIT_USR,
NULL
};
char **env_use;
- eventfd_t x;
envp[n_env] = strv_find_prefix(environ, "TERM=");
if (envp[n_env])
if (setup_kdbus(arg_directory, kdbus_domain) < 0)
goto child_fail;
+ /* Tell the parent that we are ready, and that
+ * it can cgroupify us so that we lack access
+ * to certain devices and resources. */
+ eventfd_write(child_ready_fd, 1);
+ close_nointr_nofail(child_ready_fd);
+ child_ready_fd = -1;
+
if (chdir(arg_directory) < 0) {
log_error("chdir(%s) failed: %m", arg_directory);
goto child_fail;
goto child_fail;
}
- if (arg_user) {
-
- /* Note that this resolves user names
- * inside the container, and hence
- * accesses the NSS modules from the
- * container and not the host. This is
- * a bit weird... */
-
- if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
- log_error("get_user_creds() failed: %m");
- goto child_fail;
- }
-
- if (mkdir_parents_label(home, 0775) < 0) {
- log_error("mkdir_parents_label() failed: %m");
- goto child_fail;
- }
-
- if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
- log_error("mkdir_safe_label() failed: %m");
- goto child_fail;
- }
-
- if (initgroups((const char*)arg_user, gid) < 0) {
- log_error("initgroups() failed: %m");
- goto child_fail;
- }
-
- if (setresgid(gid, gid, gid) < 0) {
- log_error("setregid() failed: %m");
- goto child_fail;
- }
-
- if (setresuid(uid, uid, uid) < 0) {
- log_error("setreuid() failed: %m");
- goto child_fail;
- }
- } else {
- /* Reset everything fully to 0, just in case */
-
- if (setgroups(0, NULL) < 0) {
- log_error("setgroups() failed: %m");
- goto child_fail;
- }
-
- if (setresgid(0, 0, 0) < 0) {
- log_error("setregid() failed: %m");
- goto child_fail;
- }
-
- if (setresuid(0, 0, 0) < 0) {
- log_error("setreuid() failed: %m");
- goto child_fail;
- }
- }
+ r = change_uid_gid(&home);
+ if (r < 0)
+ goto child_fail;
if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
(asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
}
}
- eventfd_read(sync_fd, &x);
- close_nointr_nofail(sync_fd);
- sync_fd = -1;
+#ifdef HAVE_SELINUX
+ if (arg_selinux_context)
+ if (setexeccon((security_context_t) arg_selinux_context) < 0) {
+ log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
+ goto child_fail;
+ }
+#endif
if (!strv_isempty(arg_setenv)) {
char **n;
} else
env_use = (char**) envp;
-#ifdef HAVE_SELINUX
- if (arg_selinux_context)
- if (setexeccon((security_context_t) arg_selinux_context) < 0)
- log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
-#endif
+ /* Wait until the parent is ready with the setup, too... */
+ eventfd_read(parent_ready_fd, &x);
+ close_nointr_nofail(parent_ready_fd);
+ parent_ready_fd = -1;
+
if (arg_boot) {
char **a;
size_t l;
fdset_free(fds);
fds = NULL;
+ /* Wait until the child reported that it is ready with
+ * all it needs to do with privileges. After we got
+ * the notification we can make the process join its
+ * cgroup which might limit what it can do */
+ eventfd_read(child_ready_fd, &x);
+
r = register_machine(pid);
if (r < 0)
goto finish;
if (r < 0)
goto finish;
- eventfd_write(sync_fd, 1);
- close_nointr_nofail(sync_fd);
- sync_fd = -1;
+ /* Notify the child that the parent is ready with all
+ * its setup, and that the child can now hand over
+ * control to the code to run inside the container. */
+ eventfd_write(parent_ready_fd, 1);
k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
if (k < 0) {