1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
36 #include <sys/epoll.h>
38 #include <sys/signalfd.h>
42 #include <sys/socket.h>
44 #include <systemd/sd-daemon.h>
51 #include "cgroup-util.h"
53 #include "path-util.h"
54 #include "loopback-setup.h"
/* Settings taken from the command line; parse_argv() fills them in.
 * NOTE(review): no assignment to arg_uuid or arg_boot is visible among the
 * option handlers; presumably the --uuid and -b/--boot cases set them --
 * confirm. */
/* Container root directory (-D/--directory), stored in canonicalized form. */
56 static char *arg_directory = NULL;
/* User name or uid given with -u/--user; NULL when not given. */
57 static char *arg_user = NULL;
/* De-duplicated list of cgroup controllers given with -C/--controllers. */
58 static char **arg_controllers = NULL;
/* Used for the container_uuid= environment variable of the payload. */
59 static char *arg_uuid = NULL;
/* --private-network: adds CLONE_NEWNET to the clone() flags and makes
 * setup_resolv_conf() skip its bind mount. */
60 static bool arg_private_network = false;
/* --read-only */
61 static bool arg_read_only = false;
/* -b/--boot */
62 static bool arg_boot = false;
/* Bitmask of capabilities the container keeps; bit N stands for capability
 * number N. drop_capabilities() hands the complement of this mask to
 * capability_bounding_set_drop(), and every --capability=CAP option ORs one
 * more bit into it (see parse_argv()). */
63 static uint64_t arg_retain =
65 (1ULL << CAP_DAC_OVERRIDE) |
66 (1ULL << CAP_DAC_READ_SEARCH) |
67 (1ULL << CAP_FOWNER) |
68 (1ULL << CAP_FSETID) |
69 (1ULL << CAP_IPC_OWNER) |
72 (1ULL << CAP_LINUX_IMMUTABLE) |
73 (1ULL << CAP_NET_BIND_SERVICE) |
74 (1ULL << CAP_NET_BROADCAST) |
75 (1ULL << CAP_NET_RAW) |
76 (1ULL << CAP_SETGID) |
77 (1ULL << CAP_SETFCAP) |
78 (1ULL << CAP_SETPCAP) |
79 (1ULL << CAP_SETUID) |
80 (1ULL << CAP_SYS_ADMIN) |
81 (1ULL << CAP_SYS_CHROOT) |
82 (1ULL << CAP_SYS_NICE) |
83 (1ULL << CAP_SYS_PTRACE) |
84 (1ULL << CAP_SYS_TTY_CONFIG) |
85 (1ULL << CAP_SYS_RESOURCE);
/* Prints the usage summary (one line per supported option) to stdout. The
 * program name shown is program_invocation_short_name. */
87 static int help(void) {
89 printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
90 "Spawn a minimal namespace container for debugging, testing and building.\n\n"
91 " -h --help Show this help\n"
92 " -D --directory=NAME Root directory for the container\n"
93 " -b --boot Boot up full system (i.e. invoke init)\n"
94 " -u --user=USER Run the command under specified user or uid\n"
95 " -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n"
96 " --uuid=UUID Set a specific machine UUID for the container\n"
97 " --private-network Disable network in container\n"
98 " --read-only Mount the root directory read-only\n"
99 " --capability=CAP In addition to the default, retain specified capability\n",
100 program_invocation_short_name);
/* Parses the command line with getopt_long() and records the result in the
 * arg_* globals. Arguments that are not options are left in argv for main(),
 * which reads them starting at optind.
 * NOTE(review): main() stops when this function returns a value <= 0; the
 * return statements themselves are not visible here -- confirm the exact
 * convention. */
105 static int parse_argv(int argc, char *argv[]) {
/* Codes for options that have no short form. The first one is 0x100, which
 * is above the range of the single-character option codes used below. */
108 ARG_PRIVATE_NETWORK = 0x100,
/* Long option table; the last column is the value getopt_long() returns. */
114 static const struct option options[] = {
115 { "help", no_argument, NULL, 'h' },
116 { "directory", required_argument, NULL, 'D' },
117 { "user", required_argument, NULL, 'u' },
118 { "controllers", required_argument, NULL, 'C' },
119 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
120 { "boot", no_argument, NULL, 'b' },
121 { "uuid", required_argument, NULL, ARG_UUID },
122 { "read-only", no_argument, NULL, ARG_READ_ONLY },
123 { "capability", required_argument, NULL, ARG_CAPABILITY },
/* The leading '+' in the short-option string makes getopt_long() stop at the
 * first non-option argument, so options that belong to the command run
 * inside the container are not consumed here. */
132 while ((c = getopt_long(argc, argv, "+hD:u:C:b", options, NULL)) >= 0) {
/* Store the root directory as the newly allocated, resolved path returned by
 * canonicalize_file_name(); NULL means the path could not be resolved. */
142 arg_directory = canonicalize_file_name(optarg);
143 if (!arg_directory) {
144 log_error("Failed to canonicalize root directory.");
/* Keep a private copy of the user name / uid string. */
152 if (!(arg_user = strdup(optarg))) {
153 log_error("Failed to duplicate user name.");
/* A later -C replaces the list built by an earlier one; the new list is
 * split on commas and then de-duplicated. */
160 strv_free(arg_controllers);
161 arg_controllers = strv_split(optarg, ",");
162 if (!arg_controllers) {
163 log_error("Failed to split controllers list.");
166 strv_uniq(arg_controllers);
170 case ARG_PRIVATE_NETWORK:
171 arg_private_network = true;
183 arg_read_only = true;
/* --capability takes a comma-separated list of capability names. Each word
 * is copied into a NUL-terminated string, translated with cap_from_name()
 * and its bit is added to arg_retain. */
186 case ARG_CAPABILITY: {
190 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
194 t = strndup(word, length);
196 log_error("Out of memory.");
200 if (cap_from_name(t, &cap) < 0) {
201 log_error("Failed to parse capability %s.", t);
207 arg_retain |= 1ULL << (uint64_t) cap;
217 log_error("Unknown option code %c", c);
/* Mounts the entries of mount_table underneath the container root dest.
 * The target of every entry is built as "<dest>/<where>". */
225 static int mount_all(const char *dest) {
227 typedef struct MountPoint {
/* NOTE(review): judging from the initializers and from the uses of .what,
 * .where, .options, .flags and .fatal below, the columns appear to be:
 * source, target, file system type, mount options, mount flags, and whether
 * a failed mount is an error -- confirm against the struct definition.
 * Read-only bind mounts take two entries: a plain MS_BIND first, then a
 * remount of the same target that adds MS_RDONLY. */
236 static const MountPoint mount_table[] = {
237 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
238 { "/proc/sys", "/proc/sys", "bind", NULL, MS_BIND, true }, /* Bind mount first */
239 { "/proc/sys", "/proc/sys", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
240 { "/sys", "/sys", "bind", NULL, MS_BIND, true }, /* Bind mount first */
241 { "/sys", "/sys", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
242 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
243 { "/dev/pts", "/dev/pts", "bind", NULL, MS_BIND, true },
244 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
246 { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND, false }, /* Bind mount first */
247 { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
255 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
/* The table's targets already start with '/', so the resulting path contains
 * a double slash after dest. */
258 if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
259 log_error("Out of memory");
/* path_is_mount_point() reports failure as a negative errno-style value
 * (it is negated for strerror() below). */
267 t = path_is_mount_point(where, false);
269 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
/* Make sure the target exists. The result of mkdir_p_label() is not checked
 * here. */
278 mkdir_p_label(where, 0755);
/* A failed mount is only reported for entries marked fatal; for the other
 * entries the failure is silently tolerated. */
280 if (mount(mount_table[k].what,
283 mount_table[k].flags,
284 mount_table[k].options) < 0 &&
285 mount_table[k].fatal) {
287 log_error("mount(%s) failed: %m", where);
/* Gives the container the host's time zone: bind-mounts the host's
 * /etc/localtime and /etc/timezone over the same paths below dest and, when
 * a bind mount succeeds, remounts it read-only. The results of the remounts
 * are not checked, and no error is logged for a failed bind mount. */
299 static int setup_timezone(const char *dest) {
304 /* Fix the timezone, if possible */
305 if (asprintf(&where, "%s/etc/localtime", dest) < 0) {
306 log_error("Out of memory");
/* Second mount() turns the fresh bind mount read-only. */
310 if (mount("/etc/localtime", where, "bind", MS_BIND, NULL) >= 0)
311 mount("/etc/localtime", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
/* Same procedure for /etc/timezone. */
315 if (asprintf(&where, "%s/etc/timezone", dest) < 0) {
316 log_error("Out of memory");
320 if (mount("/etc/timezone", where, "bind", MS_BIND, NULL) >= 0)
321 mount("/etc/timezone", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
/* Makes the host's /etc/resolv.conf visible in the container by bind-mounting
 * it over <dest>/etc/resolv.conf and, if that worked, remounting it
 * read-only. The result of the remount is not checked, and no error is
 * logged for a failed bind mount. */
328 static int setup_resolv_conf(const char *dest) {
/* NOTE(review): with --private-network this presumably returns early without
 * touching resolv.conf -- confirm. */
333 if (arg_private_network)
336 /* Fix resolv.conf, if possible */
337 if (asprintf(&where, "%s/etc/resolv.conf", dest) < 0) {
338 log_error("Out of memory");
342 if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0)
343 mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
/* Recreates a fixed set of host device nodes inside the container. For every
 * name d in the NUL-separated devnodes list, /dev/<d> on the host is
 * examined with stat() and a node with the same mode and device number is
 * created as <dest>/dev/<d>. A node that does not exist on the host (ENOENT)
 * is not reported as an error; any other stat() failure, a file that is
 * neither a character nor a block device, and a failing mknod() are. */
350 static int copy_devnodes(const char *dest) {
352 static const char devnodes[] =
370 NULSTR_FOREACH(d, devnodes) {
372 char *from = NULL, *to = NULL;
/* NOTE(review): the return values of both asprintf() calls are discarded.
 * glibc documents the output pointer as undefined after a failed asprintf(),
 * so checking the return value would be more robust than inspecting from/to
 * afterwards. */
374 asprintf(&from, "/dev/%s", d);
375 asprintf(&to, "%s/dev/%s", dest, d);
378 log_error("Failed to allocate devnode path");
391 if (stat(from, &st) < 0) {
393 if (errno != ENOENT) {
394 log_error("Failed to stat %s: %m", from);
399 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
401 log_error("%s is not a char or block device, cannot copy.", from);
405 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
/* NOTE(review): the message prints dest, but the path handed to mknod() is
 * `to`; the log therefore does not identify the node that failed. */
407 log_error("mknod(%s) failed: %m", dest);
/* Installs the terminal named by console as <dest>/dev/console: checks that
 * console is a character device, adjusts its access mode, creates a device
 * node at the target path and finally bind-mounts console on top of that
 * node. */
421 static int setup_dev_console(const char *dest, const char *console) {
432 if (stat(console, &st) < 0) {
433 log_error("Failed to stat %s: %m", console);
437 } else if (!S_ISCHR(st.st_mode)) {
438 log_error("/dev/console is not a char device.");
/* chmod_and_chown() reports failure as a negative errno-style value (negated
 * for strerror() below). Presumably the arguments mean mode 0600, uid 0,
 * gid 0 -- confirm against the helper. */
443 r = chmod_and_chown(console, 0600, 0, 0);
445 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
449 if (asprintf(&to, "%s/dev/console", dest) < 0) {
450 log_error("Out of memory");
455 /* We need to bind mount the right tty to /dev/console since
456 * ptys can only exist on pts file systems. To have something
457 * to bind mount things on we create a device node first, that
458 * has the right major/minor (note that the major minor
459 * doesn't actually matter here, since we mount it over
462 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
463 log_error("mknod() for /dev/console failed: %m");
468 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
469 log_error("Bind mount for /dev/console failed: %m");
/* Provides a stand-in for the kernel log inside the container: creates a FIFO
 * at <dest>/dev/kmsg, bind-mounts it onto <dest>/proc/kmsg, opens it and
 * sends the open file descriptor over kmsg_socket as SCM_RIGHTS ancillary
 * data. kmsg_socket must be a valid (non-negative) descriptor. */
481 static int setup_kmsg(const char *dest, int kmsg_socket) {
482 char *from = NULL, *to = NULL;
/* Control-message storage: buf is sized for exactly one control message
 * carrying one int (the file descriptor). */
486 struct cmsghdr cmsghdr;
487 uint8_t buf[CMSG_SPACE(sizeof(int))];
490 struct cmsghdr *cmsg;
493 assert(kmsg_socket >= 0);
497 /* We create the kmsg FIFO as /dev/kmsg, but immediately
498 * delete it after bind mounting it to /proc/kmsg. While FIFOs
499 * on the reading side behave very similar to /proc/kmsg,
500 * their writing side behaves differently from /dev/kmsg in
501 * that writing blocks when nothing is reading. In order to
502 * avoid any problems with containers deadlocking due to this
503 * we simply make /dev/kmsg unavailable to the container. */
504 if (asprintf(&from, "%s/dev/kmsg", dest) < 0) {
505 log_error("Out of memory");
510 if (asprintf(&to, "%s/proc/kmsg", dest) < 0) {
511 log_error("Out of memory");
516 if (mkfifo(from, 0600) < 0) {
517 log_error("mkfifo() for /dev/kmsg failed: %m");
/* chmod_and_chown() reports failure as a negative errno-style value. */
522 r = chmod_and_chown(from, 0600, 0, 0);
524 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
528 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
529 log_error("Bind mount for /proc/kmsg failed: %m");
/* Open the FIFO for reading and writing, non-blocking and close-on-exec. */
534 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
536 log_error("Failed to open fifo: %m");
/* Build a message whose only content is one SCM_RIGHTS control message
 * holding fd. */
544 mh.msg_control = &control;
545 mh.msg_controllen = sizeof(control);
547 cmsg = CMSG_FIRSTHDR(&mh);
548 cmsg->cmsg_level = SOL_SOCKET;
549 cmsg->cmsg_type = SCM_RIGHTS;
550 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
551 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
/* Trim the control length to the size of the single message just built. */
553 mh.msg_controllen = cmsg->cmsg_len;
555 /* Store away the fd in the socket, so that it stays open as
556 * long as we run the child */
557 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
/* The local descriptor is closed right after the send attempt; the result of
 * sendmsg() stays in k and is reported afterwards. */
558 close_nointr_nofail(fd);
561 log_error("Failed to send FIFO fd: %m");
566 /* And now make the FIFO unavailable as /dev/kmsg... */
/* Sets the host name from the last path component of arg_directory, after
 * passing it through hostname_cleanup().
 * NOTE(review): hostname_cleanup() is called on hn as a statement;
 * presumably it edits the string in place, so hn should be a private copy
 * rather than a pointer into arg_directory -- confirm. */
577 static int setup_hostname(void) {
581 hn = path_get_file_name(arg_directory);
587 hostname_cleanup(hn);
590 if (sethostname(hn, strlen(hn)) < 0)
/* Removes every capability that is not set in arg_retain from the bounding
 * set by passing the complement of arg_retain to
 * capability_bounding_set_drop(), and returns that helper's result
 * unchanged. NOTE(review): the meaning of the second argument (false) is
 * defined by the helper -- confirm. */
599 static int drop_capabilities(void) {
600 return capability_bounding_set_drop(~arg_retain, false);
/* Heuristic check whether path is the root of an OS installation, based on
 * <path>/bin/sh. Returns 1 when the check stored in r succeeded (r >= 0) and
 * 0 when it failed. NOTE(review): r is presumably the result of an existence
 * test on p; main() also treats a negative return as "not an OS tree" --
 * confirm what the allocation-failure path returns. */
603 static int is_os_tree(const char *path) {
606 /* We use /bin/sh as flag file if something is an OS */
608 if (asprintf(&p, "%s/bin/sh", path) < 0)
614 return r < 0 ? 0 : 1;
/* Relays data between the caller's terminal and the pty master: bytes read
 * from stdin are written to master and bytes read from master are written to
 * stdout, each direction through its own LINE_MAX-sized buffer. Signals in
 * mask are received through a signalfd; SIGWINCH causes the window size of
 * stdin to be copied to master.
 * NOTE(review): how the other signals in mask end the loop is not visible
 * here -- confirm. */
617 static int process_pty(int master, sigset_t *mask) {
/* in_buffer holds stdin -> master data, out_buffer master -> stdout data;
 * the *_full counters give the number of pending bytes in each. */
619 char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
620 size_t in_buffer_full = 0, out_buffer_full = 0;
621 struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
/* Readiness flags: set when epoll reports an event and cleared when a
 * read()/write() fails with one of the errno values tested below. */
622 bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
623 int ep = -1, signal_fd = -1, r;
/* All three descriptors are switched to non-blocking mode. */
625 fd_nonblock(STDIN_FILENO, 1);
626 fd_nonblock(STDOUT_FILENO, 1);
627 fd_nonblock(master, 1);
629 if ((signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) {
630 log_error("signalfd(): %m");
635 if ((ep = epoll_create1(EPOLL_CLOEXEC)) < 0) {
636 log_error("Failed to create epoll: %m");
/* stdin, stdout and master are watched edge-triggered (EPOLLET); the signal
 * descriptor is watched without EPOLLET. */
642 stdin_ev.events = EPOLLIN|EPOLLET;
643 stdin_ev.data.fd = STDIN_FILENO;
646 stdout_ev.events = EPOLLOUT|EPOLLET;
647 stdout_ev.data.fd = STDOUT_FILENO;
650 master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
651 master_ev.data.fd = master;
654 signal_ev.events = EPOLLIN;
655 signal_ev.data.fd = signal_fd;
657 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
658 epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
659 epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
660 epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
/* NOTE(review): "regiser" in the message below is a typo for "register". */
661 log_error("Failed to regiser fds in epoll: %m");
667 struct epoll_event ev[16];
/* Wait without timeout for up to ELEMENTSOF(ev) events. EINTR and EAGAIN are
 * singled out from the other errors, presumably to retry -- confirm. */
671 if ((nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1)) < 0) {
673 if (errno == EINTR || errno == EAGAIN)
676 log_error("epoll_wait(): %m");
/* Translate the reported events into the readiness flags. EPOLLHUP also
 * counts as ready. */
683 for (i = 0; i < nfds; i++) {
684 if (ev[i].data.fd == STDIN_FILENO) {
686 if (ev[i].events & (EPOLLIN|EPOLLHUP))
687 stdin_readable = true;
689 } else if (ev[i].data.fd == STDOUT_FILENO) {
691 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
692 stdout_writable = true;
694 } else if (ev[i].data.fd == master) {
696 if (ev[i].events & (EPOLLIN|EPOLLHUP))
697 master_readable = true;
699 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
700 master_writable = true;
702 } else if (ev[i].data.fd == signal_fd) {
703 struct signalfd_siginfo sfsi;
/* One complete signalfd_siginfo record is expected per read. */
706 if ((n = read(signal_fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
709 log_error("Failed to read from signalfd: invalid block size");
714 if (errno != EINTR && errno != EAGAIN) {
715 log_error("Failed to read from signalfd: %m");
721 if (sfsi.ssi_signo == SIGWINCH) {
724 /* The window size changed, let's forward that. */
725 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
726 ioctl(master, TIOCSWINSZ, &ws);
/* Keep transferring while any step can make progress: an empty buffer whose
 * source is readable, or a non-empty buffer whose sink is writable. */
735 while ((stdin_readable && in_buffer_full <= 0) ||
736 (master_writable && in_buffer_full > 0) ||
737 (master_readable && out_buffer_full <= 0) ||
738 (stdout_writable && out_buffer_full > 0)) {
/* stdin -> in_buffer (appended after the pending bytes). */
740 if (stdin_readable && in_buffer_full < LINE_MAX) {
742 if ((k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full)) < 0) {
/* These errno values only clear the readiness flag; anything else is logged
 * as an error. */
744 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
745 stdin_readable = false;
747 log_error("read(): %m");
/* NOTE(review): no handling of read() returning 0 (end of file) is visible;
 * confirm the enclosing loop cannot spin once stdin is at EOF. */
752 in_buffer_full += (size_t) k;
/* in_buffer -> master. */
755 if (master_writable && in_buffer_full > 0) {
757 if ((k = write(master, in_buffer, in_buffer_full)) < 0) {
759 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
760 master_writable = false;
762 log_error("write(): %m");
/* After a partial write, move the bytes not yet written to the front of the
 * buffer. */
768 assert(in_buffer_full >= (size_t) k);
769 memmove(in_buffer, in_buffer + k, in_buffer_full - k);
/* master -> out_buffer. */
774 if (master_readable && out_buffer_full < LINE_MAX) {
776 if ((k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full)) < 0) {
778 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
779 master_readable = false;
781 log_error("read(): %m");
786 out_buffer_full += (size_t) k;
/* out_buffer -> stdout. */
789 if (stdout_writable && out_buffer_full > 0) {
791 if ((k = write(STDOUT_FILENO, out_buffer, out_buffer_full)) < 0) {
793 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
794 stdout_writable = false;
796 log_error("write(): %m");
802 assert(out_buffer_full >= (size_t) k);
803 memmove(out_buffer, out_buffer + k, out_buffer_full - k);
804 out_buffer_full -= k;
/* Release the epoll instance and the signalfd. */
812 close_nointr_nofail(ep);
815 close_nointr_nofail(signal_fd);
/* Program entry point. Visible flow: parse the options; resolve and
 * sanity-check the container directory; create a dedicated cgroup; allocate
 * a pty and put the caller's terminal into raw mode; clone() a child into
 * new namespaces. The child prepares mounts, device nodes, console and kmsg
 * below arg_directory, makes that directory its root, and executes an init
 * binary, the given command or a shell. The parent relays terminal traffic
 * with process_pty(), waits for the child, restores the terminal settings
 * and cleans up the cgroup. */
820 int main(int argc, char *argv[]) {
822 int r = EXIT_FAILURE, k;
/* oldcg: cgroup this process started in; newcg: cgroup created for the
 * container. */
823 char *oldcg = NULL, *newcg = NULL;
824 char **controller = NULL;
826 const char *console = NULL;
827 struct termios saved_attr, raw_attr;
/* True while saved_attr holds terminal settings that still have to be
 * restored. */
829 bool saved_attr_valid = false;
831 int kmsg_socket_pair[2] = { -1, -1 };
833 log_parse_environment();
/* A result <= 0 from parse_argv() ends the program here. */
836 if ((r = parse_argv(argc, argv)) <= 0)
/* Turn arg_directory into an absolute path, or fall back to the current
 * working directory. NOTE(review): presumably the fallback applies when no
 * -D was given -- confirm. */
842 p = path_make_absolute_cwd(arg_directory);
846 arg_directory = get_current_dir_name();
848 if (!arg_directory) {
849 log_error("Failed to determine path");
853 path_kill_slashes(arg_directory);
/* Preconditions: root privileges, a systemd-booted host, a directory other
 * than "/" that looks like an OS tree. */
855 if (geteuid() != 0) {
856 log_error("Need to be root.");
860 if (sd_booted() <= 0) {
861 log_error("Not running on a systemd system.");
865 if (path_equal(arg_directory, "/")) {
866 log_error("Spawning container on root directory not supported.");
870 if (is_os_tree(arg_directory) <= 0) {
871 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
/* The container gets a cgroup named "<current cgroup>/nspawn-<pid>". */
875 if ((k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg)) < 0) {
876 log_error("Failed to determine current cgroup: %s", strerror(-k));
880 if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
881 log_error("Failed to allocate cgroup path.");
885 k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0);
887 log_error("Failed to create cgroup: %s", strerror(-k));
/* For the controllers requested with -C a failure is only logged as a
 * warning. */
891 STRV_FOREACH(controller,arg_controllers) {
892 k = cg_create_and_attach(*controller, newcg, 0);
894 log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
/* Allocate the pty whose slave side becomes the container's console. */
897 if ((master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY)) < 0) {
898 log_error("Failed to acquire pseudo tty: %m");
902 if (!(console = ptsname(master))) {
903 log_error("Failed to determine tty name: %m");
907 log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
/* Give the pty the same window size as the caller's terminal, if that size
 * can be queried. */
909 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
910 ioctl(master, TIOCSWINSZ, &ws);
912 if (unlockpt(master) < 0) {
913 log_error("Failed to unlock tty: %m");
/* Remember the current terminal settings, then switch stdin to raw mode with
 * echo disabled. */
917 if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) {
918 log_error("Failed to get terminal attributes: %m");
922 saved_attr_valid = true;
924 raw_attr = saved_attr;
925 cfmakeraw(&raw_attr);
926 raw_attr.c_lflag &= ~ECHO;
928 if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
929 log_error("Failed to set terminal attributes: %m");
/* Socket pair used by setup_kmsg(), which stores the kmsg FIFO descriptor in
 * it. */
933 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
934 log_error("Failed to create kmsg socket pair");
/* Block the signals that process_pty() later receives through its signalfd. */
938 assert_se(sigemptyset(&mask) == 0);
939 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
940 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
/* Raw clone() system call: new IPC, mount, PID and UTS namespaces, plus a
 * new network namespace with --private-network; SIGCHLD is the signal sent
 * to the parent when the child exits. */
942 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
945 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
947 log_error("clone() failed: %m");
/* NOTE(review): from here down to _exit() the code appears to be the child
 * branch; the test on pid is not visible -- confirm. */
955 const char *home = NULL;
956 uid_t uid = (uid_t) -1;
957 gid_t gid = (gid_t) -1;
/* Environment for the payload; slots are filled in by index below (2: TERM,
 * 3: HOME, 4: USER, 5: LOGNAME, 6: container_uuid). */
958 const char *envp[] = {
959 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
960 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
965 NULL, /* container_uuid */
969 envp[2] = strv_find_prefix(environ, "TERM=");
/* Drop the pty master and the standard descriptors. NOTE(review):
 * close_all_fds() presumably closes everything except the listed descriptor
 * (the child's end of the kmsg socket pair) -- confirm. */
971 close_nointr_nofail(master);
973 close_nointr(STDIN_FILENO);
974 close_nointr(STDOUT_FILENO);
975 close_nointr(STDERR_FILENO);
977 close_all_fds(&kmsg_socket_pair[1], 1);
/* Start with default signal dispositions and an empty signal mask. */
979 reset_all_signal_handlers();
981 assert_se(sigemptyset(&mask) == 0);
982 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
/* Ask the kernel to send SIGKILL to this process when its parent dies. */
987 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
990 /* Mark / as private, in case somebody marked it shared */
991 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0)
994 /* Turn directory into bind mount */
995 if (mount(arg_directory, arg_directory, "bind", MS_BIND, NULL) < 0) {
996 log_error("Failed to make bind mount.");
/* NOTE(review): the read-only remount presumably only happens with
 * --read-only; the guarding test is not visible -- confirm. */
1001 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
1002 log_error("Failed to make read-only.");
/* Populate the container root: API file systems, device nodes, console,
 * kmsg, time zone and resolv.conf. */
1006 if (mount_all(arg_directory) < 0)
1009 if (copy_devnodes(arg_directory) < 0)
1012 if (setup_dev_console(arg_directory, console) < 0)
1015 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1018 close_nointr_nofail(kmsg_socket_pair[1]);
1020 if (setup_timezone(arg_directory) < 0)
1023 if (setup_resolv_conf(arg_directory) < 0)
1026 if (chdir(arg_directory) < 0) {
1027 log_error("chdir(%s) failed: %m", arg_directory);
/* Descriptors 0, 1 and 2 were closed above, so the console is expected to be
 * opened as descriptor 0; it is then duplicated onto stdout and stderr. */
1031 if (open_terminal("dev/console", O_RDWR) != STDIN_FILENO ||
1032 dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1033 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
/* Move the container mount onto "/" and chroot into it.
 * NOTE(review): the message below says MS_BIND although the call uses
 * MS_MOVE. */
1036 if (mount(arg_directory, "/", "bind", MS_MOVE, NULL) < 0) {
1037 log_error("mount(MS_BIND) failed: %m");
1041 if (chroot(".") < 0) {
1042 log_error("chroot() failed: %m");
1046 if (chdir("/") < 0) {
1047 log_error("chdir() failed: %m");
/* NOTE(review): drop_capabilities() forwards the result of
 * capability_bounding_set_drop(); whether errno is meaningful for %m here
 * depends on that helper -- confirm. */
1055 if (drop_capabilities() < 0) {
1056 log_error("drop_capabilities() failed: %m");
/* Resolve the requested user, create the home directory and switch group and
 * user ids. NOTE(review): this sequence presumably only runs when --user was
 * given -- confirm. */
1062 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home) < 0) {
1063 log_error("get_user_creds() failed: %m");
1067 if (mkdir_parents_label(home, 0775) < 0) {
1068 log_error("mkdir_parents_label() failed: %m");
1072 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1073 log_error("mkdir_safe_label() failed: %m");
1077 if (initgroups((const char*)arg_user, gid) < 0) {
1078 log_error("initgroups() failed: %m");
/* NOTE(review): the two messages below name setregid()/setreuid(), but the
 * calls that failed are setresgid()/setresuid(). */
1082 if (setresgid(gid, gid, gid) < 0) {
1083 log_error("setregid() failed: %m");
1087 if (setresuid(uid, uid, uid) < 0) {
1088 log_error("setreuid() failed: %m");
/* Fill in HOME, USER and LOGNAME, defaulting to root's values. */
1093 if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
1094 (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1095 (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1096 log_error("Out of memory");
1101 if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
1102 log_error("Out of memory");
1113 /* Automatically search for the init system */
/* Build the argument vector for init: a[0] is set to each candidate path in
 * turn, followed by the remaining command-line arguments. l counts those
 * arguments plus one, so the memcpy() also copies argv's terminating NULL. */
1115 l = 1 + argc - optind;
1116 a = newa(char*, l + 1);
1117 memcpy(a + 1, argv + optind, l * sizeof(char*));
/* execve() only returns on failure, so each later candidate is tried only if
 * the previous one could not be executed. */
1119 a[0] = (char*) "/usr/lib/systemd/systemd";
1120 execve(a[0], a, (char**) envp);
1122 a[0] = (char*) "/lib/systemd/systemd";
1123 execve(a[0], a, (char**) envp);
1125 a[0] = (char*) "/sbin/init";
1126 execve(a[0], a, (char**) envp);
/* Otherwise run the command given on the command line, or, without one,
 * /bin/bash with argv[0] "-bash" from the home directory. The result of
 * chdir() is not checked. */
1127 } else if (argc > optind)
1128 execvpe(argv[optind], argv + optind, (char**) envp);
1130 chdir(home ? home : "/root");
1131 execle("/bin/bash", "-bash", NULL, (char**) envp);
/* Only reached when none of the exec calls succeeded. */
1134 log_error("execv() failed: %m");
1137 _exit(EXIT_FAILURE);
/* Relay terminal traffic between the caller and the container. */
1140 if (process_pty(master, &mask) < 0)
/* Restore the terminal now and clear the flag so that the second restore
 * further down is skipped. */
1143 if (saved_attr_valid) {
1144 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1145 saved_attr_valid = false;
1148 r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid);
/* Cleanup: restore the terminal if that has not happened yet, close the pty
 * master and the socket pair, move back to the original cgroup and kill
 * whatever is left in the container's cgroup. */
1154 if (saved_attr_valid)
1155 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1158 close_nointr_nofail(master);
1160 close_pipe(kmsg_socket_pair);
1163 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
1166 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
1168 free(arg_directory);
1169 strv_free(arg_controllers);