From 613b411c947635136637f8cdd66b94512f761eab Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 27 Nov 2013 20:23:18 +0100 Subject: [PATCH 1/1] service: add the ability for units to join other unit's PrivateNetwork= and PrivateTmp= namespaces --- man/systemd.exec.xml | 30 ++- man/systemd.unit.xml | 31 +++ src/core/dbus-unit.c | 1 + src/core/execute.c | 348 ++++++++++++++++++++------ src/core/execute.h | 27 +- src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/load-fragment.c | 8 +- src/core/mount.c | 31 +-- src/core/mount.h | 2 + src/core/namespace.c | 232 +++++++++++++---- src/core/namespace.h | 11 +- src/core/service.c | 36 +-- src/core/service.h | 3 + src/core/socket.c | 41 ++- src/core/socket.h | 1 + src/core/swap.c | 34 +-- src/core/swap.h | 2 + src/core/unit.c | 118 +++++++-- src/core/unit.h | 11 + src/machine/machinectl.c | 10 +- src/shared/util.c | 50 ---- src/shared/util.h | 1 - src/test/test-namespace.c | 99 +++++++- src/test/test-ns.c | 1 - 24 files changed, 790 insertions(+), 339 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 8c44071d5..207592dda 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -846,9 +846,9 @@ system namespace for the executed processes and mounts private /tmp and - /var/tmp directories - inside it, that are not shared by - processes outside of the + /var/tmp + directories inside it, that are not + shared by processes outside of the namespace. This is useful to secure access to temporary files of the process, but makes sharing between @@ -856,9 +856,17 @@ /tmp or /var/tmp impossible. All temporary data created - by service will be removed after service - is stopped. Defaults to - false. + by service will be removed after + service is stopped. Defaults to + false. Note that it is possible to run + two or more units within the same + private /tmp and + /var/tmp + namespace by using the + JoinsNamespaceOf= + directive, see + systemd.unit5 + for details. 
@@ -874,8 +882,14 @@ available to the executed process. This is useful to securely turn off network access by the executed - process. Defaults to - false. + process. Defaults to false. Note that + it is possible to run two or more + units within the same private network + namespace by using the + JoinsNamespaceOf= + directive, see + systemd.unit5 + for details. diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml index 2c8bfbef7..4dc427c30 100644 --- a/man/systemd.unit.xml +++ b/man/systemd.unit.xml @@ -656,6 +656,37 @@ settings. + + JoinsNamespaceOf= + + For units that start + processes (such as service units), + lists one or more other units whose + network and/or temporary file + namespace to join. This only applies + to unit types which support the + PrivateNetwork= and + PrivateTmp= + directives (see + systemd.exec5 + for details). If a unit that has this + setting set is started, its processes + will see the same + /tmp, + /var/tmp and + network namespace as one listed unit + that is started. If multiple listed + units are already started, it is not + defined which namespace is + joined. Note that this setting only + has an effect if + PrivateNetwork= + and/or PrivateTmp= + is enabled for both the unit that + joins the namespace and the unit whose + namespace is joined. 
+ + RequiresMountsFor= diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index f33e8db83..1fec0e3b1 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -522,6 +522,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("TriggeredBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_TRIGGERED_BY]), 0), SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), 0), SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), 0), + SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), 0), SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), 0), SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), 0), SD_BUS_PROPERTY("Description", "s", property_get_description, 0, 0), diff --git a/src/core/execute.c b/src/core/execute.c index 1e571da69..a0d63a41f 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -61,7 +61,6 @@ #include "missing.h" #include "utmp-wtmp.h" #include "def.h" -#include "loopback-setup.h" #include "path-util.h" #include "syscall-list.h" #include "env-util.h" @@ -176,24 +175,13 @@ static bool is_terminal_output(ExecOutput o) { o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE; } -void exec_context_serialize(const ExecContext *context, Unit *u, FILE *f) { - assert(context); - assert(u); - assert(f); - - if (context->tmp_dir) - unit_serialize_item(u, f, "tmp-dir", context->tmp_dir); - - if (context->var_tmp_dir) - unit_serialize_item(u, f, "var-tmp-dir", context->var_tmp_dir); -} - static int open_null_as(int flags, int nfd) { int fd, r; assert(nfd >= 0); - if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0) + fd = open("/dev/null", flags|O_NOCTTY); + if (fd < 0) return -errno; if (fd != nfd) { @@ -1037,6 +1025,7 @@ int exec_spawn(ExecCommand *command, const 
char *cgroup_path, const char *unit_id, int idle_pipe[4], + ExecRuntime *runtime, pid_t *ret) { _cleanup_strv_free_ char **files_env = NULL; @@ -1088,25 +1077,19 @@ int exec_spawn(ExecCommand *command, NULL); free(line); - if (context->private_tmp && !context->tmp_dir && !context->var_tmp_dir) { - r = setup_tmpdirs(unit_id, &context->tmp_dir, &context->var_tmp_dir); - if (r < 0) - return r; - } - pid = fork(); if (pid < 0) return -errno; if (pid == 0) { - int i, err; - sigset_t ss; + _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; const char *username = NULL, *home = NULL, *shell = NULL; + unsigned n_dont_close = 0, n_env = 0; + int dont_close[n_fds + 3]; uid_t uid = (uid_t) -1; gid_t gid = (gid_t) -1; - _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, - **final_env = NULL, **final_argv = NULL; - unsigned n_env = 0; + sigset_t ss; + int i, err; /* child */ @@ -1137,8 +1120,21 @@ int exec_spawn(ExecCommand *command, * block init reexecution because it cannot bind its * sockets */ log_forget_fds(); - err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds, - socket_fd >= 0 ? 
1 : n_fds); + + if (socket_fd >= 0) + dont_close[n_dont_close++] = socket_fd; + if (n_fds > 0) { + memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds); + n_dont_close += n_fds; + } + if (runtime) { + if (runtime->netns_storage_socket[0] >= 0) + dont_close[n_dont_close++] = runtime->netns_storage_socket[0]; + if (runtime->netns_storage_socket[1] >= 0) + dont_close[n_dont_close++] = runtime->netns_storage_socket[1]; + } + + err = close_all_fds(dont_close, n_dont_close); if (err < 0) { r = EXIT_FDS; goto fail_child; @@ -1335,28 +1331,43 @@ int exec_spawn(ExecCommand *command, } } #endif - if (context->private_network) { - if (unshare(CLONE_NEWNET) < 0) { - err = -errno; + if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) { + err = setup_netns(runtime->netns_storage_socket); + if (err < 0) { r = EXIT_NETWORK; goto fail_child; } - - loopback_setup(); } - if (strv_length(context->read_write_dirs) > 0 || - strv_length(context->read_only_dirs) > 0 || - strv_length(context->inaccessible_dirs) > 0 || + if (!strv_isempty(context->read_write_dirs) || + !strv_isempty(context->read_only_dirs) || + !strv_isempty(context->inaccessible_dirs) || context->mount_flags != 0 || - context->private_tmp) { - err = setup_namespace(context->read_write_dirs, - context->read_only_dirs, - context->inaccessible_dirs, - context->tmp_dir, - context->var_tmp_dir, - context->private_tmp, - context->mount_flags); + (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) { + + char *tmp = NULL, *var = NULL; + + /* The runtime struct only contains the parent + * of the private /tmp, which is + * non-accessible to world users. Inside of it + * there's a /tmp that is sticky, and that's + * the one we want to use here. 
*/ + + if (context->private_tmp && runtime) { + if (runtime->tmp_dir) + tmp = strappenda(runtime->tmp_dir, "/tmp"); + if (runtime->var_tmp_dir) + var = strappenda(runtime->var_tmp_dir, "/tmp"); + } + + err = setup_namespace( + context->read_write_dirs, + context->read_only_dirs, + context->inaccessible_dirs, + tmp, + var, + context->mount_flags); + if (err < 0) { r = EXIT_NAMESPACE; goto fail_child; @@ -1580,43 +1591,7 @@ void exec_context_init(ExecContext *c) { c->timer_slack_nsec = (nsec_t) -1; } -static void *remove_tmpdir_thread(void *p) { - int r; - _cleanup_free_ char *dirp = p; - char *dir; - - assert(dirp); - - r = rm_rf_dangerous(dirp, false, true, false); - dir = dirname(dirp); - if (r < 0) - log_warning("Failed to remove content of temporary directory %s: %s", - dir, strerror(-r)); - else { - r = rmdir(dir); - if (r < 0) - log_warning("Failed to remove temporary directory %s: %s", - dir, strerror(-r)); - } - - return NULL; -} - -void exec_context_tmp_dirs_done(ExecContext *c) { - char* dirs[] = {c->tmp_dir ? c->tmp_dir : c->var_tmp_dir, - c->tmp_dir ? c->var_tmp_dir : NULL, - NULL}; - char **dirp; - - for(dirp = dirs; *dirp; dirp++) { - log_debug("Spawning thread to nuke %s", *dirp); - asynchronous_job(remove_tmpdir_thread, *dirp); - } - - c->tmp_dir = c->var_tmp_dir = NULL; -} - -void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) { +void exec_context_done(ExecContext *c) { unsigned l; assert(c); @@ -1680,9 +1655,6 @@ void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) { free(c->syscall_filter); c->syscall_filter = NULL; - - if (!reloading_or_reexecuting) - exec_context_tmp_dirs_done(c); } void exec_command_done(ExecCommand *c) { @@ -2229,6 +2201,216 @@ int exec_command_set(ExecCommand *c, const char *path, ...) 
{ return 0; } +static int exec_runtime_allocate(ExecRuntime **rt) { + + if (*rt) + return 0; + + *rt = new0(ExecRuntime, 1); + if (!*rt) /* test the allocation result, not the out-pointer */ + return -ENOMEM; + + (*rt)->n_ref = 1; + (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1; + + return 0; +} + +int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) { + int r; + + assert(rt); + assert(c); + assert(id); + + if (*rt) + return 1; + + if (!c->private_network && !c->private_tmp) + return 0; + + r = exec_runtime_allocate(rt); + if (r < 0) + return r; + + if (c->private_network && (*rt)->netns_storage_socket[0] < 0) { + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0) + return -errno; + } + + if (c->private_tmp && !(*rt)->tmp_dir) { + r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir); + if (r < 0) + return r; + } + + return 1; +} + +ExecRuntime *exec_runtime_ref(ExecRuntime *r) { + assert(r); + assert(r->n_ref > 0); + + r->n_ref++; + return r; +} + +ExecRuntime *exec_runtime_unref(ExecRuntime *r) { + + if (!r) + return NULL; + + assert(r->n_ref > 0); + + r->n_ref--; + if (r->n_ref <= 0) { + free(r->tmp_dir); + free(r->var_tmp_dir); + close_pipe(r->netns_storage_socket); + free(r); + } + + return NULL; +} + +int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) { + assert(u); + assert(f); + assert(fds); + + if (!rt) + return 0; + + if (rt->tmp_dir) + unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir); + + if (rt->var_tmp_dir) + unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir); + + if (rt->netns_storage_socket[0] >= 0) { + int copy; + + copy = fdset_put_dup(fds, rt->netns_storage_socket[0]); + if (copy < 0) + return copy; + + unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy); + } + + if (rt->netns_storage_socket[1] >= 0) { + int copy; + + copy = fdset_put_dup(fds, rt->netns_storage_socket[1]); + if (copy < 0) + return copy; + + unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy); + } + + return 0; +} 
+ +int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) { + int r; + + assert(rt); + assert(key); + assert(value); + + if (streq(key, "tmp-dir")) { + char *copy; + + r = exec_runtime_allocate(rt); + if (r < 0) + return r; + + copy = strdup(value); + if (!copy) + return log_oom(); + + free((*rt)->tmp_dir); + (*rt)->tmp_dir = copy; + + } else if (streq(key, "var-tmp-dir")) { + char *copy; + + r = exec_runtime_allocate(rt); + if (r < 0) + return r; + + copy = strdup(value); + if (!copy) + return log_oom(); + + free((*rt)->var_tmp_dir); + (*rt)->var_tmp_dir = copy; + + } else if (streq(key, "netns-socket-0")) { + int fd; + + r = exec_runtime_allocate(rt); + if (r < 0) + return r; + + if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) + log_debug_unit(u->id, "Failed to parse netns socket value %s", value); + else { + if ((*rt)->netns_storage_socket[0] >= 0) + close_nointr_nofail((*rt)->netns_storage_socket[0]); + + (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd); + } + } else if (streq(key, "netns-socket-1")) { + int fd; + + r = exec_runtime_allocate(rt); + if (r < 0) + return r; + + if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) + log_debug_unit(u->id, "Failed to parse netns socket value %s", value); + else { + if ((*rt)->netns_storage_socket[1] >= 0) + close_nointr_nofail((*rt)->netns_storage_socket[1]); + + (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd); + } + } else + return 0; + + return 1; +} + +static void *remove_tmpdir_thread(void *p) { + _cleanup_free_ char *path = p; + + rm_rf_dangerous(path, false, true, false); + return NULL; +} + +void exec_runtime_destroy(ExecRuntime *rt) { + if (!rt) + return; + + /* If there are multiple users of this, let's leave the stuff around */ + if (rt->n_ref > 1) + return; + + if (rt->tmp_dir) { + log_debug("Spawning thread to nuke %s", rt->tmp_dir); + asynchronous_job(remove_tmpdir_thread, rt->tmp_dir); + rt->tmp_dir = NULL; + } + + if 
(rt->var_tmp_dir) { + log_debug("Spawning thread to nuke %s", rt->var_tmp_dir); + asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir); + rt->var_tmp_dir = NULL; + } + + close_pipe(rt->netns_storage_socket); +} + static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_NULL] = "null", [EXEC_INPUT_TTY] = "tty", diff --git a/src/core/execute.h b/src/core/execute.h index 368c9f00b..5143fcaa5 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -24,6 +24,7 @@ typedef struct ExecStatus ExecStatus; typedef struct ExecCommand ExecCommand; typedef struct ExecContext ExecContext; +typedef struct ExecRuntime ExecRuntime; #include #include @@ -35,6 +36,7 @@ typedef struct ExecContext ExecContext; #include "list.h" #include "util.h" +#include "fdset.h" typedef struct Unit Unit; @@ -79,6 +81,15 @@ struct ExecCommand { bool ignore; }; +struct ExecRuntime { + int n_ref; + + char *tmp_dir; + char *var_tmp_dir; + + int netns_storage_socket[2]; +}; + struct ExecContext { char **environment; char **environment_files; @@ -140,8 +151,6 @@ struct ExecContext { bool non_blocking; bool private_tmp; bool private_network; - char *tmp_dir; - char *var_tmp_dir; bool no_new_privileges; @@ -175,6 +184,7 @@ int exec_spawn(ExecCommand *command, const char *cgroup_path, const char *unit_id, int pipe_fd[2], + ExecRuntime *runtime, pid_t *ret); void exec_command_done(ExecCommand *c); @@ -191,19 +201,26 @@ void exec_command_append_list(ExecCommand **l, ExecCommand *e); int exec_command_set(ExecCommand *c, const char *path, ...); void exec_context_init(ExecContext *c); -void exec_context_done(ExecContext *c, bool reloading_or_reexecuting); -void exec_context_tmp_dirs_done(ExecContext *c); +void exec_context_done(ExecContext *c); void exec_context_dump(ExecContext *c, FILE* f, const char *prefix); int exec_context_load_environment(const ExecContext *c, char ***l); bool exec_context_may_touch_console(ExecContext *c); -void exec_context_serialize(const ExecContext *c, Unit *u, 
FILE *f); void exec_status_start(ExecStatus *s, pid_t pid); void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status); void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix); +int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id); +ExecRuntime *exec_runtime_ref(ExecRuntime *r); +ExecRuntime *exec_runtime_unref(ExecRuntime *r); + +int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds); +int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds); + +void exec_runtime_destroy(ExecRuntime *rt); + const char* exec_output_to_string(ExecOutput i) _const_; ExecOutput exec_output_from_string(const char *s) _pure_; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index c06255041..663deaebd 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -116,6 +116,7 @@ Unit.PropagateReloadTo, config_parse_unit_deps, UNIT_PROPAG Unit.ReloadPropagatedFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0 Unit.PropagateReloadFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0 Unit.PartOf, config_parse_unit_deps, UNIT_PART_OF, 0 +Unit.JoinsNamespaceOf, config_parse_unit_deps, UNIT_JOINS_NAMESPACE_OF, 0 Unit.RequiresMountsFor, config_parse_unit_requires_mounts_for, 0, 0 Unit.StopWhenUnneeded, config_parse_bool, 0, offsetof(Unit, stop_when_unneeded) Unit.RefuseManualStart, config_parse_bool, 0, offsetof(Unit, refuse_manual_start) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index d9dd6faae..8a42e739b 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -1350,15 +1350,17 @@ int config_parse_socket_service(const char *unit, assert(data); r = unit_name_printf(UNIT(s), rvalue, &p); - if (r < 0) + if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, -r, "Failed to resolve specifiers, ignoring: %s", rvalue); + return 0; + } - 
if (!endswith(p ?: rvalue, ".service")) { + if (!endswith(p, ".service")) { log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Unit must be of type service, ignoring: %s", rvalue); return 0; } - r = manager_load_unit(UNIT(s)->manager, p ?: rvalue, NULL, &error, &x); + r = manager_load_unit(UNIT(s)->manager, p, NULL, &error, &x); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Failed to load unit %s, ignoring: %s", rvalue, bus_error_message(&error, r)); return 0; diff --git a/src/core/mount.c b/src/core/mount.c index 634dff19a..e46c72a64 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -217,7 +217,8 @@ static void mount_done(Unit *u) { mount_parameters_done(&m->parameters_fragment); cgroup_context_done(&m->cgroup_context); - exec_context_done(&m->exec_context, manager_is_reloading_or_reexecuting(u->manager)); + exec_context_done(&m->exec_context); + m->exec_runtime = exec_runtime_unref(m->exec_runtime); exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX); m->control_command = NULL; @@ -772,6 +773,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { unit_realize_cgroup(UNIT(m)); + r = unit_setup_exec_runtime(UNIT(m)); + if (r < 0) + goto fail; + r = mount_arm_timer(m); if (r < 0) goto fail; @@ -789,6 +794,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { UNIT(m)->cgroup_path, UNIT(m)->id, NULL, + m->exec_runtime, &pid); if (r < 0) goto fail; @@ -814,7 +820,9 @@ static void mount_enter_dead(Mount *m, MountResult f) { if (f != MOUNT_SUCCESS) m->result = f; - exec_context_tmp_dirs_done(&m->exec_context); + exec_runtime_destroy(m->exec_runtime); + m->exec_runtime = exec_runtime_unref(m->exec_runtime); + mount_set_state(m, m->result != MOUNT_SUCCESS ? 
MOUNT_FAILED : MOUNT_DEAD); } @@ -1095,8 +1103,6 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) { if (m->control_command_id >= 0) unit_serialize_item(u, f, "control-command", mount_exec_command_to_string(m->control_command_id)); - exec_context_serialize(&m->exec_context, UNIT(m), f); - return 0; } @@ -1153,22 +1159,6 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F m->control_command_id = id; m->control_command = m->exec_command + id; } - } else if (streq(key, "tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - m->exec_context.tmp_dir = t; - } else if (streq(key, "var-tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - m->exec_context.var_tmp_dir = t; } else log_debug_unit(UNIT(m)->id, "Unknown serialization key '%s'", key); @@ -1770,6 +1760,7 @@ const UnitVTable mount_vtable = { .exec_context_offset = offsetof(Mount, exec_context), .cgroup_context_offset = offsetof(Mount, cgroup_context), .kill_context_offset = offsetof(Mount, kill_context), + .exec_runtime_offset = offsetof(Mount, exec_runtime), .sections = "Unit\0" diff --git a/src/core/mount.h b/src/core/mount.h index 22a14e1a3..a53423cc2 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -101,6 +101,8 @@ struct Mount { KillContext kill_context; CGroupContext cgroup_context; + ExecRuntime *exec_runtime; + MountState state, deserialized_state; ExecCommand* control_command; diff --git a/src/core/namespace.c b/src/core/namespace.c index b453f8dc0..092e08f72 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "strv.h" #include "util.h" @@ -37,6 +38,7 @@ #include "namespace.h" #include "missing.h" #include "execute.h" +#include "loopback-setup.h" typedef enum MountMode { /* This is ordered by priority! 
*/ @@ -57,6 +59,8 @@ typedef struct BindMount { static int append_mounts(BindMount **p, char **strv, MountMode mode) { char **i; + assert(p); + STRV_FOREACH(i, strv) { (*p)->ignore = false; @@ -184,68 +188,50 @@ static int make_read_only(BindMount *m) { return 0; } -int setup_tmpdirs(const char *unit_id, - char **tmp_dir, - char **var_tmp_dir) { - int r = 0; - _cleanup_free_ char *tmp = NULL, *var = NULL; - - assert(tmp_dir); - assert(var_tmp_dir); - - tmp = strjoin("/tmp/systemd-", unit_id, "-XXXXXXX", NULL); - var = strjoin("/var/tmp/systemd-", unit_id, "-XXXXXXX", NULL); - - r = create_tmp_dir(tmp, tmp_dir); - if (r < 0) - return r; - - r = create_tmp_dir(var, var_tmp_dir); - if (r == 0) - return 0; +int setup_namespace( + char** read_write_dirs, + char** read_only_dirs, + char** inaccessible_dirs, + char* tmp_dir, + char* var_tmp_dir, + unsigned mount_flags) { - /* failure */ - rmdir(*tmp_dir); - rmdir(tmp); - free(*tmp_dir); - *tmp_dir = NULL; - - return r; -} - -int setup_namespace(char** read_write_dirs, - char** read_only_dirs, - char** inaccessible_dirs, - char* tmp_dir, - char* var_tmp_dir, - bool private_tmp, - unsigned mount_flags) { - - unsigned n = strv_length(read_write_dirs) + - strv_length(read_only_dirs) + - strv_length(inaccessible_dirs) + - (private_tmp ? 
2 : 0); BindMount *m, *mounts = NULL; + unsigned n; int r = 0; - if (!mount_flags) + if (mount_flags == 0) mount_flags = MS_SHARED; if (unshare(CLONE_NEWNS) < 0) return -errno; - if (n) { + n = !!tmp_dir + !!var_tmp_dir + + strv_length(read_write_dirs) + + strv_length(read_only_dirs) + + strv_length(inaccessible_dirs); + + if (n > 0) { m = mounts = (BindMount *) alloca(n * sizeof(BindMount)); - if ((r = append_mounts(&m, read_write_dirs, READWRITE)) < 0 || - (r = append_mounts(&m, read_only_dirs, READONLY)) < 0 || - (r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE)) < 0) + r = append_mounts(&m, read_write_dirs, READWRITE); + if (r < 0) + return r; + + r = append_mounts(&m, read_only_dirs, READONLY); + if (r < 0) + return r; + + r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE); + if (r < 0) return r; - if (private_tmp) { + if (tmp_dir) { m->path = "/tmp"; m->mode = PRIVATE_TMP; m++; + } + if (var_tmp_dir) { m->path = "/var/tmp"; m->mode = PRIVATE_VAR_TMP; m++; @@ -265,28 +251,172 @@ int setup_namespace(char** read_write_dirs, for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto undo_mounts; + goto fail; } for (m = mounts; m < mounts + n; ++m) { r = make_read_only(m); if (r < 0) - goto undo_mounts; + goto fail; } /* Remount / as the desired mode */ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { r = -errno; - goto undo_mounts; + goto fail; } return 0; -undo_mounts: - for (m = mounts; m < mounts + n; ++m) { +fail: + for (m = mounts; m < mounts + n; ++m) if (m->done) umount2(m->path, MNT_DETACH); + + return r; +} + +static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { + _cleanup_free_ char *x = NULL; + + assert(id); + assert(prefix); + assert(path); + + x = strjoin(prefix, "/systemd-", id, "-XXXXXX", NULL); + if (!x) + return -ENOMEM; + + RUN_WITH_UMASK(0077) + if (!mkdtemp(x)) + return -errno; + + RUN_WITH_UMASK(0000) { + char *y; + + y = strappenda(x, "/tmp"); + + if 
(mkdir(y, 0777 | S_ISVTX) < 0) + return -errno; } + *path = x; + x = NULL; + + return 0; +} + +int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) { + char *a, *b; + int r; + + assert(id); + assert(tmp_dir); + assert(var_tmp_dir); + + r = setup_one_tmp_dir(id, "/tmp", &a); + if (r < 0) + return r; + + r = setup_one_tmp_dir(id, "/var/tmp", &b); + if (r < 0) { + char *t; + + t = strappenda(a, "/tmp"); + rmdir(t); + rmdir(a); + + free(a); + return r; + } + + *tmp_dir = a; + *var_tmp_dir = b; + + return 0; +} + +int setup_netns(int netns_storage_socket[2]) { + _cleanup_close_ int netns = -1; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + struct cmsghdr *cmsg; + int r; + + assert(netns_storage_socket); + assert(netns_storage_socket[0] >= 0); + assert(netns_storage_socket[1] >= 0); + + /* We use the passed socketpair as a storage buffer for our + * namespace socket. Whatever process runs this first shall + * create a new namespace, all others should just join it. To + * serialize that we use a file lock on the socket pair. + * + * It's a bit crazy, but hey, works great! 
*/ + + if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) { + if (errno != EAGAIN) { + r = -errno; + goto fail; + } + + /* Nothing stored yet, so let's create a new namespace */ + + if (unshare(CLONE_NEWNET) < 0) { + r = -errno; + goto fail; + } + + loopback_setup(); + + netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (netns < 0) { + r = -errno; + goto fail; + } + + r = 1; + } else { + /* Yay, found something, so let's join the namespace */ + + for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int))); + netns = *(int*) CMSG_DATA(cmsg); + } + } + + if (setns(netns, CLONE_NEWNET) < 0) { + r = -errno; + goto fail; + } + + r = 0; + } + + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &netns, sizeof(int)); + mh.msg_controllen = cmsg->cmsg_len; + + if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) { + r = -errno; + goto fail; + } + +fail: + lockf(netns_storage_socket[0], F_ULOCK, 0); + return r; } diff --git a/src/core/namespace.h b/src/core/namespace.h index ee7416b7b..af0c01df9 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -23,14 +23,15 @@ #include -int setup_tmpdirs(const char *unit_id, - char **tmp_dir, - char **var_tmp_dir); - int setup_namespace(char **read_write_dirs, char **read_only_dirs, char **inaccessible_dirs, char *tmp_dir, char *var_tmp_dir, - bool private_tmp, unsigned mount_flags); + +int setup_tmp_dirs(const char *id, + char **tmp_dir, + char **var_tmp_dir); + +int setup_netns(int netns_storage_socket[2]); diff --git a/src/core/service.c b/src/core/service.c index 7c5d5d868..5b41c3638 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ 
-304,7 +304,8 @@ static void service_done(Unit *u) { s->status_text = NULL; cgroup_context_done(&s->cgroup_context); - exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager)); + exec_context_done(&s->exec_context); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX); s->control_command = NULL; s->main_command = NULL; @@ -1751,6 +1752,10 @@ static int service_spawn( unit_realize_cgroup(UNIT(s)); + r = unit_setup_exec_runtime(UNIT(s)); + if (r < 0) + goto fail; + if (pass_fds || s->exec_context.std_input == EXEC_INPUT_SOCKET || s->exec_context.std_output == EXEC_OUTPUT_SOCKET || @@ -1834,6 +1839,7 @@ static int service_spawn( path, UNIT(s)->id, s->type == SERVICE_IDLE ? UNIT(s)->manager->idle_pipe : NULL, + s->exec_runtime, &pid); if (r < 0) goto fail; @@ -1932,7 +1938,8 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) s->forbid_restart = false; /* we want fresh tmpdirs in case service is started again immediately */ - exec_context_tmp_dirs_done(&s->exec_context); + exec_runtime_destroy(s->exec_runtime); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); /* Try to delete the pid file. 
At this point it will be * out-of-date, and some software might be confused by it, so @@ -2642,12 +2649,6 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp); - if (s->exec_context.tmp_dir) - unit_serialize_item(u, f, "tmp-dir", s->exec_context.tmp_dir); - - if (s->exec_context.var_tmp_dir) - unit_serialize_item(u, f, "var-tmp-dir", s->exec_context.var_tmp_dir); - if (s->forbid_restart) unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart)); @@ -2771,23 +2772,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp); else if (streq(key, "watchdog-timestamp")) dual_timestamp_deserialize(value, &s->watchdog_timestamp); - else if (streq(key, "tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.tmp_dir = t; - } else if (streq(key, "var-tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.var_tmp_dir = t; - } else if (streq(key, "forbid-restart")) { + else if (streq(key, "forbid-restart")) { int b; b = parse_boolean(value); @@ -3835,6 +3820,7 @@ const UnitVTable service_vtable = { .exec_context_offset = offsetof(Service, exec_context), .cgroup_context_offset = offsetof(Service, cgroup_context), .kill_context_offset = offsetof(Service, kill_context), + .exec_runtime_offset = offsetof(Service, exec_runtime), .sections = "Unit\0" diff --git a/src/core/service.h b/src/core/service.h index 5c5379175..b5750244a 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -154,6 +154,9 @@ struct Service { /* The ID of the control command currently being executed */ ServiceExecCommand control_command_id; + /* Runtime data of the execution context */ + ExecRuntime *exec_runtime; + pid_t main_pid, control_pid; int socket_fd; diff --git a/src/core/socket.c b/src/core/socket.c index 
eae946578..9e14ffd29 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -134,9 +134,9 @@ static void socket_done(Unit *u) { socket_free_ports(s); - exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager)); - cgroup_context_init(&s->cgroup_context); - + cgroup_context_done(&s->cgroup_context); + exec_context_done(&s->exec_context); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX); s->control_command = NULL; @@ -1232,6 +1232,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { unit_realize_cgroup(UNIT(s)); + r = unit_setup_exec_runtime(UNIT(s)); + if (r < 0) + goto fail; + r = socket_arm_timer(s); if (r < 0) goto fail; @@ -1253,6 +1257,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { UNIT(s)->cgroup_path, UNIT(s)->id, NULL, + s->exec_runtime, &pid); strv_free(argv); @@ -1280,7 +1285,9 @@ static void socket_enter_dead(Socket *s, SocketResult f) { if (f != SOCKET_SUCCESS) s->result = f; - exec_context_tmp_dirs_done(&s->exec_context); + exec_runtime_destroy(s->exec_runtime); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); + socket_set_state(s, s->result != SOCKET_SUCCESS ? 
SOCKET_FAILED : SOCKET_DEAD); } @@ -1736,11 +1743,12 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { if (p->fd < 0) continue; - if ((copy = fdset_put_dup(fds, p->fd)) < 0) + copy = fdset_put_dup(fds, p->fd); + if (copy < 0) return copy; if (p->type == SOCKET_SOCKET) { - char *t; + _cleanup_free_ char *t = NULL; r = socket_address_print(&p->address, &t); if (r < 0) @@ -1750,7 +1758,7 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item_format(u, f, "netlink", "%i %s", copy, t); else unit_serialize_item_format(u, f, "socket", "%i %i %s", copy, p->address.type, t); - free(t); + } else if (p->type == SOCKET_SPECIAL) unit_serialize_item_format(u, f, "special", "%i %s", copy, p->path); else if (p->type == SOCKET_MQUEUE) @@ -1761,8 +1769,6 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { } } - exec_context_serialize(&s->exec_context, UNIT(s), f); - return 0; } @@ -1922,22 +1928,6 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, p->fd = fdset_remove(fds, fd); } } - } else if (streq(key, "tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.tmp_dir = t; - } else if (streq(key, "var-tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.var_tmp_dir = t; } else log_debug_unit(UNIT(s)->id, "Unknown serialization key '%s'", key); @@ -2428,6 +2418,7 @@ const UnitVTable socket_vtable = { .exec_context_offset = offsetof(Socket, exec_context), .cgroup_context_offset = offsetof(Socket, cgroup_context), .kill_context_offset = offsetof(Socket, kill_context), + .exec_runtime_offset = offsetof(Socket, exec_runtime), .sections = "Unit\0" diff --git a/src/core/socket.h b/src/core/socket.h index db0796c4a..076a18369 100644 --- a/src/core/socket.h +++ b/src/core/socket.h @@ -105,6 +105,7 @@ struct Socket { ExecContext exec_context; KillContext kill_context; CGroupContext cgroup_context; + ExecRuntime *exec_runtime; 
/* For Accept=no sockets refers to the one service we'll activate. For Accept=yes sockets is either NULL, or filled diff --git a/src/core/swap.c b/src/core/swap.c index 4e65c701f..1d02eb297 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -155,12 +155,12 @@ static void swap_done(Unit *u) { free(s->parameters_fragment.what); s->parameters_fragment.what = NULL; - exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager)); + cgroup_context_done(&s->cgroup_context); + exec_context_done(&s->exec_context); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX); s->control_command = NULL; - cgroup_context_done(&s->cgroup_context); - swap_unwatch_control_pid(s); s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -625,6 +625,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { unit_realize_cgroup(UNIT(s)); + r = unit_setup_exec_runtime(UNIT(s)); + if (r < 0) + goto fail; + r = swap_arm_timer(s); if (r < 0) goto fail; @@ -642,6 +646,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { UNIT(s)->cgroup_path, UNIT(s)->id, NULL, + s->exec_runtime, &pid); if (r < 0) goto fail; @@ -667,7 +672,9 @@ static void swap_enter_dead(Swap *s, SwapResult f) { if (f != SWAP_SUCCESS) s->result = f; - exec_context_tmp_dirs_done(&s->exec_context); + exec_runtime_destroy(s->exec_runtime); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); + swap_set_state(s, s->result != SWAP_SUCCESS ? 
SWAP_FAILED : SWAP_DEAD); } @@ -867,8 +874,6 @@ static int swap_serialize(Unit *u, FILE *f, FDSet *fds) { if (s->control_command_id >= 0) unit_serialize_item(u, f, "control-command", swap_exec_command_to_string(s->control_command_id)); - exec_context_serialize(&s->exec_context, UNIT(s), f); - return 0; } @@ -912,22 +917,6 @@ static int swap_deserialize_item(Unit *u, const char *key, const char *value, FD s->control_command_id = id; s->control_command = s->exec_command + id; } - } else if (streq(key, "tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.tmp_dir = t; - } else if (streq(key, "var-tmp-dir")) { - char *t; - - t = strdup(value); - if (!t) - return log_oom(); - - s->exec_context.var_tmp_dir = t; } else log_debug_unit(u->id, "Unknown serialization key '%s'", key); @@ -1420,6 +1409,7 @@ const UnitVTable swap_vtable = { .exec_context_offset = offsetof(Swap, exec_context), .cgroup_context_offset = offsetof(Swap, cgroup_context), .kill_context_offset = offsetof(Swap, kill_context), + .exec_runtime_offset = offsetof(Swap, exec_runtime), .sections = "Unit\0" diff --git a/src/core/swap.h b/src/core/swap.h index 3005abb2d..f2ae49b1d 100644 --- a/src/core/swap.h +++ b/src/core/swap.h @@ -98,6 +98,8 @@ struct Swap { KillContext kill_context; CGroupContext cgroup_context; + ExecRuntime *exec_runtime; + SwapState state, deserialized_state; ExecCommand* control_command; diff --git a/src/core/unit.c b/src/core/unit.c index f4d60bcf3..d82408ccd 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -49,6 +49,7 @@ #include "fileio-label.h" #include "bus-errors.h" #include "dbus.h" +#include "execute.h" const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = { [UNIT_SERVICE] = &service_vtable, @@ -1745,6 +1746,7 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen [UNIT_TRIGGERED_BY] = UNIT_TRIGGERS, [UNIT_PROPAGATES_RELOAD_TO] = UNIT_RELOAD_PROPAGATED_FROM, [UNIT_RELOAD_PROPAGATED_FROM] = 
UNIT_PROPAGATES_RELOAD_TO, + [UNIT_JOINS_NAMESPACE_OF] = UNIT_JOINS_NAMESPACE_OF, }; int r, q = 0, v = 0, w = 0; @@ -1760,34 +1762,47 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen if (u == other) return 0; - if ((r = set_ensure_allocated(&u->dependencies[d], trivial_hash_func, trivial_compare_func)) < 0) + r = set_ensure_allocated(&u->dependencies[d], trivial_hash_func, trivial_compare_func); + if (r < 0) return r; - if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID) - if ((r = set_ensure_allocated(&other->dependencies[inverse_table[d]], trivial_hash_func, trivial_compare_func)) < 0) + if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID) { + r = set_ensure_allocated(&other->dependencies[inverse_table[d]], trivial_hash_func, trivial_compare_func); + if (r < 0) + return r; + } + + if (add_reference) { + r = set_ensure_allocated(&u->dependencies[UNIT_REFERENCES], trivial_hash_func, trivial_compare_func); + if (r < 0) return r; - if (add_reference) - if ((r = set_ensure_allocated(&u->dependencies[UNIT_REFERENCES], trivial_hash_func, trivial_compare_func)) < 0 || - (r = set_ensure_allocated(&other->dependencies[UNIT_REFERENCED_BY], trivial_hash_func, trivial_compare_func)) < 0) + r = set_ensure_allocated(&other->dependencies[UNIT_REFERENCED_BY], trivial_hash_func, trivial_compare_func); + if (r < 0) return r; + } - if ((q = set_put(u->dependencies[d], other)) < 0) + q = set_put(u->dependencies[d], other); + if (q < 0) return q; - if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID) - if ((v = set_put(other->dependencies[inverse_table[d]], u)) < 0) { + if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID && inverse_table[d] != d) { + v = set_put(other->dependencies[inverse_table[d]], u); + if (v < 0) { r = v; goto fail; } + } if (add_reference) { - if ((w = set_put(u->dependencies[UNIT_REFERENCES], other)) < 0) { + w = set_put(u->dependencies[UNIT_REFERENCES], other); + if (w < 0) { r = w; goto fail; } - if ((r = 
set_put(other->dependencies[UNIT_REFERENCED_BY], u)) < 0) + r = set_put(other->dependencies[UNIT_REFERENCED_BY], u); + if (r < 0) goto fail; } @@ -2082,6 +2097,7 @@ bool unit_can_serialize(Unit *u) { } int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { + ExecRuntime *rt; int r; assert(u); @@ -2095,17 +2111,11 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { if (r < 0) return r; - - if (serialize_jobs) { - if (u->job) { - fprintf(f, "job\n"); - job_serialize(u->job, f, fds); - } - - if (u->nop_job) { - fprintf(f, "job\n"); - job_serialize(u->nop_job, f, fds); - } + rt = unit_get_exec_runtime(u); + if (rt) { + r = exec_runtime_serialize(rt, u, f, fds); + if (r < 0) + return r; } dual_timestamp_serialize(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp); @@ -2122,6 +2132,18 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { if (u->cgroup_path) unit_serialize_item(u, f, "cgroup", u->cgroup_path); + if (serialize_jobs) { + if (u->job) { + fprintf(f, "job\n"); + job_serialize(u->job, f, fds); + } + + if (u->nop_job) { + fprintf(f, "job\n"); + job_serialize(u->nop_job, f, fds); + } + } + /* End marker */ fputc('\n', f); return 0; @@ -2155,6 +2177,8 @@ void unit_serialize_item(Unit *u, FILE *f, const char *key, const char *value) { } int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { + size_t offset; + ExecRuntime **rt = NULL; int r; assert(u); @@ -2164,6 +2188,10 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { if (!unit_can_serialize(u)) return 0; + offset = UNIT_VTABLE(u)->exec_runtime_offset; + if (offset > 0) + rt = (ExecRuntime**) ((uint8_t*) u + offset); + for (;;) { char line[LINE_MAX], *l, *v; size_t k; @@ -2276,6 +2304,14 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; } + if (rt) { + r = exec_runtime_deserialize_item(rt, u, l, v, fds); + if (r < 0) + return r; + if (r > 0) + continue; + } + r = UNIT_VTABLE(u)->deserialize_item(u, l, v, fds); if (r < 0) return r; 
@@ -2660,6 +2696,16 @@ CGroupContext *unit_get_cgroup_context(Unit *u) { return (CGroupContext*) ((uint8_t*) u + offset); } +ExecRuntime *unit_get_exec_runtime(Unit *u) { + size_t offset; + + offset = UNIT_VTABLE(u)->exec_runtime_offset; + if (offset <= 0) + return NULL; + + return *(ExecRuntime**) ((uint8_t*) u + offset); +} + static int drop_in_file(Unit *u, UnitSetPropertiesMode mode, const char *name, char **_p, char **_q) { _cleanup_free_ char *b = NULL; char *p, *q; @@ -3010,6 +3056,33 @@ int unit_require_mounts_for(Unit *u, const char *path) { return 0; } +int unit_setup_exec_runtime(Unit *u) { + ExecRuntime **rt; + size_t offset; + Iterator i; + Unit *other; + + offset = UNIT_VTABLE(u)->exec_runtime_offset; + assert(offset > 0); + + /* Check if there already is an ExecRuntime for this unit? */ + rt = (ExecRuntime**) ((uint8_t*) u + offset); + if (*rt) + return 0; + + /* Try to get it from somebody else */ + SET_FOREACH(other, u->dependencies[UNIT_JOINS_NAMESPACE_OF], i) { + + *rt = unit_get_exec_runtime(other); + if (*rt) { + exec_runtime_ref(*rt); + return 0; + } + } + + return exec_runtime_make(rt, unit_get_exec_context(u), u->id); +} + static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = { [UNIT_ACTIVE] = "active", [UNIT_RELOADING] = "reloading", @@ -3045,6 +3118,7 @@ static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = { [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom", [UNIT_REFERENCES] = "References", [UNIT_REFERENCED_BY] = "ReferencedBy", + [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf", }; DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency); diff --git a/src/core/unit.h b/src/core/unit.h index a6dbe8ddb..299ded66a 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -106,6 +106,9 @@ enum UnitDependency { UNIT_PROPAGATES_RELOAD_TO, UNIT_RELOAD_PROPAGATED_FROM, + /* Joins namespace of */ + UNIT_JOINS_NAMESPACE_OF, + /* Reference information for GC logic */ UNIT_REFERENCES, /* Inverse of 
'references' is 'referenced_by' */ UNIT_REFERENCED_BY, @@ -301,6 +304,11 @@ struct UnitVTable { * KillContext is found, if the unit type has that */ size_t kill_context_offset; + /* If greater than 0, the offset into the object where the + * pointer to ExecRuntime is found, if the unit type has + * that */ + size_t exec_runtime_offset; + /* The name of the configuration file section with the private settings of this unit*/ const char *private_section; @@ -586,6 +594,9 @@ int unit_exec_context_defaults(Unit *u, ExecContext *c); ExecContext *unit_get_exec_context(Unit *u) _pure_; KillContext *unit_get_kill_context(Unit *u) _pure_; CGroupContext *unit_get_cgroup_context(Unit *u) _pure_; +ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_; + +int unit_setup_exec_runtime(Unit *u); int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data); int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) _printf_(4,5); diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index a1890aa85..61fb81381 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -399,7 +399,6 @@ static int terminate_machine(sd_bus *bus, char **args, unsigned n) { static int openpt_in_namespace(pid_t pid, int flags) { _cleanup_close_ int nsfd = -1, rootfd = -1; - _cleanup_free_ char *ns = NULL, *root = NULL; _cleanup_close_pipe_ int sock[2] = { -1, -1 }; union { struct cmsghdr cmsghdr; @@ -411,20 +410,17 @@ static int openpt_in_namespace(pid_t pid, int flags) { }; struct cmsghdr *cmsg; int master = -1, r; + char *ns, *root; pid_t child; siginfo_t si; - r = asprintf(&ns, "/proc/%lu/ns/mnt", (unsigned long) pid); - if (r < 0) - return -ENOMEM; + ns = procfs_file_alloca(pid, "ns/mnt"); nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (nsfd < 0) return -errno; - r = asprintf(&root, "/proc/%lu/root", (unsigned long) pid); - if (r < 0) - return -ENOMEM; + root = procfs_file_alloca(pid, "root"); 
rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); if (rootfd < 0) diff --git a/src/shared/util.c b/src/shared/util.c index 3a4d1965a..d31bd5294 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -5705,56 +5705,6 @@ int search_and_fopen_nulstr(const char *path, const char *mode, const char *sear return search_and_fopen_internal(path, mode, s, _f); } -int create_tmp_dir(char template[], char** dir_name) { - int r = 0; - char *d = NULL, *dt; - - assert(dir_name); - - RUN_WITH_UMASK(0077) { - d = mkdtemp(template); - } - if (!d) { - log_error("Can't create directory %s: %m", template); - return -errno; - } - - dt = strjoin(d, "/tmp", NULL); - if (!dt) { - r = log_oom(); - goto fail3; - } - - RUN_WITH_UMASK(0000) { - r = mkdir(dt, 0777); - } - if (r < 0) { - log_error("Can't create directory %s: %m", dt); - r = -errno; - goto fail2; - } - log_debug("Created temporary directory %s", dt); - - r = chmod(dt, 0777 | S_ISVTX); - if (r < 0) { - log_error("Failed to chmod %s: %m", dt); - r = -errno; - goto fail1; - } - log_debug("Set sticky bit on %s", dt); - - *dir_name = dt; - - return 0; -fail1: - rmdir(dt); -fail2: - free(dt); -fail3: - rmdir(template); - return r; -} - char *strextend(char **x, ...) 
{ va_list ap; size_t f, l; diff --git a/src/shared/util.h b/src/shared/util.h index e46438c92..1662bd445 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -636,7 +636,6 @@ int on_ac_power(void); int search_and_fopen(const char *path, const char *mode, const char **search, FILE **_f); int search_and_fopen_nulstr(const char *path, const char *mode, const char *search, FILE **_f); -int create_tmp_dir(char template[], char** dir_name); #define FOREACH_LINE(line, f, on_error) \ for (;;) \ diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 1c04676cc..6454a1bd6 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -20,27 +20,102 @@ ***/ #include +#include #include "namespace.h" #include "util.h" static void test_tmpdir(const char *id, const char *A, const char *B) { _cleanup_free_ char *a, *b; + struct stat x, y; + char *c, *d; - assert_se(setup_tmpdirs(id, &a, &b) == 0); - assert(startswith(a, A)); - assert(startswith(b, B)); - assert(access(a, F_OK) == 0); - assert(access(b, F_OK) == 0); + assert_se(setup_tmp_dirs(id, &a, &b) == 0); + assert_se(startswith(a, A)); + assert_se(startswith(b, B)); - assert_se(rmdir(a) == 0); - assert_se(rmdir(b) == 0); + assert_se(stat(a, &x) >= 0); + assert_se(stat(b, &y) >= 0); - assert(endswith(a, "/tmp")); - assert(endswith(b, "/tmp")); + assert_se(S_ISDIR(x.st_mode)); + assert_se(S_ISDIR(y.st_mode)); - assert_se(rmdir(dirname(a)) == 0); - assert_se(rmdir(dirname(b)) == 0); + assert_se((x.st_mode & 01777) == 0700); + assert_se((y.st_mode & 01777) == 0700); + + c = strappenda(a, "/tmp"); + d = strappenda(b, "/tmp"); + + assert_se(stat(c, &x) >= 0); + assert_se(stat(d, &y) >= 0); + + assert_se(S_ISDIR(x.st_mode)); + assert_se(S_ISDIR(y.st_mode)); + + assert_se((x.st_mode & 01777) == 01777); + assert_se((y.st_mode & 01777) == 01777); + + assert_se(rmdir(c) >= 0); + assert_se(rmdir(d) >= 0); + + assert_se(rmdir(a) >= 0); + assert_se(rmdir(b) >= 0); +} + +static void test_netns(void) { + 
_cleanup_close_pipe_ int s[2] = { -1, -1 }; + pid_t pid1, pid2, pid3; + int r, n = 0; + siginfo_t si; + + if (geteuid() > 0) + return; + + assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, s) >= 0); + + pid1 = fork(); + assert_se(pid1 >= 0); + + if (pid1 == 0) { + r = setup_netns(s); + assert_se(r >= 0); + _exit(r); + } + + pid2 = fork(); + assert_se(pid2 >= 0); + + if (pid2 == 0) { + r = setup_netns(s); + assert_se(r >= 0); + _exit(r); + } + + pid3 = fork(); + assert_se(pid3 >= 0); + + if (pid3 == 0) { + r = setup_netns(s); + assert_se(r >= 0); + _exit(r); + } + + r = wait_for_terminate(pid1, &si); + assert_se(r >= 0); + assert_se(si.si_code == CLD_EXITED); + n += si.si_status; + + r = wait_for_terminate(pid2, &si); + assert_se(r >= 0); + assert_se(si.si_code == CLD_EXITED); + n += si.si_status; + + r = wait_for_terminate(pid3, &si); + assert_se(r >= 0); + assert_se(si.si_code == CLD_EXITED); + n += si.si_status; + + assert_se(n == 1); } int main(int argc, char *argv[]) { @@ -52,5 +127,7 @@ int main(int argc, char *argv[]) { "/tmp/systemd-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-", "/var/tmp/systemd-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-"); + test_netns(); + return 0; } diff --git a/src/test/test-ns.c b/src/test/test-ns.c index ad0d0419c..37d0998e5 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -59,7 +59,6 @@ int main(int argc, char *argv[]) { (char **) inaccessible, tmp_dir, var_tmp_dir, - true, 0); if (r < 0) { log_error("Failed to setup namespace: %s", strerror(-r)); -- 2.30.2