chiark / gitweb /
core: drop CAP_MKNOD when PrivateDevices= is set
[elogind.git] / src / core / unit.c
index 0ad679ba622f6154f742a9cacee2a794c1872a24..20b139d31be6446aeb2090332a952337d6843988 100644 (file)
@@ -50,6 +50,7 @@
 #include "bus-errors.h"
 #include "dbus.h"
 #include "execute.h"
+#include "virt.h"
 
 const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
         [UNIT_SERVICE] = &service_vtable,
@@ -258,9 +259,6 @@ int unit_set_description(Unit *u, const char *description) {
 bool unit_check_gc(Unit *u) {
         assert(u);
 
-        if (u->load_state == UNIT_STUB)
-                return true;
-
         if (UNIT_VTABLE(u)->no_gc)
                 return true;
 
@@ -330,7 +328,8 @@ void unit_add_to_dbus_queue(Unit *u) {
                 return;
 
         /* Shortcut things if nobody cares */
-        if (set_isempty(u->manager->subscribed)) {
+        if (sd_bus_track_count(u->manager->subscribed) <= 0 &&
+            set_isempty(u->manager->private_buses)) {
                 u->sent_dbus_new_signal = true;
                 return;
         }
@@ -472,6 +471,8 @@ void unit_free(Unit *u) {
                 free(u->cgroup_path);
         }
 
+        set_remove(u->manager->failed_units, u);
+
         free(u->description);
         strv_free(u->documentation);
         free(u->fragment_path);
@@ -481,6 +482,8 @@ void unit_free(Unit *u) {
 
         set_free_free(u->names);
 
+        unit_unwatch_all_pids(u);
+
         condition_free_list(u->conditions);
 
         unit_ref_unset(&u->slice);
@@ -775,7 +778,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, strna(unit_slice_name(u)),
                 prefix, strna(u->cgroup_path),
                 prefix, yes_no(u->cgroup_realized),
-                prefix, u->cgroup_mask,
+                prefix, u->cgroup_realized_mask,
                 prefix, u->cgroup_members_mask);
 
         SET_FOREACH(t, u->names, i)
@@ -944,7 +947,7 @@ int unit_add_default_target_dependency(Unit *u, Unit *target) {
         return unit_add_dependency(target, UNIT_AFTER, u, true);
 }
 
-static int unit_add_default_dependencies(Unit *u) {
+static int unit_add_target_dependencies(Unit *u) {
 
         static const UnitDependency deps[] = {
                 UNIT_REQUIRED_BY,
@@ -955,8 +958,8 @@ static int unit_add_default_dependencies(Unit *u) {
 
         Unit *target;
         Iterator i;
-        int r;
         unsigned k;
+        int r = 0;
 
         assert(u);
 
@@ -967,20 +970,22 @@ static int unit_add_default_dependencies(Unit *u) {
                                 return r;
                 }
 
-        if (u->default_dependencies && unit_get_cgroup_context(u)) {
-                if (UNIT_ISSET(u->slice))
-                        r = unit_add_two_dependencies(u, UNIT_AFTER, UNIT_WANTS, UNIT_DEREF(u->slice), true);
-                else
-                        r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, SPECIAL_ROOT_SLICE, NULL, true);
+        return r;
+}
 
-                if (r < 0)
-                        return r;
-        }
+static int unit_add_slice_dependencies(Unit *u) {
+        assert(u);
 
-        return 0;
+        if (!unit_get_cgroup_context(u))
+                return 0;
+
+        if (UNIT_ISSET(u->slice))
+                return unit_add_two_dependencies(u, UNIT_AFTER, UNIT_WANTS, UNIT_DEREF(u->slice), true);
+
+        return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, SPECIAL_ROOT_SLICE, NULL, true);
 }
 
-static int unit_add_mount_links(Unit *u) {
+static int unit_add_mount_dependencies(Unit *u) {
         char **i;
         int r;
 
@@ -1047,27 +1052,25 @@ int unit_load(Unit *u) {
 
         if (u->load_state == UNIT_LOADED) {
 
-                if (u->default_dependencies) {
-                        r = unit_add_default_dependencies(u);
-                        if (r < 0)
-                                goto fail;
-                }
-
-                unit_update_member_masks(u);
-
-                r = unit_add_mount_links(u);
+                r = unit_add_target_dependencies(u);
                 if (r < 0)
                         goto fail;
 
-                if (u->on_failure_job_mode == JOB_ISOLATE &&
-                    set_size(u->dependencies[UNIT_ON_FAILURE]) > 1) {
+                r = unit_add_slice_dependencies(u);
+                if (r < 0)
+                        goto fail;
 
-                        log_error_unit(u->id,
-                                       "More than one OnFailure= dependencies specified for %s but OnFailureJobMode=isolate set. Refusing.", u->id);
+                r = unit_add_mount_dependencies(u);
+                if (r < 0)
+                        goto fail;
 
+                if (u->on_failure_job_mode == JOB_ISOLATE && set_size(u->dependencies[UNIT_ON_FAILURE]) > 1) {
+                        log_error_unit(u->id, "More than one OnFailure= dependencies specified for %s but OnFailureJobMode=isolate set. Refusing.", u->id);
                         r = -EINVAL;
                         goto fail;
                 }
+
+                unit_update_cgroup_members_masks(u);
         }
 
         assert((u->load_state != UNIT_MERGED) == !u->merged_into);
@@ -1137,8 +1140,6 @@ _pure_ static const char *unit_get_status_message_format_try_harder(Unit *u, Job
         return NULL;
 }
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wformat-nonliteral"
 static void unit_status_print_starting_stopping(Unit *u, JobType t) {
         const char *format;
 
@@ -1151,12 +1152,11 @@ static void unit_status_print_starting_stopping(Unit *u, JobType t) {
         if (!format)
                 return;
 
+        DISABLE_WARNING_FORMAT_NONLITERAL;
         unit_status_printf(u, "", format);
+        REENABLE_WARNING;
 }
-#pragma GCC diagnostic pop
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wformat-nonliteral"
 static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
         const char *format;
         char buf[LINE_MAX];
@@ -1176,8 +1176,10 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
         if (!format)
                 return;
 
+        DISABLE_WARNING_FORMAT_NONLITERAL;
         snprintf(buf, sizeof(buf), format, unit_description(u));
         char_array_0(buf);
+        REENABLE_WARNING;
 
         mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
               t == JOB_STOP  ? SD_MESSAGE_UNIT_STOPPING :
@@ -1189,7 +1191,6 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
                         "MESSAGE=%s", buf,
                         NULL);
 }
-#pragma GCC diagnostic pop
 
 /* Errors:
  *         -EBADR:     This unit type does not support starting.
@@ -1508,6 +1509,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
 
         m = u->manager;
 
+        /* Update timestamps for state changes */
         if (m->n_reloading <= 0) {
                 dual_timestamp ts;
 
@@ -1524,14 +1526,23 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                         u->active_exit_timestamp = ts;
         }
 
+        /* Keep track of failed of units */
+        if (ns == UNIT_FAILED && os != UNIT_FAILED)
+                set_put(u->manager->failed_units, u);
+        else if (os == UNIT_FAILED && ns != UNIT_FAILED)
+                set_remove(u->manager->failed_units, u);
+
+        /* Make sure the cgroup is always removed when we become inactive */
         if (UNIT_IS_INACTIVE_OR_FAILED(ns))
                 unit_destroy_cgroup(u);
 
         /* Note that this doesn't apply to RemainAfterExit services exiting
-         * sucessfully, since there's no change of state in that case. Which is
+         * successfully, since there's no change of state in that case. Which is
          * why it is handled in service_set_state() */
         if (UNIT_IS_INACTIVE_OR_FAILED(os) != UNIT_IS_INACTIVE_OR_FAILED(ns)) {
-                ExecContext *ec = unit_get_exec_context(u);
+                ExecContext *ec;
+
+                ec = unit_get_exec_context(u);
                 if (ec && exec_context_may_touch_console(ec)) {
                         if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
                                 m->n_on_console --;
@@ -1627,12 +1638,11 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                 }
 
                 /* stop unneeded units regardless if going down was expected or not */
-                if (UNIT_IS_ACTIVE_OR_ACTIVATING(os) && UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+                if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
                         check_unneeded_dependencies(u);
 
                 if (ns != os && ns == UNIT_FAILED) {
-                        log_notice_unit(u->id,
-                                        "Unit %s entered failed state.", u->id);
+                        log_notice_unit(u->id, "Unit %s entered failed state.", u->id);
                         unit_start_on_failure(u);
                 }
         }
@@ -1696,20 +1706,136 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
 }
 
 int unit_watch_pid(Unit *u, pid_t pid) {
+        int q, r;
+
         assert(u);
         assert(pid >= 1);
 
-        /* Watch a specific PID. We only support one unit watching
-         * each PID for now. */
+        /* Watch a specific PID. We only support one or two units
+         * watching each PID for now, not more. */
+
+        r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
+        r = hashmap_ensure_allocated(&u->manager->watch_pids1, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
+        r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        if (r == -EEXIST) {
+                r = hashmap_ensure_allocated(&u->manager->watch_pids2, trivial_hash_func, trivial_compare_func);
+                if (r < 0)
+                        return r;
+
+                r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+        }
+
+        q = set_put(u->pids, LONG_TO_PTR(pid));
+        if (q < 0)
+                return q;
 
-        return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        return r;
 }
 
 void unit_unwatch_pid(Unit *u, pid_t pid) {
         assert(u);
         assert(pid >= 1);
 
-        hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+        set_remove(u->pids, LONG_TO_PTR(pid));
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+        assert(u);
+
+        while (!set_isempty(u->pids))
+                unit_unwatch_pid(u, PTR_TO_LONG(set_first(u->pids)));
+
+        set_free(u->pids);
+        u->pids = NULL;
+}
+
+static int unit_watch_pids_in_path(Unit *u, const char *path) {
+        _cleanup_closedir_ DIR *d = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int ret = 0, r;
+
+        assert(u);
+        assert(path);
+
+        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */
+
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+        if (r >= 0) {
+                pid_t pid;
+
+                while ((r = cg_read_pid(f, &pid)) > 0) {
+                        r = unit_watch_pid(u, pid);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+        if (r >= 0) {
+                char *fn;
+
+                while ((r = cg_read_subgroup(d, &fn)) > 0) {
+                        _cleanup_free_ char *p = NULL;
+
+                        p = strjoin(path, "/", fn, NULL);
+                        free(fn);
+
+                        if (!p)
+                                return -ENOMEM;
+
+                        r = unit_watch_pids_in_path(u, p);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        return ret;
+}
+
+int unit_watch_all_pids(Unit *u) {
+        assert(u);
+
+        /* Adds all PIDs from our cgroup to the set of PIDs we watch */
+
+        if (!u->cgroup_path)
+                return -ENOENT;
+
+        return unit_watch_pids_in_path(u, u->cgroup_path);
+}
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        /* Cleans dead PIDs from our list */
+
+        SET_FOREACH(e, u->pids, i) {
+                pid_t pid = PTR_TO_LONG(e);
+
+                if (pid == except1 || pid == except2)
+                        continue;
+
+                if (!pid_is_unwaited(pid))
+                        unit_unwatch_pid(u, pid);
+        }
 }
 
 bool unit_job_is_applicable(Unit *u, JobType j) {
@@ -2399,12 +2525,11 @@ int unit_coldplug(Unit *u) {
         return 0;
 }
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wformat-nonliteral"
 void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) {
+        DISABLE_WARNING_FORMAT_NONLITERAL;
         manager_status_printf(u->manager, false, status, unit_status_msg_format, unit_description(u));
+        REENABLE_WARNING;
 }
-#pragma GCC diagnostic pop
 
 bool unit_need_daemon_reload(Unit *u) {
         _cleanup_strv_free_ char **t = NULL;
@@ -2657,15 +2782,32 @@ void unit_ref_unset(UnitRef *ref) {
         ref->unit = NULL;
 }
 
-int unit_exec_context_defaults(Unit *u, ExecContext *c) {
+int unit_cgroup_context_init_defaults(Unit *u, CGroupContext *c) {
+        assert(u);
+        assert(c);
+
+        /* Copy in the manager defaults into the cgroup context,
+         * _before_ the rest of the settings have been initialized */
+
+        c->cpu_accounting = u->manager->default_cpu_accounting;
+        c->blockio_accounting = u->manager->default_blockio_accounting;
+        c->memory_accounting = u->manager->default_memory_accounting;
+
+        return 0;
+}
+
+int unit_exec_context_patch_defaults(Unit *u, ExecContext *c) {
         unsigned i;
         int r;
 
         assert(u);
         assert(c);
 
+        /* Patch in the manager defaults into the exec context,
+         * _after_ the rest of the settings have been initialized */
+
         /* This only copies in the ones that need memory */
-        for (i = 0; i < RLIMIT_NLIMITS; i++)
+        for (i = 0; i < _RLIMIT_MAX; i++)
                 if (u->manager->rlimit[i] && !c->rlimit[i]) {
                         c->rlimit[i] = newdup(struct rlimit, u->manager->rlimit[i], 1);
                         if (!c->rlimit[i])
@@ -2680,6 +2822,17 @@ int unit_exec_context_defaults(Unit *u, ExecContext *c) {
                         return r;
         }
 
+        if (u->manager->running_as == SYSTEMD_USER &&
+            (c->syscall_whitelist ||
+             !set_isempty(c->syscall_filter) ||
+             !set_isempty(c->syscall_archs) ||
+             c->address_families_whitelist ||
+             !set_isempty(c->address_families)))
+                c->no_new_privileges = true;
+
+        if (c->private_devices)
+                c->capability_bounding_set_drop |= (uint64_t) 1ULL << (uint64_t) CAP_MKNOD;
+
         return 0;
 }
 
@@ -2734,7 +2887,6 @@ static int drop_in_file(Unit *u, UnitSetPropertiesMode mode, const char *name, c
         assert(name);
         assert(_p);
         assert(_q);
-        assert(mode & (UNIT_PERSISTENT|UNIT_RUNTIME));
 
         b = xescape(name, "/.");
         if (!b)
@@ -2753,7 +2905,7 @@ static int drop_in_file(Unit *u, UnitSetPropertiesMode mode, const char *name, c
                         return -ENOENT;
 
                 p = strjoin(c, "/", u->id, ".d", NULL);
-        } else if (mode & UNIT_PERSISTENT)
+        } else if (mode == UNIT_PERSISTENT && !u->transient)
                 p = strjoin("/etc/systemd/system/", u->id, ".d", NULL);
         else
                 p = strjoin("/run/systemd/system/", u->id, ".d", NULL);
@@ -2779,7 +2931,7 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co
         assert(name);
         assert(data);
 
-        if (!(mode & (UNIT_PERSISTENT|UNIT_RUNTIME)))
+        if (!IN_SET(mode, UNIT_PERSISTENT, UNIT_RUNTIME))
                 return 0;
 
         r = drop_in_file(u, mode, name, &p, &q);
@@ -2799,7 +2951,7 @@ int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *n
         assert(name);
         assert(format);
 
-        if (!(mode & (UNIT_PERSISTENT|UNIT_RUNTIME)))
+        if (!IN_SET(mode, UNIT_PERSISTENT, UNIT_RUNTIME))
                 return 0;
 
         va_start(ap, format);
@@ -2822,7 +2974,7 @@ int unit_write_drop_in_private(Unit *u, UnitSetPropertiesMode mode, const char *
         if (!UNIT_VTABLE(u)->private_section)
                 return -EINVAL;
 
-        if (!(mode & (UNIT_PERSISTENT|UNIT_RUNTIME)))
+        if (!IN_SET(mode, UNIT_PERSISTENT, UNIT_RUNTIME))
                 return 0;
 
         ndata = strjoin("[", UNIT_VTABLE(u)->private_section, "]\n", data, NULL);
@@ -2841,7 +2993,7 @@ int unit_write_drop_in_private_format(Unit *u, UnitSetPropertiesMode mode, const
         assert(name);
         assert(format);
 
-        if (!(mode & (UNIT_PERSISTENT|UNIT_RUNTIME)))
+        if (!IN_SET(mode, UNIT_PERSISTENT, UNIT_RUNTIME))
                 return 0;
 
         va_start(ap, format);
@@ -2860,7 +3012,7 @@ int unit_remove_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name) {
 
         assert(u);
 
-        if (!(mode & (UNIT_PERSISTENT|UNIT_RUNTIME)))
+        if (!IN_SET(mode, UNIT_PERSISTENT, UNIT_RUNTIME))
                 return 0;
 
         r = drop_in_file(u, mode, name, &p, &q);
@@ -2921,7 +3073,7 @@ int unit_kill_context(
                 pid_t control_pid,
                 bool main_pid_alien) {
 
-        int sig, wait_for_exit = 0, r;
+        int sig, wait_for_exit = false, r;
 
         assert(u);
         assert(c);
@@ -2938,12 +3090,12 @@ int unit_kill_context(
                         _cleanup_free_ char *comm = NULL;
                         get_process_comm(main_pid, &comm);
 
-                        log_warning_unit(u->id, "Failed to kill main process %li (%s): %s",
-                                         (long) main_pid, strna(comm), strerror(-r));
+                        log_warning_unit(u->id, "Failed to kill main process " PID_FMT " (%s): %s", main_pid, strna(comm), strerror(-r));
                 } else {
-                        wait_for_exit = !main_pid_alien;
+                        if (!main_pid_alien)
+                                wait_for_exit = true;
 
-                        if (c->send_sighup)
+                        if (c->send_sighup && !sigkill)
                                 kill(main_pid, SIGHUP);
                 }
         }
@@ -2955,13 +3107,11 @@ int unit_kill_context(
                         _cleanup_free_ char *comm = NULL;
                         get_process_comm(control_pid, &comm);
 
-                        log_warning_unit(u->id,
-                                         "Failed to kill control process %li (%s): %s",
-                                         (long) control_pid, strna(comm), strerror(-r));
+                        log_warning_unit(u->id, "Failed to kill control process " PID_FMT " (%s): %s", control_pid, strna(comm), strerror(-r));
                 } else {
                         wait_for_exit = true;
 
-                        if (c->send_sighup)
+                        if (c->send_sighup && !sigkill)
                                 kill(control_pid, SIGHUP);
                 }
         }
@@ -2979,16 +3129,25 @@ int unit_kill_context(
                         if (r != -EAGAIN && r != -ESRCH && r != -ENOENT)
                                 log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r));
                 } else if (r > 0) {
-                        wait_for_exit = true;
 
-                        if (c->send_sighup) {
+                        /* FIXME: For now, we will not wait for the
+                         * cgroup members to die, simply because
+                         * cgroup notification is unreliable. It
+                         * doesn't work at all in containers, and
+                         * outside of containers it can be confused
+                         * easily by leaving directories in the
+                         * cgroup. */
+
+                        /* wait_for_exit = true; */
+
+                        if (c->send_sighup && !sigkill) {
                                 set_free(pid_set);
 
                                 pid_set = unit_pid_set(main_pid, control_pid);
                                 if (!pid_set)
                                         return -ENOMEM;
 
-                                cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, true, true, false, pid_set);
+                                cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set);
                         }
                 }
         }
@@ -3029,11 +3188,9 @@ int unit_require_mounts_for(Unit *u, const char *path) {
                 return 0;
         }
 
-        r = strv_push(&u->requires_mounts_for, p);
-        if (r < 0) {
-                free(p);
+        r = strv_consume(&u->requires_mounts_for, p);
+        if (r < 0)
                 return r;
-        }
 
         PATH_FOREACH_PREFIX_MORE(prefix, p) {
                 Set *x;
@@ -3134,9 +3291,9 @@ static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
         [UNIT_TRIGGERED_BY] = "TriggeredBy",
         [UNIT_PROPAGATES_RELOAD_TO] = "PropagatesReloadTo",
         [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom",
+        [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf",
         [UNIT_REFERENCES] = "References",
         [UNIT_REFERENCED_BY] = "ReferencedBy",
-        [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency);