chiark / gitweb /
main: don't mount cgroup controller unless PID == 1
[elogind.git] / src / core / main.c
index 9d2d55154c46c7cf425063f2d8bbeb07a65caa23..21c0d274c615117f3bf9ba6f3f7cb4d43c3881fc 100644 (file)
 #include "switch-root.h"
 #include "capability.h"
 #include "killall.h"
+#include "env-util.h"
+#include "hwclock.h"
+#include "sd-daemon.h"
 
 #include "mount-setup.h"
 #include "loopback-setup.h"
+#ifdef HAVE_KMOD
 #include "kmod-setup.h"
+#endif
 #include "hostname-setup.h"
 #include "machine-id-setup.h"
 #include "locale-setup.h"
-#include "hwclock.h"
 #include "selinux-setup.h"
 #include "ima-setup.h"
+#include "fileio.h"
+#include "smack-setup.h"
 
 static enum {
         ACTION_RUN,
@@ -73,7 +79,7 @@ static enum {
 } arg_action = ACTION_RUN;
 
 static char *arg_default_unit = NULL;
-static ManagerRunningAs arg_running_as = _MANAGER_RUNNING_AS_INVALID;
+static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
 
 static bool arg_dump_core = true;
 static bool arg_crash_shell = false;
@@ -339,7 +345,8 @@ static int parse_proc_cmdline_word(const char *word) {
                 else
                         arg_default_std_error = r;
         } else if (startswith(word, "systemd.setenv=")) {
-                char *cenv, *eq;
+                _cleanup_free_ char *cenv = NULL;
+                char *eq;
                 int r;
 
                 cenv = strdup(word + 15);
@@ -348,40 +355,58 @@ static int parse_proc_cmdline_word(const char *word) {
 
                 eq = strchr(cenv, '=');
                 if (!eq) {
-                        r = unsetenv(cenv);
-                        if (r < 0)
-                                log_warning("unsetenv failed %m. Ignoring.");
+                        if (!env_name_is_valid(cenv))
+                                log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
+                        else  {
+                                r = unsetenv(cenv);
+                                if (r < 0)
+                                        log_warning("Unsetting environment variable '%s' failed, ignoring: %m", cenv);
+                        }
                 } else {
-                        *eq = 0;
-                        r = setenv(cenv, eq + 1, 1);
-                        if (r < 0)
-                                log_warning("setenv failed %m. Ignoring.");
+                        if (!env_assignment_is_valid(cenv))
+                                log_warning("Environment variable assignment '%s' is not valid. Ignoring.", cenv);
+                        else {
+                                *eq = 0;
+                                r = setenv(cenv, eq + 1, 1);
+                                if (r < 0)
+                                        log_warning("Setting environment variable '%s=%s' failed, ignoring: %m", cenv, eq + 1);
+                        }
                 }
-                free(cenv);
 
         } else if (startswith(word, "systemd.") ||
                    (in_initrd() && startswith(word, "rd.systemd."))) {
 
-                log_warning("Unknown kernel switch %s. Ignoring.", word);
-
-                log_info("Supported kernel switches:\n"
-                         "systemd.unit=UNIT                        Default unit to start\n"
-                         "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
-                         "systemd.dump_core=0|1                    Dump core on crash\n"
-                         "systemd.crash_shell=0|1                  Run shell on crash\n"
-                         "systemd.crash_chvt=N                     Change to VT #N on crash\n"
-                         "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
-                         "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
-                         "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
-                         "                                         Log target\n"
-                         "systemd.log_level=LEVEL                  Log level\n"
-                         "systemd.log_color=0|1                    Highlight important log messages\n"
-                         "systemd.log_location=0|1                 Include code location in log messages\n"
-                         "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
-                         "                                         Set default log output for services\n"
-                         "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
-                         "                                         Set default log error output for services\n"
-                         "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
+                const char *c;
+
+                /* Ignore systemd.journald.xyz and friends */
+                c = word;
+                if (startswith(c, "rd."))
+                        c += 3;
+                if (startswith(c, "systemd."))
+                        c += 8;
+                if (c[strcspn(c, ".=")] != '.')  {
+
+                        log_warning("Unknown kernel switch %s. Ignoring.", word);
+
+                        log_info("Supported kernel switches:\n"
+                                 "systemd.unit=UNIT                        Default unit to start\n"
+                                 "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
+                                 "systemd.dump_core=0|1                    Dump core on crash\n"
+                                 "systemd.crash_shell=0|1                  Run shell on crash\n"
+                                 "systemd.crash_chvt=N                     Change to VT #N on crash\n"
+                                 "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
+                                 "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
+                                 "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
+                                 "                                         Log target\n"
+                                 "systemd.log_level=LEVEL                  Log level\n"
+                                 "systemd.log_color=0|1                    Highlight important log messages\n"
+                                 "systemd.log_location=0|1                 Include code location in log messages\n"
+                                 "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
+                                 "                                         Set default log output for services\n"
+                                 "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
+                                 "                                         Set default log error output for services\n"
+                                 "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
+                }
 
         } else if (streq(word, "quiet"))
                 arg_show_status = false;
@@ -684,7 +709,7 @@ static int parse_config_file(void) {
         const char *fn;
         int r;
 
-        fn = arg_running_as == MANAGER_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE;
+        fn = arg_running_as == SYSTEMD_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE;
         f = fopen(fn, "re");
         if (!f) {
                 if (errno == ENOENT)
@@ -872,11 +897,11 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_SYSTEM:
-                        arg_running_as = MANAGER_SYSTEM;
+                        arg_running_as = SYSTEMD_SYSTEM;
                         break;
 
                 case ARG_USER:
-                        arg_running_as = MANAGER_USER;
+                        arg_running_as = SYSTEMD_USER;
                         break;
 
                 case ARG_TEST:
@@ -931,14 +956,18 @@ static int parse_argv(int argc, char *argv[]) {
                         int fd;
                         FILE *f;
 
-                        if ((r = safe_atoi(optarg, &fd)) < 0 || fd < 0) {
+                        r = safe_atoi(optarg, &fd);
+                        if (r < 0 || fd < 0) {
                                 log_error("Failed to parse deserialize option %s.", optarg);
-                                return r;
+                                return r < 0 ? r : -EINVAL;
                         }
 
-                        if (!(f = fdopen(fd, "r"))) {
+                        fd_cloexec(fd, true);
+
+                        f = fdopen(fd, "r");
+                        if (!f) {
                                 log_error("Failed to open serialization fd: %m");
-                                return r;
+                                return -errno;
                         }
 
                         if (serialization)
@@ -1057,7 +1086,6 @@ static int help(void) {
 
 static int version(void) {
         puts(PACKAGE_STRING);
-        puts(DISTRIBUTION);
         puts(SYSTEMD_FEATURES);
 
         return 0;
@@ -1227,6 +1255,28 @@ static void test_cgroups(void) {
         sleep(10);
 }
 
+static int initialize_join_controllers(void) {
+        /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
+         * + "net_prio". We'd like to add "cpuset" to the mix, but
+         * "cpuset" doesn't really work for groups with no initialized
+         * attributes.
+         *
+         * Fills the arg_join_controllers variable (declared outside
+         * this function) with a NULL-terminated array of two string
+         * lists. Returns 0 on success and -ENOMEM if any of the
+         * allocations fails.
+         *
+         * NOTE(review): when a failure happens after the first
+         * allocation, nothing allocated so far is released here and
+         * the remaining array slots are never assigned; presumably
+         * the caller's cleanup path has to cope with that -- TODO
+         * confirm. */
+
+        arg_join_controllers = new(char**, 3);
+        if (!arg_join_controllers)
+                return -ENOMEM;
+
+        arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
+        if (!arg_join_controllers[0])
+                return -ENOMEM;
+
+        arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
+        if (!arg_join_controllers[1])
+                return -ENOMEM;
+
+        arg_join_controllers[2] = NULL;
+        return 0;
+}
+
 int main(int argc, char *argv[]) {
         Manager *m = NULL;
         int r, retval = EXIT_FAILURE;
@@ -1289,7 +1339,7 @@ int main(int argc, char *argv[]) {
         if (getpid() == 1 && detect_container(NULL) <= 0) {
 
                 /* Running outside of a container as PID 1 */
-                arg_running_as = MANAGER_SYSTEM;
+                arg_running_as = SYSTEMD_SYSTEM;
                 make_null_stdio();
                 log_set_target(LOG_TARGET_KMSG);
                 log_open();
@@ -1308,10 +1358,13 @@ int main(int argc, char *argv[]) {
                 }
 
                 if (!skip_setup) {
+                        mount_setup_early();
                         if (selinux_setup(&loaded_policy) < 0)
                                 goto finish;
                         if (ima_setup() < 0)
                                 goto finish;
+                        if (smack_setup() < 0)
+                                goto finish;
                 }
 
                 if (label_init(NULL) < 0)
@@ -1327,8 +1380,15 @@ int main(int argc, char *argv[]) {
                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
                                 else
                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
-                        } else {
-                                /* Do dummy first-time call to seal the kernel's time warp magic */
+                        } else if (!in_initrd()) {
+                                /*
+                                 * Do dummy first-time call to seal the kernel's time warp magic
+                                 *
+                                 * Do not call this from inside the initrd. The initrd might not
+                                 * carry /etc/adjtime with LOCAL, but the real system could be set up
+                                 * that way. In such case, we need to delay the time-warp or the sealing
+                                 * until we reach the real system.
+                                 */
                                 hwclock_reset_timezone();
 
                                 /* Tell the kernel our time zone */
@@ -1349,7 +1409,7 @@ int main(int argc, char *argv[]) {
         } else if (getpid() == 1) {
 
                 /* Running inside a container, as PID 1 */
-                arg_running_as = MANAGER_SYSTEM;
+                arg_running_as = SYSTEMD_SYSTEM;
                 log_set_target(LOG_TARGET_CONSOLE);
                 log_open();
 
@@ -1359,7 +1419,7 @@ int main(int argc, char *argv[]) {
         } else {
 
                 /* Running as user instance */
-                arg_running_as = MANAGER_USER;
+                arg_running_as = SYSTEMD_USER;
                 log_set_target(LOG_TARGET_AUTO);
                 log_open();
         }
@@ -1371,21 +1431,13 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        /* By default, mount "cpu" and "cpuacct" together */
-        arg_join_controllers = new(char**, 3);
-        if (!arg_join_controllers)
-                goto finish;
-
-        arg_join_controllers[0] = strv_new("cpu", "cpuacct", "cpuset", NULL);
-        arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
-        arg_join_controllers[2] = NULL;
-
-        if (!arg_join_controllers[0])
+        r = initialize_join_controllers();
+        if (r < 0)
                 goto finish;
 
         /* Mount /proc, /sys and friends, so that /proc/cmdline and
          * /proc/$PID/fd is available. */
-        if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) {
+        if (getpid() == 1) {
                 r = mount_setup(loaded_policy);
                 if (r < 0)
                         goto finish;
@@ -1400,7 +1452,7 @@ int main(int argc, char *argv[]) {
         if (parse_config_file() < 0)
                 goto finish;
 
-        if (arg_running_as == MANAGER_SYSTEM)
+        if (arg_running_as == SYSTEMD_SYSTEM)
                 if (parse_proc_cmdline() < 0)
                         goto finish;
 
@@ -1409,12 +1461,20 @@ int main(int argc, char *argv[]) {
         if (parse_argv(argc, argv) < 0)
                 goto finish;
 
-        if (arg_action == ACTION_TEST && geteuid() == 0) {
+        if (arg_action == ACTION_TEST &&
+            geteuid() == 0) {
                 log_error("Don't run test mode as root.");
                 goto finish;
         }
 
-        if (arg_running_as == MANAGER_SYSTEM &&
+        if (arg_running_as == SYSTEMD_USER &&
+            arg_action == ACTION_RUN &&
+            sd_booted() <= 0) {
+                log_error("Trying to run as user instance, but the system has not been booted with systemd.");
+                goto finish;
+        }
+
+        if (arg_running_as == SYSTEMD_SYSTEM &&
             arg_action == ACTION_RUN &&
             running_in_chroot() > 0) {
                 log_error("Cannot be run in a chroot() environment.");
@@ -1442,16 +1502,15 @@ int main(int argc, char *argv[]) {
         log_close();
 
         /* Remember open file descriptors for later deserialization */
-        if (serialization) {
-                r = fdset_new_fill(&fds);
-                if (r < 0) {
-                        log_error("Failed to allocate fd set: %s", strerror(-r));
-                        goto finish;
-                }
+        r = fdset_new_fill(&fds);
+        if (r < 0) {
+                log_error("Failed to allocate fd set: %s", strerror(-r));
+                goto finish;
+        } else
+                fdset_cloexec(fds, true);
 
+        if (serialization)
                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
-        } else
-                close_all_fds(NULL, 0);
 
         /* Set up PATH unless it is already set */
         setenv("PATH",
@@ -1460,9 +1519,9 @@ int main(int argc, char *argv[]) {
 #else
                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin",
 #endif
-               arg_running_as == MANAGER_SYSTEM);
+               arg_running_as == SYSTEMD_SYSTEM);
 
-        if (arg_running_as == MANAGER_SYSTEM) {
+        if (arg_running_as == SYSTEMD_SYSTEM) {
                 /* Parse the data passed to us. We leave this
                  * variables set, but the manager later on will not
                  * pass them on to our children. */
@@ -1486,6 +1545,12 @@ int main(int argc, char *argv[]) {
                 unsetenv("USER");
                 unsetenv("LOGNAME");
 
+                /* We suppress the socket activation env vars, as
+                 * we'll try to match *any* open fd to units if
+                 * possible. */
+                unsetenv("LISTEN_FDS");
+                unsetenv("LISTEN_PID");
+
                 /* All other variables are left as is, so that clients
                  * can still read them via /proc/1/environ */
         }
@@ -1493,7 +1558,7 @@ int main(int argc, char *argv[]) {
         /* Move out of the way, so that we won't block unmounts */
         assert_se(chdir("/")  == 0);
 
-        if (arg_running_as == MANAGER_SYSTEM) {
+        if (arg_running_as == SYSTEMD_SYSTEM) {
                 /* Become a session leader if we aren't one yet. */
                 setsid();
 
@@ -1506,7 +1571,7 @@ int main(int argc, char *argv[]) {
 
         /* Reset the console, but only if this is really init and we
          * are freshly booted */
-        if (arg_running_as == MANAGER_SYSTEM && arg_action == ACTION_RUN)
+        if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
                 console_setup(getpid() == 1 && !skip_setup);
 
         /* Open the logging devices, if possible and necessary */
@@ -1517,16 +1582,16 @@ int main(int argc, char *argv[]) {
         if (getpid() == 1)
                 install_crash_handler();
 
-        if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) {
+        if (getpid() == 1) {
                 r = mount_cgroup_controllers(arg_join_controllers);
                 if (r < 0)
                         goto finish;
         }
 
-        if (arg_running_as == MANAGER_SYSTEM) {
+        if (arg_running_as == SYSTEMD_SYSTEM) {
                 const char *virtualization = NULL;
 
-                log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")");
+                log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
 
                 detect_virtualization(&virtualization);
                 if (virtualization)
@@ -1536,15 +1601,17 @@ int main(int argc, char *argv[]) {
                         log_info("Running in initial RAM disk.");
 
         } else
-                log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")");
+                log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
 
-        if (arg_running_as == MANAGER_SYSTEM && !skip_setup) {
+        if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
                 locale_setup();
 
                 if (arg_show_status || plymouth_running())
                         status_welcome();
 
+#ifdef HAVE_KMOD
                 kmod_setup();
+#endif
                 hostname_setup();
                 machine_id_setup();
                 loopback_setup();
@@ -1554,7 +1621,7 @@ int main(int argc, char *argv[]) {
                 test_cgroups();
         }
 
-        if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0)
+        if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
                 watchdog_set_timeout(&arg_runtime_watchdog);
 
         if (arg_timer_slack_nsec != (nsec_t) -1)
@@ -1574,7 +1641,7 @@ int main(int argc, char *argv[]) {
                 }
         }
 
-        if (arg_running_as == MANAGER_USER) {
+        if (arg_running_as == SYSTEMD_USER) {
                 /* Become reaper of our children */
                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
                         log_warning("Failed to make us a subreaper: %m");
@@ -1583,7 +1650,7 @@ int main(int argc, char *argv[]) {
                 }
         }
 
-        if (arg_running_as == MANAGER_SYSTEM)
+        if (arg_running_as == SYSTEMD_SYSTEM)
                 bump_rlimit_nofile(&saved_rlimit_nofile);
 
         r = manager_new(arg_running_as, &m);
@@ -1619,10 +1686,7 @@ int main(int argc, char *argv[]) {
 
         /* This will close all file descriptors that were opened, but
          * not claimed by any unit. */
-        if (fds) {
-                fdset_free(fds);
-                fds = NULL;
-        }
+        fdset_free(fds);
 
         if (serialization) {
                 fclose(serialization);
@@ -1671,12 +1735,23 @@ int main(int argc, char *argv[]) {
                         manager_dump_units(m, stdout, "\t");
                 }
 
-                r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
-                if (r < 0) {
-                        log_error("Failed to start default target: %s", bus_error(&error, r));
+                r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
+                if (r == -EPERM) {
+                        log_error("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
+                        dbus_error_free(&error);
+
+                        r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
+                        if (r < 0) {
+                                log_error("Failed to start default target: %s", bus_error(&error, r));
+                                dbus_error_free(&error);
+                                goto finish;
+                        }
+                } else if (r < 0) {
+                        log_error("Failed to isolate default target: %s", bus_error(&error, r));
                         dbus_error_free(&error);
                         goto finish;
                 }
+
                 m->default_unit_job_id = default_unit_job->id;
 
                 after_startup = now(CLOCK_MONOTONIC);
@@ -1822,7 +1897,7 @@ finish:
                         args[i++] = SYSTEMD_BINARY_PATH;
                         if (switch_root_dir)
                                 args[i++] = "--switched-root";
-                        args[i++] = arg_running_as == MANAGER_SYSTEM ? "--system" : "--user";
+                        args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
                         args[i++] = "--deserialize";
                         args[i++] = sfd;
                         args[i++] = NULL;