X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fcore%2Fmain.c;h=8abad6d25728216c69e5ed890e2742f2dce80db1;hp=b50a1ca21540de6019b5f120983fcf50856ed422;hb=8be28fb1e0aa57b2a6ba7736440c9bba54cb86d1;hpb=ddfa5101a2e0d94571c10e2bbc7c38b60dc6cba1 diff --git a/src/core/main.c b/src/core/main.c index b50a1ca21..8abad6d25 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -40,7 +40,7 @@ #include "fdset.h" #include "special.h" #include "conf-parser.h" -#include "bus-errors.h" +#include "dbus-common.h" #include "missing.h" #include "label.h" #include "build.h" @@ -52,16 +52,22 @@ #include "switch-root.h" #include "capability.h" #include "killall.h" +#include "env-util.h" +#include "hwclock.h" +#include "sd-daemon.h" +#include "sd-messages.h" #include "mount-setup.h" #include "loopback-setup.h" +#ifdef HAVE_KMOD #include "kmod-setup.h" +#endif #include "hostname-setup.h" #include "machine-id-setup.h" -#include "locale-setup.h" -#include "hwclock.h" #include "selinux-setup.h" #include "ima-setup.h" +#include "fileio.h" +#include "smack-setup.h" static enum { ACTION_RUN, @@ -73,7 +79,7 @@ static enum { } arg_action = ACTION_RUN; static char *arg_default_unit = NULL; -static ManagerRunningAs arg_running_as = _MANAGER_RUNNING_AS_INVALID; +static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID; static bool arg_dump_core = true; static bool arg_crash_shell = false; @@ -81,12 +87,15 @@ static int arg_crash_chvt = -1; static bool arg_confirm_spawn = false; static bool arg_show_status = true; static bool arg_switched_root = false; -static char **arg_default_controllers = NULL; static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; +static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC; +static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC; +static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC; static usec_t arg_runtime_watchdog = 0; static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; +static char **arg_default_environment = NULL; static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {}; static uint64_t arg_capability_bounding_set_drop = 0; static nsec_t arg_timer_slack_nsec = (nsec_t) -1; @@ -98,37 +107,40 @@ static void nop_handler(int sig) { _noreturn_ static void crash(int sig) { - if (!arg_dump_core) + if (getpid() != 1) + /* Pass this on immediately, if this is not PID 1 */ + raise(sig); + else if (!arg_dump_core) log_error("Caught <%s>, not dumping core.", signal_to_string(sig)); else { - struct sigaction sa; + struct sigaction sa = { + .sa_handler = nop_handler, + .sa_flags = SA_NOCLDSTOP|SA_RESTART, + }; pid_t pid; /* We want to wait for the core process, hence let's enable SIGCHLD */ - zero(sa); - sa.sa_handler = nop_handler; - sa.sa_flags = SA_NOCLDSTOP|SA_RESTART; - assert_se(sigaction(SIGCHLD, &sa, NULL) == 0); + sigaction(SIGCHLD, &sa, NULL); - if ((pid = fork()) < 0) + pid = fork(); + if (pid < 0) log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno)); else if (pid == 0) { - struct rlimit rl; + struct rlimit rl = {}; /* Enable default signal handler for core dump */ zero(sa); sa.sa_handler = SIG_DFL; - assert_se(sigaction(sig, &sa, NULL) == 0); + sigaction(sig, &sa, NULL); /* Don't limit the core dump size */ - zero(rl); rl.rlim_cur = RLIM_INFINITY; rl.rlim_max = RLIM_INFINITY; setrlimit(RLIMIT_CORE, &rl); /* Just to be sure... */ - assert_se(chdir("/") == 0); + chdir("/"); /* Raise the signal again */ raise(sig); @@ -141,7 +153,8 @@ _noreturn_ static void crash(int sig) { int r; /* Order things nicely. */ - if ((r = wait_for_terminate(pid, &status)) < 0) + r = wait_for_terminate(pid, &status); + if (r < 0) log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r)); else if (status.si_code != CLD_DUMPED) log_error("Caught <%s>, core dump failed.", signal_to_string(sig)); @@ -154,16 +167,16 @@ _noreturn_ static void crash(int sig) { chvt(arg_crash_chvt); if (arg_crash_shell) { - struct sigaction sa; + struct sigaction sa = { + .sa_handler = SIG_IGN, + .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART, + }; pid_t pid; log_info("Executing crash shell in 10s..."); sleep(10); /* Let the kernel reap children for us */ - zero(sa); - sa.sa_handler = SIG_IGN; - sa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART; assert_se(sigaction(SIGCHLD, &sa, NULL) == 0); pid = fork(); @@ -185,12 +198,10 @@ _noreturn_ static void crash(int sig) { } static void install_crash_handler(void) { - struct sigaction sa; - - zero(sa); - - sa.sa_handler = crash; - sa.sa_flags = SA_NODEFER; + struct sigaction sa = { + .sa_handler = crash, + .sa_flags = SA_NODEFER, + }; sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1); } @@ -339,53 +350,68 @@ static int parse_proc_cmdline_word(const char *word) { else arg_default_std_error = r; } else if (startswith(word, "systemd.setenv=")) { - char *cenv, *eq; - int r; + _cleanup_free_ char *cenv = NULL; cenv = strdup(word + 15); if (!cenv) return -ENOMEM; - eq = strchr(cenv, '='); - if (!eq) { - r = unsetenv(cenv); - if (r < 0) - log_warning("unsetenv failed %m. Ignoring."); - } else { - *eq = 0; - r = setenv(cenv, eq + 1, 1); - if (r < 0) - log_warning("setenv failed %m. Ignoring."); - } - free(cenv); + if (env_assignment_is_valid(cenv)) { + char **env; + + env = strv_env_set(arg_default_environment, cenv); + if (env) + arg_default_environment = env; + else + log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv); + } else + log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv); } else if (startswith(word, "systemd.") || (in_initrd() && startswith(word, "rd.systemd."))) { - log_warning("Unknown kernel switch %s. Ignoring.", word); - - log_info("Supported kernel switches:\n" - "systemd.unit=UNIT Default unit to start\n" - "rd.systemd.unit=UNIT Default unit to start when run in initrd\n" - "systemd.dump_core=0|1 Dump core on crash\n" - "systemd.crash_shell=0|1 Run shell on crash\n" - "systemd.crash_chvt=N Change to VT #N on crash\n" - "systemd.confirm_spawn=0|1 Confirm every process spawn\n" - "systemd.show_status=0|1 Show status updates on the console during bootup\n" - "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n" - " Log target\n" - "systemd.log_level=LEVEL Log level\n" - "systemd.log_color=0|1 Highlight important log messages\n" - "systemd.log_location=0|1 Include code location in log messages\n" - "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" - " Set default log output for services\n" - "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" - " Set default log error output for services\n" - "systemd.setenv=ASSIGNMENT Set an environment variable for all spawned processes\n"); + const char *c; + + /* Ignore systemd.journald.xyz and friends */ + c = word; + if (startswith(c, "rd.")) + c += 3; + if (startswith(c, "systemd.")) + c += 8; + if (c[strcspn(c, ".=")] != '.') { + + log_warning("Unknown kernel switch %s. Ignoring.", word); + + log_info("Supported kernel switches:\n" + "systemd.unit=UNIT Default unit to start\n" + "rd.systemd.unit=UNIT Default unit to start when run in initrd\n" + "systemd.dump_core=0|1 Dump core on crash\n" + "systemd.crash_shell=0|1 Run shell on crash\n" + "systemd.crash_chvt=N Change to VT #N on crash\n" + "systemd.confirm_spawn=0|1 Confirm every process spawn\n" + "systemd.show_status=0|1 Show status updates on the console during bootup\n" + "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n" + " Log target\n" + "systemd.log_level=LEVEL Log level\n" + "systemd.log_color=0|1 Highlight important log messages\n" + "systemd.log_location=0|1 Include code location in log messages\n" + "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" + " Set default log output for services\n" + "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" + " Set default log error output for services\n" + "systemd.setenv=ASSIGNMENT Set an environment variable for all spawned processes\n"); + } } else if (streq(word, "quiet")) arg_show_status = false; - else if (!in_initrd()) { + else if (streq(word, "debug")) { + /* Log to kmsg, the journal socket will fill up before the + * journal is started and tools running during that time + * will block with every log message for for 60 seconds, + * before they give up. */ + log_set_max_level(LOG_DEBUG); + log_set_target(LOG_TARGET_KMSG); + } else if (!in_initrd()) { unsigned i; /* SysV compatibility */ @@ -397,87 +423,47 @@ static int parse_proc_cmdline_word(const char *word) { return 0; } -static int config_parse_level2( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - assert(filename); - assert(lvalue); - assert(rvalue); - - log_set_max_level_from_string(rvalue); - return 0; -} - -static int config_parse_target( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - assert(filename); - assert(lvalue); - assert(rvalue); - - log_set_target_from_string(rvalue); - return 0; -} - -static int config_parse_color( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - assert(filename); - assert(lvalue); - assert(rvalue); - - log_show_color_from_string(rvalue); - return 0; -} - -static int config_parse_location( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +#define DEFINE_SETTER(name, func, descr) \ + static int name(const char *unit, \ + const char *filename, \ + unsigned line, \ + const char *section, \ + const char *lvalue, \ + int ltype, \ + const char *rvalue, \ + void *data, \ + void *userdata) { \ + \ + int r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + \ + r = func(rvalue); \ + if (r < 0) \ + log_syntax(unit, LOG_ERR, filename, line, -r, \ + "Invalid " descr "'%s': %s", \ + rvalue, strerror(-r)); \ + \ + return 0; \ + } - assert(filename); - assert(lvalue); - assert(rvalue); +DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level") +DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target") +DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" ) +DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location") - log_show_location_from_string(rvalue); - return 0; -} -static int config_parse_cpu_affinity2( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +static int config_parse_cpu_affinity2(const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { char *w; size_t l; @@ -505,7 +491,8 @@ static int config_parse_cpu_affinity2( return log_oom(); if (r < 0 || cpu >= ncpus) { - log_error("[%s:%u] Failed to parse CPU affinity: %s", filename, line, rvalue); + log_syntax(unit, LOG_ERR, filename, line, -r, + "Failed to parse CPU affinity '%s'", rvalue); CPU_FREE(c); return -EBADMSG; } @@ -515,7 +502,7 @@ static int config_parse_cpu_affinity2( if (c) { if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0) - log_warning("Failed to set CPU affinity: %m"); + log_warning_unit(unit, "Failed to set CPU affinity: %m"); CPU_FREE(c); } @@ -536,22 +523,19 @@ static void strv_free_free(char ***l) { } static void free_join_controllers(void) { - if (!arg_join_controllers) - return; - strv_free_free(arg_join_controllers); arg_join_controllers = NULL; } -static int config_parse_join_controllers( - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +static int config_parse_join_controllers(const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { unsigned n = 0; char *state, *w; @@ -653,14 +637,17 @@ static int parse_config_file(void) { { "Manager", "ShowStatus", config_parse_bool, 0, &arg_show_status }, { "Manager", "CrashChVT", config_parse_int, 0, &arg_crash_chvt }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "DefaultControllers", config_parse_strv, 0, &arg_default_controllers }, { "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output }, { "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error }, + { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec }, + { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec }, + { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec }, { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, - { "Manager", "RuntimeWatchdogSec", config_parse_usec, 0, &arg_runtime_watchdog }, - { "Manager", "ShutdownWatchdogSec", config_parse_usec, 0, &arg_shutdown_watchdog }, + { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog }, + { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog }, { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop }, { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec }, + { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment }, { "Manager", "DefaultLimitCPU", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_CPU]}, { "Manager", "DefaultLimitFSIZE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_FSIZE]}, { "Manager", "DefaultLimitDATA", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_DATA]}, @@ -680,11 +667,11 @@ static int parse_config_file(void) { { NULL, NULL, NULL, 0, NULL } }; - FILE *f; + _cleanup_fclose_ FILE *f; const char *fn; int r; - fn = arg_running_as == MANAGER_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE; + fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf"; f = fopen(fn, "re"); if (!f) { if (errno == ENOENT) @@ -694,52 +681,40 @@ static int parse_config_file(void) { return 0; } - r = config_parse(fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, NULL); + r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL); if (r < 0) log_warning("Failed to parse configuration file: %s", strerror(-r)); - fclose(f); - return 0; } static int parse_proc_cmdline(void) { - char *line, *w, *state; - int r; + _cleanup_free_ char *line = NULL; + char *w, *state; size_t l; + int r; - /* Don't read /proc/cmdline if we are in a container, since - * that is only relevant for the host system */ - if (detect_container(NULL) > 0) - return 0; - - if ((r = read_one_line_file("/proc/cmdline", &line)) < 0) { + r = proc_cmdline(&line); + if (r < 0) log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r)); + if (r <= 0) return 0; - } FOREACH_WORD_QUOTED(w, l, line, state) { - char *word; + _cleanup_free_ char *word; - if (!(word = strndup(w, l))) { - r = -ENOMEM; - goto finish; - } + word = strndup(w, l); + if (!word) + return log_oom(); r = parse_proc_cmdline_word(word); - free(word); - if (r < 0) { log_error("Failed on cmdline argument %s: %s", word, strerror(-r)); - goto finish; + return r; } } - r = 0; - -finish: - free(line); - return r; + return 0; } static int parse_argv(int argc, char *argv[]) { @@ -761,7 +736,6 @@ static int parse_argv(int argc, char *argv[]) { ARG_SHOW_STATUS, ARG_DESERIALIZE, ARG_SWITCHED_ROOT, - ARG_INTROSPECT, ARG_DEFAULT_STD_OUTPUT, ARG_DEFAULT_STD_ERROR }; @@ -784,7 +758,6 @@ static int parse_argv(int argc, char *argv[]) { { "show-status", optional_argument, NULL, ARG_SHOW_STATUS }, { "deserialize", required_argument, NULL, ARG_DESERIALIZE }, { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT }, - { "introspect", optional_argument, NULL, ARG_INTROSPECT }, { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, }, { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, }, { NULL, 0, NULL, 0 } @@ -871,11 +844,11 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_SYSTEM: - arg_running_as = MANAGER_SYSTEM; + arg_running_as = SYSTEMD_SYSTEM; break; case ARG_USER: - arg_running_as = MANAGER_USER; + arg_running_as = SYSTEMD_USER; break; case ARG_TEST: @@ -930,14 +903,18 @@ static int parse_argv(int argc, char *argv[]) { int fd; FILE *f; - if ((r = safe_atoi(optarg, &fd)) < 0 || fd < 0) { + r = safe_atoi(optarg, &fd); + if (r < 0 || fd < 0) { log_error("Failed to parse deserialize option %s.", optarg); - return r; + return r < 0 ? r : -EINVAL; } - if (!(f = fdopen(fd, "r"))) { + fd_cloexec(fd, true); + + f = fdopen(fd, "r"); + if (!f) { log_error("Failed to open serialization fd: %m"); - return r; + return -errno; } if (serialization) @@ -952,27 +929,6 @@ static int parse_argv(int argc, char *argv[]) { arg_switched_root = true; break; - case ARG_INTROSPECT: { - const char * const * i = NULL; - - for (i = bus_interface_table; *i; i += 2) - if (!optarg || streq(i[0], optarg)) { - fputs(DBUS_INTROSPECT_1_0_XML_DOCTYPE_DECL_NODE - "\n", stdout); - fputs(i[1], stdout); - fputs("\n", stdout); - - if (optarg) - break; - } - - if (!i[0] && optarg) - log_error("Unknown interface %s.", optarg); - - arg_action = ACTION_DONE; - break; - } - case 'h': arg_action = ACTION_HELP; break; @@ -1018,11 +974,13 @@ static int parse_argv(int argc, char *argv[]) { * relevant for the container, hence we rely on argv[] * instead. */ - for (a = argv; a < argv + argc; a++) - if ((r = parse_proc_cmdline_word(*a)) < 0) { + for (a = argv; a < argv + argc; a++) { + r = parse_proc_cmdline_word(*a); + if (r < 0) { log_error("Failed on cmdline argument %s: %s", *a, strerror(-r)); return r; } + } } return 0; @@ -1035,7 +993,6 @@ static int help(void) { " -h --help Show this help\n" " --test Determine startup sequence, dump it and exit\n" " --dump-configuration-items Dump understood unit configuration items\n" - " --introspect[=INTERFACE] Extract D-Bus interface data\n" " --unit=UNIT Set default unit\n" " --system Run a system instance, even if PID != 1\n" " --user Run a user instance\n" @@ -1056,13 +1013,12 @@ static int help(void) { static int version(void) { puts(PACKAGE_STRING); - puts(DISTRIBUTION); puts(SYSTEMD_FEATURES); return 0; } -static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize_jobs) { +static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) { FILE *f = NULL; FDSet *fds = NULL; int r; @@ -1071,15 +1027,16 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize assert(_f); assert(_fds); - /* Make sure nothing is really destructed when we shut down */ - m->n_reloading ++; - r = manager_open_serialization(m, &f); if (r < 0) { log_error("Failed to create serialization file: %s", strerror(-r)); goto fail; } + /* Make sure nothing is really destructed when we shut down */ + m->n_reloading ++; + bus_broadcast_reloading(m, true); + fds = fdset_new(); if (!fds) { r = -ENOMEM; @@ -1087,7 +1044,7 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize goto fail; } - r = manager_serialize(m, f, fds, serialize_jobs); + r = manager_serialize(m, f, fds, switching_root); if (r < 0) { log_error("Failed to serialize state: %s", strerror(-r)); goto fail; @@ -1124,23 +1081,40 @@ fail: return r; } -static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) { - const char *e; - unsigned long long a, b; +static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { + struct rlimit nl; + int r; + + assert(saved_rlimit); + + /* Save the original RLIMIT_NOFILE so that we can reset it + * later when transitioning from the initrd to the main + * systemd or suchlike. */ + if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) { + log_error("Reading RLIMIT_NOFILE failed: %m"); + return -errno; + } - assert(t); + /* Make sure forked processes get the default kernel setting */ + if (!arg_default_rlimit[RLIMIT_NOFILE]) { + struct rlimit *rl; - e = getenv("RD_TIMESTAMP"); - if (!e) - return NULL; + rl = newdup(struct rlimit, saved_rlimit, 1); + if (!rl) + return log_oom(); - if (sscanf(e, "%llu %llu", &a, &b) != 2) - return NULL; + arg_default_rlimit[RLIMIT_NOFILE] = rl; + } - t->realtime = (usec_t) a; - t->monotonic = (usec_t) b; + /* Bump up the resource limit for ourselves substantially */ + nl.rlim_cur = nl.rlim_max = 64*1024; + r = setrlimit_closest(RLIMIT_NOFILE, &nl); + if (r < 0) { + log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r)); + return r; + } - return t; + return 0; } static void test_mtab(void) { @@ -1190,6 +1164,28 @@ static void test_cgroups(void) { sleep(10); } +static int initialize_join_controllers(void) { + /* By default, mount "cpu" + "cpuacct" together, and "net_cls" + * + "net_prio". We'd like to add "cpuset" to the mix, but + * "cpuset" does't really work for groups with no initialized + * attributes. */ + + arg_join_controllers = new(char**, 3); + if (!arg_join_controllers) + return -ENOMEM; + + arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL); + arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL); + arg_join_controllers[2] = NULL; + + if (!arg_join_controllers[0] || !arg_join_controllers[1]) { + free_join_controllers(); + return -ENOMEM; + } + + return 0; +} + int main(int argc, char *argv[]) { Manager *m = NULL; int r, retval = EXIT_FAILURE; @@ -1199,6 +1195,8 @@ int main(int argc, char *argv[]) { bool reexecute = false; const char *shutdown_verb = NULL; dual_timestamp initrd_timestamp = { 0ULL, 0ULL }; + dual_timestamp userspace_timestamp = { 0ULL, 0ULL }; + dual_timestamp kernel_timestamp = { 0ULL, 0ULL }; static char systemd[] = "systemd"; bool skip_setup = false; int j; @@ -1206,6 +1204,7 @@ int main(int argc, char *argv[]) { bool arm_reboot_watchdog = false; bool queue_default_job = false; char *switch_root_dir = NULL, *switch_root_init = NULL; + static struct rlimit saved_rlimit_nofile = { 0, 0 }; #ifdef HAVE_SYSV_COMPAT if (getpid() != 1 && strstr(program_invocation_short_name, "init")) { @@ -1219,22 +1218,19 @@ int main(int argc, char *argv[]) { } #endif + dual_timestamp_from_monotonic(&kernel_timestamp, 0); + dual_timestamp_get(&userspace_timestamp); + /* Determine if this is a reexecution or normal bootup. We do * the full command line parsing much later, so let's just * have a quick peek here. */ - for (j = 1; j < argc; j++) - if (streq(argv[j], "--deserialize")) { - skip_setup = true; - break; - } + if (strv_find(argv+1, "--deserialize")) + skip_setup = true; /* If we have switched root, do all the special setup * things */ - for (j = 1; j < argc; j++) - if (streq(argv[j], "--switched-root")) { - skip_setup = false; - break; - } + if (strv_find(argv+1, "--switched-root")) + skip_setup = false; /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we are then @@ -1247,53 +1243,95 @@ int main(int argc, char *argv[]) { saved_argc = argc; log_show_color(isatty(STDERR_FILENO) > 0); - log_show_location(false); - log_set_max_level(LOG_INFO); - if (getpid() == 1) { - if (in_initrd()) { - char *rd_timestamp = NULL; - - dual_timestamp_get(&initrd_timestamp); - asprintf(&rd_timestamp, "%llu %llu", - (unsigned long long) initrd_timestamp.realtime, - (unsigned long long) initrd_timestamp.monotonic); - if (rd_timestamp) { - setenv("RD_TIMESTAMP", rd_timestamp, 1); - free(rd_timestamp); - } - } + /* Disable the umask logic */ + if (getpid() == 1) + umask(0); - arg_running_as = MANAGER_SYSTEM; - log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_JOURNAL : LOG_TARGET_JOURNAL_OR_KMSG); + if (getpid() == 1 && detect_container(NULL) <= 0) { + + /* Running outside of a container as PID 1 */ + arg_running_as = SYSTEMD_SYSTEM; + make_null_stdio(); + log_set_target(LOG_TARGET_KMSG); + log_open(); + + if (in_initrd()) + initrd_timestamp = userspace_timestamp; if (!skip_setup) { + mount_setup_early(); if (selinux_setup(&loaded_policy) < 0) goto finish; if (ima_setup() < 0) goto finish; + if (smack_setup() < 0) + goto finish; } - log_open(); - if (label_init(NULL) < 0) goto finish; - if (!skip_setup) + if (!skip_setup) { if (hwclock_is_localtime() > 0) { int min; - r = hwclock_apply_localtime_delta(&min); + /* The first-time call to settimeofday() does a time warp in the kernel */ + r = hwclock_set_timezone(&min); if (r < 0) log_error("Failed to apply local time delta, ignoring: %s", strerror(-r)); else log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min); + } else if (!in_initrd()) { + /* + * Do dummy first-time call to seal the kernel's time warp magic + * + * Do not call this this from inside the initrd. The initrd might not + * carry /etc/adjtime with LOCAL, but the real system could be set up + * that way. In such case, we need to delay the time-warp or the sealing + * until we reach the real system. + */ + hwclock_reset_timezone(); + + /* Tell the kernel our timezone */ + r = hwclock_set_timezone(NULL); + if (r < 0) + log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r)); } + } + + /* Set the default for later on, but don't actually + * open the logs like this for now. Note that if we + * are transitioning from the initrd there might still + * be journal fd open, and we shouldn't attempt + * opening that before we parsed /proc/cmdline which + * might redirect output elsewhere. */ + log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); + + } else if (getpid() == 1) { + /* Running inside a container, as PID 1 */ + arg_running_as = SYSTEMD_SYSTEM; + log_set_target(LOG_TARGET_CONSOLE); + log_open(); + + /* For the later on, see above... */ + log_set_target(LOG_TARGET_JOURNAL); + + /* clear the kernel timestamp, + * because we are in a container */ + kernel_timestamp.monotonic = 0ULL; + kernel_timestamp.realtime = 0ULL; } else { - arg_running_as = MANAGER_USER; + /* Running as user instance */ + arg_running_as = SYSTEMD_USER; log_set_target(LOG_TARGET_AUTO); log_open(); + + /* clear the kernel timestamp, + * because we are not PID 1 */ + kernel_timestamp.monotonic = 0ULL; + kernel_timestamp.realtime = 0ULL; } /* Initialize default unit */ @@ -1303,20 +1341,13 @@ int main(int argc, char *argv[]) { goto finish; } - /* By default, mount "cpu" and "cpuacct" together */ - arg_join_controllers = new(char**, 2); - if (!arg_join_controllers) - goto finish; - - arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL); - arg_join_controllers[1] = NULL; - - if (!arg_join_controllers[0]) + r = initialize_join_controllers(); + if (r < 0) goto finish; /* Mount /proc, /sys and friends, so that /proc/cmdline and * /proc/$PID/fd is available. */ - if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) { + if (getpid() == 1) { r = mount_setup(loaded_policy); if (r < 0) goto finish; @@ -1325,13 +1356,12 @@ int main(int argc, char *argv[]) { /* Reset all signal handlers. */ assert_se(reset_all_signal_handlers() == 0); - /* If we are init, we can block sigkill. Yay. */ ignore_signals(SIGNALS_IGNORE, -1); if (parse_config_file() < 0) goto finish; - if (arg_running_as == MANAGER_SYSTEM) + if (arg_running_as == SYSTEMD_SYSTEM) if (parse_proc_cmdline() < 0) goto finish; @@ -1340,12 +1370,20 @@ int main(int argc, char *argv[]) { if (parse_argv(argc, argv) < 0) goto finish; - if (arg_action == ACTION_TEST && geteuid() == 0) { + if (arg_action == ACTION_TEST && + geteuid() == 0) { log_error("Don't run test mode as root."); goto finish; } - if (arg_running_as == MANAGER_SYSTEM && + if (arg_running_as == SYSTEMD_USER && + arg_action == ACTION_RUN && + sd_booted() <= 0) { + log_error("Trying to run as user instance, but the system has not been booted with systemd."); + goto finish; + } + + if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN && running_in_chroot() > 0) { log_error("Cannot be run in a chroot() environment."); @@ -1367,99 +1405,60 @@ int main(int argc, char *argv[]) { goto finish; } + if (arg_running_as == SYSTEMD_USER && + !getenv("XDG_RUNTIME_DIR")) { + log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set."); + goto finish; + } + assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST); /* Close logging fds, in order not to confuse fdset below */ log_close(); /* Remember open file descriptors for later deserialization */ - if (serialization) { - r = fdset_new_fill(&fds); - if (r < 0) { - log_error("Failed to allocate fd set: %s", strerror(-r)); - goto finish; - } - - assert_se(fdset_remove(fds, fileno(serialization)) >= 0); + r = fdset_new_fill(&fds); + if (r < 0) { + log_error("Failed to allocate fd set: %s", strerror(-r)); + goto finish; } else - close_all_fds(NULL, 0); - - /* Set up PATH unless it is already set */ - setenv("PATH", -#ifdef HAVE_SPLIT_USR - "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", -#else - "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin", -#endif - arg_running_as == MANAGER_SYSTEM); - - if (arg_running_as == MANAGER_SYSTEM) { - /* Parse the data passed to us. We leave this - * variables set, but the manager later on will not - * pass them on to our children. */ - if (!in_initrd()) - parse_initrd_timestamp(&initrd_timestamp); - - /* Unset some environment variables passed in from the - * kernel that don't really make sense for us. */ - unsetenv("HOME"); - unsetenv("TERM"); - - /* When we are invoked by a shell, these might be set, - * but make little sense to pass on */ - unsetenv("PWD"); - unsetenv("SHLVL"); - unsetenv("_"); - - /* When we are invoked by a chroot-like tool such as - * nspawn, these might be set, but make little sense - * to pass on */ - unsetenv("USER"); - unsetenv("LOGNAME"); - - /* All other variables are left as is, so that clients - * can still read them via /proc/1/environ */ - } + fdset_cloexec(fds, true); - /* Move out of the way, so that we won't block unmounts */ - assert_se(chdir("/") == 0); + if (serialization) + assert_se(fdset_remove(fds, fileno(serialization)) >= 0); - if (arg_running_as == MANAGER_SYSTEM) { + if (arg_running_as == SYSTEMD_SYSTEM) /* Become a session leader if we aren't one yet. */ setsid(); - /* Disable the umask logic */ - umask(0); - } + /* Move out of the way, so that we won't block unmounts */ + assert_se(chdir("/") == 0); /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */ dbus_connection_set_change_sigpipe(FALSE); /* Reset the console, but only if this is really init and we * are freshly booted */ - if (arg_running_as == MANAGER_SYSTEM && arg_action == ACTION_RUN) { + if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN) console_setup(getpid() == 1 && !skip_setup); - make_null_stdio(); - } /* Open the logging devices, if possible and necessary */ log_open(); /* Make sure we leave a core dump without panicing the * kernel. */ - if (getpid() == 1) + if (getpid() == 1) { install_crash_handler(); - if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) { r = mount_cgroup_controllers(arg_join_controllers); if (r < 0) goto finish; } - if (arg_running_as == MANAGER_SYSTEM) { + if (arg_running_as == SYSTEMD_SYSTEM) { const char *virtualization = NULL; - log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")"); + log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")"); detect_virtualization(&virtualization); if (virtualization) @@ -1469,15 +1468,15 @@ int main(int argc, char *argv[]) { log_info("Running in initial RAM disk."); } else - log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")"); - - if (arg_running_as == MANAGER_SYSTEM && !skip_setup) { - locale_setup(); + log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")"); + if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) { if (arg_show_status || plymouth_running()) status_welcome(); +#ifdef HAVE_KMOD kmod_setup(); +#endif hostname_setup(); machine_id_setup(); loopback_setup(); @@ -1487,7 +1486,7 @@ int main(int argc, char *argv[]) { test_cgroups(); } - if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0) + if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0) watchdog_set_timeout(&arg_runtime_watchdog); if (arg_timer_slack_nsec != (nsec_t) -1) @@ -1495,19 +1494,19 @@ int main(int argc, char *argv[]) { log_error("Failed to adjust timer slack: %m"); if (arg_capability_bounding_set_drop) { - r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true); + r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop); if (r < 0) { - log_error("Failed to drop capability bounding set: %s", strerror(-r)); + log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r)); goto finish; } - r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop); + r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true); if (r < 0) { - log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r)); + log_error("Failed to drop capability bounding set: %s", strerror(-r)); goto finish; } } - if (arg_running_as == MANAGER_USER) { + if (arg_running_as == SYSTEMD_USER) { /* Become reaper of our children */ if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) { log_warning("Failed to make us a subreaper: %m"); @@ -1516,7 +1515,10 @@ int main(int argc, char *argv[]) { } } - r = manager_new(arg_running_as, &m); + if (arg_running_as == SYSTEMD_SYSTEM) + bump_rlimit_nofile(&saved_rlimit_nofile); + + r = manager_new(arg_running_as, !!serialization, &m); if (r < 0) { log_error("Failed to allocate manager object: %s", strerror(-r)); goto finish; @@ -1525,16 +1527,19 @@ int main(int argc, char *argv[]) { m->confirm_spawn = arg_confirm_spawn; m->default_std_output = arg_default_std_output; m->default_std_error = arg_default_std_error; + m->default_restart_usec = arg_default_restart_usec; + m->default_timeout_start_usec = arg_default_timeout_start_usec; + m->default_timeout_stop_usec = arg_default_timeout_stop_usec; m->runtime_watchdog = arg_runtime_watchdog; m->shutdown_watchdog = arg_shutdown_watchdog; + m->userspace_timestamp = userspace_timestamp; + m->kernel_timestamp = kernel_timestamp; + m->initrd_timestamp = initrd_timestamp; manager_set_default_rlimits(m, arg_default_rlimit); - if (dual_timestamp_is_set(&initrd_timestamp)) - m->initrd_timestamp = initrd_timestamp; - - if (arg_default_controllers) - manager_set_default_controllers(m, arg_default_controllers); + if (arg_default_environment) + manager_environment_add(m, arg_default_environment); manager_set_show_status(m, arg_show_status); @@ -1549,10 +1554,8 @@ int main(int argc, char *argv[]) { /* This will close all file descriptors that were opened, but * not claimed by any unit. */ - if (fds) { - fdset_free(fds); - fds = NULL; - } + fdset_free(fds); + fds = NULL; if (serialization) { fclose(serialization); @@ -1572,7 +1575,7 @@ int main(int argc, char *argv[]) { if (r < 0) { log_error("Failed to load default target: %s", bus_error(&error, r)); dbus_error_free(&error); - } else if (target->load_state == UNIT_ERROR) + } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) log_error("Failed to load default target: %s", strerror(-target->load_error)); else if (target->load_state == UNIT_MASKED) log_error("Default target masked."); @@ -1585,7 +1588,7 @@ int main(int argc, char *argv[]) { log_error("Failed to load rescue target: %s", bus_error(&error, r)); dbus_error_free(&error); goto finish; - } else if (target->load_state == UNIT_ERROR) { + } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) { log_error("Failed to load rescue target: %s", strerror(-target->load_error)); goto finish; } else if (target->load_state == UNIT_MASKED) { @@ -1601,18 +1604,29 @@ int main(int argc, char *argv[]) { manager_dump_units(m, stdout, "\t"); } - r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job); - if (r < 0) { - log_error("Failed to start default target: %s", bus_error(&error, r)); + r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job); + if (r == -EPERM) { + log_debug("Default target could not be isolated, starting instead: %s", bus_error(&error, r)); + dbus_error_free(&error); + + r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job); + if (r < 0) { + log_error("Failed to start default target: %s", bus_error(&error, r)); + dbus_error_free(&error); + goto finish; + } + } else if (r < 0) { + log_error("Failed to isolate default target: %s", bus_error(&error, r)); dbus_error_free(&error); goto finish; } + m->default_unit_job_id = default_unit_job->id; after_startup = now(CLOCK_MONOTONIC); log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG, "Loaded units and determined initial transaction in %s.", - format_timespan(timespan, sizeof(timespan), after_startup - before_startup)); + format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0)); if (arg_action == ACTION_TEST) { printf("-> By jobs:\n"); @@ -1645,7 +1659,7 @@ int main(int argc, char *argv[]) { case MANAGER_REEXECUTE: - if (prepare_reexecute(m, &serialization, &fds, true) < 0) + if (prepare_reexecute(m, &serialization, &fds, false) < 0) goto finish; reexecute = true; @@ -1659,7 +1673,7 @@ int main(int argc, char *argv[]) { m->switch_root = m->switch_root_init = NULL; if (!switch_root_init) - if (prepare_reexecute(m, &serialization, &fds, false) < 0) + if (prepare_reexecute(m, &serialization, &fds, true) < 0) goto finish; reexecute = true; @@ -1694,10 +1708,9 @@ finish: manager_free(m); for (j = 0; j < RLIMIT_NLIMITS; j++) - free (arg_default_rlimit[j]); + free(arg_default_rlimit[j]); free(arg_default_unit); - strv_free(arg_default_controllers); free_join_controllers(); dbus_shutdown(); @@ -1712,6 +1725,12 @@ finish: * rebooted while we do that */ watchdog_close(true); + /* Reset the RLIMIT_NOFILE to the kernel default, so + * that the new systemd can pass the kernel default to + * its child processes */ + if (saved_rlimit_nofile.rlim_cur > 0) + setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile); + if (switch_root_dir) { /* Kill all remaining processes from the * initrd, but don't wait for them, so that we @@ -1746,11 +1765,15 @@ finish: args[i++] = SYSTEMD_BINARY_PATH; if (switch_root_dir) args[i++] = "--switched-root"; - args[i++] = arg_running_as == MANAGER_SYSTEM ? "--system" : "--user"; + args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user"; args[i++] = "--deserialize"; args[i++] = sfd; args[i++] = NULL; + /* do not pass along the environment we inherit from the kernel or initrd */ + if (switch_root_dir) + clearenv(); + assert(i <= args_size); execv(args[0], (char* const*) args); } @@ -1832,6 +1855,12 @@ finish: watchdog_close(true); } + /* Avoid the creation of new processes forked by the + * kernel; at this point, we will not listen to the + * signals anyway */ + if (detect_container(NULL) <= 0) + cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER); + execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block); free(env_block); log_error("Failed to execute shutdown binary, freezing: %m");