X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fcore%2Fmain.c;h=4da8ecb86453d8336e998f1c400b34dc3ab74f54;hp=9bcedbe71f6e0657f580782ac884b774537c2787;hb=19e65613563dd9c14cf1ce58aa6e151de8fb90c2;hpb=9f28b98ec6461b4e06edd1e149c1ee5e9dcc4be0 diff --git a/src/core/main.c b/src/core/main.c index 9bcedbe71..4da8ecb86 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "manager.h" #include "log.h" @@ -47,6 +48,10 @@ #include "def.h" #include "virt.h" #include "watchdog.h" +#include "path-util.h" +#include "switch-root.h" +#include "capability.h" +#include "killall.h" #include "mount-setup.h" #include "loopback-setup.h" @@ -54,36 +59,38 @@ #include "hostname-setup.h" #include "machine-id-setup.h" #include "locale-setup.h" +#include "hwclock.h" #include "selinux-setup.h" #include "ima-setup.h" +#include "sd-daemon.h" static enum { ACTION_RUN, ACTION_HELP, + ACTION_VERSION, ACTION_TEST, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DONE } arg_action = ACTION_RUN; static char *arg_default_unit = NULL; -static ManagerRunningAs arg_running_as = _MANAGER_RUNNING_AS_INVALID; +static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID; static bool arg_dump_core = true; static bool arg_crash_shell = false; static int arg_crash_chvt = -1; static bool arg_confirm_spawn = false; static bool arg_show_status = true; -#ifdef HAVE_SYSV_COMPAT -static bool arg_sysv_console = true; -#endif -static bool arg_mount_auto = true; -static bool arg_swap_auto = true; +static bool arg_switched_root = false; static char **arg_default_controllers = NULL; static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; static usec_t arg_runtime_watchdog = 0; static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; +static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {}; +static uint64_t arg_capability_bounding_set_drop = 0; +static nsec_t arg_timer_slack_nsec = (nsec_t) -1; static FILE* serialization = NULL; @@ -160,19 +167,14 @@ _noreturn_ static void crash(int sig) { sa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART; assert_se(sigaction(SIGCHLD, &sa, NULL) == 0); - if ((pid = fork()) < 0) - log_error("Failed to fork off crash shell: %s", strerror(errno)); + pid = fork(); + if (pid < 0) + log_error("Failed to fork off crash shell: %m"); else if (pid == 0) { - int fd, r; - - if ((fd = acquire_terminal("/dev/console", false, true, true)) < 0) - log_error("Failed to acquire terminal: %s", strerror(-fd)); - else if ((r = make_stdio(fd)) < 0) - log_error("Failed to duplicate terminal fd: %s", strerror(-r)); - + make_console_stdio(); execl("/bin/sh", "/bin/sh", NULL); - log_error("execl() failed: %s", strerror(errno)); + log_error("execl() failed: %m"); _exit(1); } @@ -226,11 +228,13 @@ static int set_default_unit(const char *u) { assert(u); - if (!(c = strdup(u))) + c = strdup(u); + if (!c) return -ENOMEM; free(arg_default_unit); arg_default_unit = c; + return 0; } @@ -252,10 +256,17 @@ static int parse_proc_cmdline_word(const char *word) { assert(word); - if (startswith(word, "systemd.unit=")) - return set_default_unit(word + 13); + if (startswith(word, "systemd.unit=")) { + + if (!in_initrd()) + return set_default_unit(word + 13); - else if (startswith(word, "systemd.log_target=")) { + } else if (startswith(word, "rd.systemd.unit=")) { + + if (in_initrd()) + return set_default_unit(word + 16); + + } else if (startswith(word, "systemd.log_target=")) { if (log_set_target_from_string(word + 19) < 0) log_warning("Failed to parse log target %s. Ignoring.", word + 19); @@ -340,38 +351,28 @@ static int parse_proc_cmdline_word(const char *word) { if (!eq) { r = unsetenv(cenv); if (r < 0) - log_warning("unsetenv failed %s. Ignoring.", strerror(errno)); + log_warning("unsetenv failed %m. Ignoring."); } else { *eq = 0; r = setenv(cenv, eq + 1, 1); if (r < 0) - log_warning("setenv failed %s. Ignoring.", strerror(errno)); + log_warning("setenv failed %m. Ignoring."); } free(cenv); -#ifdef HAVE_SYSV_COMPAT - } else if (startswith(word, "systemd.sysv_console=")) { - int r; - if ((r = parse_boolean(word + 21)) < 0) - log_warning("Failed to parse SysV console switch %s. Ignoring.", word + 20); - else - arg_sysv_console = r; -#endif - - } else if (startswith(word, "systemd.")) { + } else if (startswith(word, "systemd.") || + (in_initrd() && startswith(word, "rd.systemd."))) { log_warning("Unknown kernel switch %s. Ignoring.", word); log_info("Supported kernel switches:\n" "systemd.unit=UNIT Default unit to start\n" + "rd.systemd.unit=UNIT Default unit to start when run in initrd\n" "systemd.dump_core=0|1 Dump core on crash\n" "systemd.crash_shell=0|1 Run shell on crash\n" "systemd.crash_chvt=N Change to VT #N on crash\n" "systemd.confirm_spawn=0|1 Confirm every process spawn\n" "systemd.show_status=0|1 Show status updates on the console during bootup\n" -#ifdef HAVE_SYSV_COMPAT - "systemd.sysv_console=0|1 Connect output of SysV scripts to console\n" -#endif "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n" " Log target\n" "systemd.log_level=LEVEL Log level\n" @@ -380,14 +381,12 @@ static int parse_proc_cmdline_word(const char *word) { "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" " Set default log output for services\n" "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n" - " Set default log error output for services\n"); + " Set default log error output for services\n" + "systemd.setenv=ASSIGNMENT Set an environment variable for all spawned processes\n"); - } else if (streq(word, "quiet")) { + } else if (streq(word, "quiet")) arg_show_status = false; -#ifdef HAVE_SYSV_COMPAT - arg_sysv_console = false; -#endif - } else { + else if (!in_initrd()) { unsigned i; /* SysV compatibility */ @@ -497,14 +496,14 @@ static int config_parse_cpu_affinity2( unsigned cpu; if (!(t = strndup(w, l))) - return -ENOMEM; + return log_oom(); r = safe_atou(t, &cpu); free(t); if (!c) if (!(c = cpu_set_malloc(&ncpus))) - return -ENOMEM; + return log_oom(); if (r < 0 || cpu >= ncpus) { log_error("[%s:%u] Failed to parse CPU affinity: %s", filename, line, rvalue); @@ -570,7 +569,7 @@ static int config_parse_join_controllers( s = strndup(w, length); if (!s) - return -ENOMEM; + return log_oom(); l = strv_split(s, ","); free(s); @@ -586,7 +585,7 @@ static int config_parse_join_controllers( arg_join_controllers = new(char**, 2); if (!arg_join_controllers) { strv_free(l); - return -ENOMEM; + return log_oom(); } arg_join_controllers[0] = l; @@ -600,7 +599,7 @@ static int config_parse_join_controllers( t = new0(char**, n+2); if (!t) { strv_free(l); - return -ENOMEM; + return log_oom(); } n = 0; @@ -614,7 +613,7 @@ static int config_parse_join_controllers( if (!c) { strv_free(l); strv_free_free(t); - return -ENOMEM; + return log_oom(); } strv_free(l); @@ -626,7 +625,7 @@ static int config_parse_join_controllers( if (!c) { strv_free(l); strv_free_free(t); - return -ENOMEM; + return log_oom(); } t[n++] = c; @@ -653,19 +652,32 @@ static int parse_config_file(void) { { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core }, { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell }, { "Manager", "ShowStatus", config_parse_bool, 0, &arg_show_status }, -#ifdef HAVE_SYSV_COMPAT - { "Manager", "SysVConsole", config_parse_bool, 0, &arg_sysv_console }, -#endif { "Manager", "CrashChVT", config_parse_int, 0, &arg_crash_chvt }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "MountAuto", config_parse_bool, 0, &arg_mount_auto }, - { "Manager", "SwapAuto", config_parse_bool, 0, &arg_swap_auto }, { "Manager", "DefaultControllers", config_parse_strv, 0, &arg_default_controllers }, { "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output }, { "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error }, { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, { "Manager", "RuntimeWatchdogSec", config_parse_usec, 0, &arg_runtime_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_usec, 0, &arg_shutdown_watchdog }, + { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop }, + { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec }, + { "Manager", "DefaultLimitCPU", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_CPU]}, + { "Manager", "DefaultLimitFSIZE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_FSIZE]}, + { "Manager", "DefaultLimitDATA", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_DATA]}, + { "Manager", "DefaultLimitSTACK", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_STACK]}, + { "Manager", "DefaultLimitCORE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_CORE]}, + { "Manager", "DefaultLimitRSS", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_RSS]}, + { "Manager", "DefaultLimitNOFILE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_NOFILE]}, + { "Manager", "DefaultLimitAS", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_AS]}, + { "Manager", "DefaultLimitNPROC", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_NPROC]}, + { "Manager", "DefaultLimitMEMLOCK", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_MEMLOCK]}, + { "Manager", "DefaultLimitLOCKS", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_LOCKS]}, + { "Manager", "DefaultLimitSIGPENDING",config_parse_limit, 0, &arg_default_rlimit[RLIMIT_SIGPENDING]}, + { "Manager", "DefaultLimitMSGQUEUE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_MSGQUEUE]}, + { "Manager", "DefaultLimitNICE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_NICE]}, + { "Manager", "DefaultLimitRTPRIO", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_RTPRIO]}, + { "Manager", "DefaultLimitRTTIME", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_RTTIME]}, { NULL, NULL, NULL, 0, NULL } }; @@ -673,7 +685,7 @@ static int parse_config_file(void) { const char *fn; int r; - fn = arg_running_as == MANAGER_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE; + fn = arg_running_as == SYSTEMD_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE; f = fopen(fn, "re"); if (!f) { if (errno == ENOENT) @@ -716,10 +728,13 @@ static int parse_proc_cmdline(void) { } r = parse_proc_cmdline_word(word); - free(word); - - if (r < 0) + if (r < 0) { + log_error("Failed on cmdline argument %s: %s", word, strerror(-r)); + free(word); goto finish; + } + + free(word); } r = 0; @@ -740,13 +755,14 @@ static int parse_argv(int argc, char *argv[]) { ARG_SYSTEM, ARG_USER, ARG_TEST, + ARG_VERSION, ARG_DUMP_CONFIGURATION_ITEMS, ARG_DUMP_CORE, ARG_CRASH_SHELL, ARG_CONFIRM_SPAWN, ARG_SHOW_STATUS, - ARG_SYSV_CONSOLE, ARG_DESERIALIZE, + ARG_SWITCHED_ROOT, ARG_INTROSPECT, ARG_DEFAULT_STD_OUTPUT, ARG_DEFAULT_STD_ERROR @@ -762,15 +778,14 @@ static int parse_argv(int argc, char *argv[]) { { "user", no_argument, NULL, ARG_USER }, { "test", no_argument, NULL, ARG_TEST }, { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS }, - { "dump-core", no_argument, NULL, ARG_DUMP_CORE }, - { "crash-shell", no_argument, NULL, ARG_CRASH_SHELL }, - { "confirm-spawn", no_argument, NULL, ARG_CONFIRM_SPAWN }, + { "dump-core", optional_argument, NULL, ARG_DUMP_CORE }, + { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL }, + { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN }, { "show-status", optional_argument, NULL, ARG_SHOW_STATUS }, -#ifdef HAVE_SYSV_COMPAT - { "sysv-console", optional_argument, NULL, ARG_SYSV_CONSOLE }, -#endif { "deserialize", required_argument, NULL, ARG_DESERIALIZE }, + { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT }, { "introspect", optional_argument, NULL, ARG_INTROSPECT }, { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, }, { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, }, @@ -858,57 +873,60 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_SYSTEM: - arg_running_as = MANAGER_SYSTEM; + arg_running_as = SYSTEMD_SYSTEM; break; case ARG_USER: - arg_running_as = MANAGER_USER; + arg_running_as = SYSTEMD_USER; break; case ARG_TEST: arg_action = ACTION_TEST; break; + case ARG_VERSION: + arg_action = ACTION_VERSION; + break; + case ARG_DUMP_CONFIGURATION_ITEMS: arg_action = ACTION_DUMP_CONFIGURATION_ITEMS; break; case ARG_DUMP_CORE: - arg_dump_core = true; + r = optarg ? parse_boolean(optarg) : 1; + if (r < 0) { + log_error("Failed to parse dump core boolean %s.", optarg); + return r; + } + arg_dump_core = r; break; case ARG_CRASH_SHELL: - arg_crash_shell = true; + r = optarg ? parse_boolean(optarg) : 1; + if (r < 0) { + log_error("Failed to parse crash shell boolean %s.", optarg); + return r; + } + arg_crash_shell = r; break; case ARG_CONFIRM_SPAWN: - arg_confirm_spawn = true; + r = optarg ? parse_boolean(optarg) : 1; + if (r < 0) { + log_error("Failed to parse confirm spawn boolean %s.", optarg); + return r; + } + arg_confirm_spawn = r; break; case ARG_SHOW_STATUS: - - if (optarg) { - if ((r = parse_boolean(optarg)) < 0) { - log_error("Failed to show status boolean %s.", optarg); - return r; - } - arg_show_status = r; - } else - arg_show_status = true; - break; -#ifdef HAVE_SYSV_COMPAT - case ARG_SYSV_CONSOLE: - - if (optarg) { - if ((r = parse_boolean(optarg)) < 0) { - log_error("Failed to SysV console boolean %s.", optarg); - return r; - } - arg_sysv_console = r; - } else - arg_sysv_console = true; + r = optarg ? parse_boolean(optarg) : 1; + if (r < 0) { + log_error("Failed to parse show status boolean %s.", optarg); + return r; + } + arg_show_status = r; break; -#endif case ARG_DESERIALIZE: { int fd; @@ -932,6 +950,10 @@ static int parse_argv(int argc, char *argv[]) { break; } + case ARG_SWITCHED_ROOT: + arg_switched_root = true; + break; + case ARG_INTROSPECT: { const char * const * i = NULL; @@ -999,8 +1021,10 @@ static int parse_argv(int argc, char *argv[]) { * instead. */ for (a = argv; a < argv + argc; a++) - if ((r = parse_proc_cmdline_word(*a)) < 0) + if ((r = parse_proc_cmdline_word(*a)) < 0) { + log_error("Failed on cmdline argument %s: %s", *a, strerror(-r)); return r; + } } return 0; @@ -1017,13 +1041,10 @@ static int help(void) { " --unit=UNIT Set default unit\n" " --system Run a system instance, even if PID != 1\n" " --user Run a user instance\n" - " --dump-core Dump core on crash\n" - " --crash-shell Run shell on crash\n" - " --confirm-spawn Ask for confirmation when spawning processes\n" + " --dump-core[=0|1] Dump core on crash\n" + " --crash-shell[=0|1] Run shell on crash\n" + " --confirm-spawn[=0|1] Ask for confirmation when spawning processes\n" " --show-status[=0|1] Show status updates on the console during bootup\n" -#ifdef HAVE_SYSV_COMPAT - " --sysv-console[=0|1] Connect output of SysV scripts to console\n" -#endif " --log-target=TARGET Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n" " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n" " --log-color[=0|1] Highlight important log messages\n" @@ -1035,7 +1056,15 @@ static int help(void) { return 0; } -static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) { +static int version(void) { + puts(PACKAGE_STRING); + puts(DISTRIBUTION); + puts(SYSTEMD_FEATURES); + + return 0; +} + +static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize_jobs) { FILE *f = NULL; FDSet *fds = NULL; int r; @@ -1047,18 +1076,21 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) { /* Make sure nothing is really destructed when we shut down */ m->n_reloading ++; - if ((r = manager_open_serialization(m, &f)) < 0) { + r = manager_open_serialization(m, &f); + if (r < 0) { log_error("Failed to create serialization file: %s", strerror(-r)); goto fail; } - if (!(fds = fdset_new())) { + fds = fdset_new(); + if (!fds) { r = -ENOMEM; log_error("Failed to allocate fd set: %s", strerror(-r)); goto fail; } - if ((r = manager_serialize(m, f, fds)) < 0) { + r = manager_serialize(m, f, fds, serialize_jobs); + if (r < 0) { log_error("Failed to serialize state: %s", strerror(-r)); goto fail; } @@ -1068,12 +1100,14 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) { goto fail; } - if ((r = fd_cloexec(fileno(f), false)) < 0) { + r = fd_cloexec(fileno(f), false); + if (r < 0) { log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r)); goto fail; } - if ((r = fdset_cloexec(fds, false)) < 0) { + r = fdset_cloexec(fds, false); + if (r < 0) { log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r)); goto fail; } @@ -1092,6 +1126,42 @@ fail: return r; } +static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { + struct rlimit nl; + int r; + + assert(saved_rlimit); + + /* Save the original RLIMIT_NOFILE so that we can reset it + * later when transitioning from the initrd to the main + * systemd or suchlike. */ + if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) { + log_error("Reading RLIMIT_NOFILE failed: %m"); + return -errno; + } + + /* Make sure forked processes get the default kernel setting */ + if (!arg_default_rlimit[RLIMIT_NOFILE]) { + struct rlimit *rl; + + rl = newdup(struct rlimit, saved_rlimit, 1); + if (!rl) + return log_oom(); + + arg_default_rlimit[RLIMIT_NOFILE] = rl; + } + + /* Bump up the resource limit for ourselves substantially */ + nl.rlim_cur = nl.rlim_max = 64*1024; + r = setrlimit_closest(RLIMIT_NOFILE, &nl); + if (r < 0) { + log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r)); + return r; + } + + return 0; +} + static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) { const char *e; unsigned long long a, b; @@ -1158,6 +1228,28 @@ static void test_cgroups(void) { sleep(10); } +static int initialize_join_controllers(void) { + /* By default, mount "cpu" + "cpuacct" together, and "net_cls" + * + "net_prio". We'd like to add "cpuset" to the mix, but + * "cpuset" does't really work for groups with no initialized + * attributes. */ + + arg_join_controllers = new(char**, 3); + if (!arg_join_controllers) + return -ENOMEM; + + arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL); + if (!arg_join_controllers[0]) + return -ENOMEM; + + arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL); + if (!arg_join_controllers[1]) + return -ENOMEM; + + arg_join_controllers[2] = NULL; + return 0; +} + int main(int argc, char *argv[]) { Manager *m = NULL; int r, retval = EXIT_FAILURE; @@ -1168,10 +1260,13 @@ int main(int argc, char *argv[]) { const char *shutdown_verb = NULL; dual_timestamp initrd_timestamp = { 0ULL, 0ULL }; static char systemd[] = "systemd"; - bool is_reexec = false; + bool skip_setup = false; int j; bool loaded_policy = false; bool arm_reboot_watchdog = false; + bool queue_default_job = false; + char *switch_root_dir = NULL, *switch_root_init = NULL; + static struct rlimit saved_rlimit_nofile = { 0, 0 }; #ifdef HAVE_SYSV_COMPAT if (getpid() != 1 && strstr(program_invocation_short_name, "init")) { @@ -1188,10 +1283,17 @@ int main(int argc, char *argv[]) { /* Determine if this is a reexecution or normal bootup. We do * the full command line parsing much later, so let's just * have a quick peek here. */ - for (j = 1; j < argc; j++) if (streq(argv[j], "--deserialize")) { - is_reexec = true; + skip_setup = true; + break; + } + + /* If we have switched root, do all the special setup + * things */ + for (j = 1; j < argc; j++) + if (streq(argv[j], "--switched-root")) { + skip_setup = false; break; } @@ -1206,55 +1308,101 @@ int main(int argc, char *argv[]) { saved_argc = argc; log_show_color(isatty(STDERR_FILENO) > 0); - log_show_location(false); - log_set_max_level(LOG_INFO); - if (getpid() == 1) { - arg_running_as = MANAGER_SYSTEM; - log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_JOURNAL_OR_KMSG); + if (getpid() == 1 && detect_container(NULL) <= 0) { + + /* Running outside of a container as PID 1 */ + arg_running_as = SYSTEMD_SYSTEM; + make_null_stdio(); + log_set_target(LOG_TARGET_KMSG); + log_open(); - if (!is_reexec) { + if (in_initrd()) { + char *rd_timestamp = NULL; + + dual_timestamp_get(&initrd_timestamp); + asprintf(&rd_timestamp, "%llu %llu", + (unsigned long long) initrd_timestamp.realtime, + (unsigned long long) initrd_timestamp.monotonic); + if (rd_timestamp) { + setenv("RD_TIMESTAMP", rd_timestamp, 1); + free(rd_timestamp); + } + } + + if (!skip_setup) { if (selinux_setup(&loaded_policy) < 0) goto finish; if (ima_setup() < 0) goto finish; } - log_open(); - - if (label_init() < 0) + if (label_init(NULL) < 0) goto finish; - if (!is_reexec) + if (!skip_setup) { if (hwclock_is_localtime() > 0) { int min; - r = hwclock_apply_localtime_delta(&min); + /* The first-time call to settimeofday() does a time warp in the kernel */ + r = hwclock_set_timezone(&min); if (r < 0) log_error("Failed to apply local time delta, ignoring: %s", strerror(-r)); else log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min); + } else if (!in_initrd()) { + /* + * Do dummy first-time call to seal the kernel's time warp magic + * + * Do not call this this from inside the initrd. The initrd might not + * carry /etc/adjtime with LOCAL, but the real system could be set up + * that way. In such case, we need to delay the time-warp or the sealing + * until we reach the real system. + */ + hwclock_reset_timezone(); + + /* Tell the kernel our time zone */ + r = hwclock_set_timezone(NULL); + if (r < 0) + log_error("Failed to set the kernel's time zone, ignoring: %s", strerror(-r)); } + } + + /* Set the default for later on, but don't actually + * open the logs like this for now. Note that if we + * are transitioning from the initrd there might still + * be journal fd open, and we shouldn't attempt + * opening that before we parsed /proc/cmdline which + * might redirect output elsewhere. */ + log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); + + } else if (getpid() == 1) { + + /* Running inside a container, as PID 1 */ + arg_running_as = SYSTEMD_SYSTEM; + log_set_target(LOG_TARGET_CONSOLE); + log_open(); + + /* For the later on, see above... */ + log_set_target(LOG_TARGET_JOURNAL); } else { - arg_running_as = MANAGER_USER; + + /* Running as user instance */ + arg_running_as = SYSTEMD_USER; log_set_target(LOG_TARGET_AUTO); log_open(); } /* Initialize default unit */ - if (set_default_unit(SPECIAL_DEFAULT_TARGET) < 0) - goto finish; - - /* By default, mount "cpu" and "cpuacct" together */ - arg_join_controllers = new(char**, 2); - if (!arg_join_controllers) + r = set_default_unit(SPECIAL_DEFAULT_TARGET); + if (r < 0) { + log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r)); goto finish; + } - arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL); - arg_join_controllers[1] = NULL; - - if (!arg_join_controllers[0]) + r = initialize_join_controllers(); + if (r < 0) goto finish; /* Mount /proc, /sys and friends, so that /proc/cmdline and @@ -1274,7 +1422,7 @@ int main(int argc, char *argv[]) { if (parse_config_file() < 0) goto finish; - if (arg_running_as == MANAGER_SYSTEM) + if (arg_running_as == SYSTEMD_SYSTEM) if (parse_proc_cmdline() < 0) goto finish; @@ -1283,12 +1431,20 @@ int main(int argc, char *argv[]) { if (parse_argv(argc, argv) < 0) goto finish; - if (arg_action == ACTION_TEST && geteuid() == 0) { + if (arg_action == ACTION_TEST && + geteuid() == 0) { log_error("Don't run test mode as root."); goto finish; } - if (arg_running_as == MANAGER_SYSTEM && + if (arg_running_as == SYSTEMD_USER && + arg_action == ACTION_RUN && + sd_booted() <= 0) { + log_error("Trying to run as user instance, but the system has not been booted with systemd."); + goto finish; + } + + if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN && running_in_chroot() > 0) { log_error("Cannot be run in a chroot() environment."); @@ -1298,6 +1454,9 @@ int main(int argc, char *argv[]) { if (arg_action == ACTION_HELP) { retval = help(); goto finish; + } else if (arg_action == ACTION_VERSION) { + retval = version(); + goto finish; } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) { unit_dump_config_items(stdout); retval = EXIT_SUCCESS; @@ -1331,13 +1490,14 @@ int main(int argc, char *argv[]) { #else "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin", #endif - arg_running_as == MANAGER_SYSTEM); + arg_running_as == SYSTEMD_SYSTEM); - if (arg_running_as == MANAGER_SYSTEM) { + if (arg_running_as == SYSTEMD_SYSTEM) { /* Parse the data passed to us. We leave this * variables set, but the manager later on will not * pass them on to our children. */ - parse_initrd_timestamp(&initrd_timestamp); + if (!in_initrd()) + parse_initrd_timestamp(&initrd_timestamp); /* Unset some environment variables passed in from the * kernel that don't really make sense for us. */ @@ -1350,7 +1510,7 @@ int main(int argc, char *argv[]) { unsetenv("SHLVL"); unsetenv("_"); - /* When we are invoked by a tool chroot-like such as + /* When we are invoked by a chroot-like tool such as * nspawn, these might be set, but make little sense * to pass on */ unsetenv("USER"); @@ -1363,7 +1523,7 @@ int main(int argc, char *argv[]) { /* Move out of the way, so that we won't block unmounts */ assert_se(chdir("/") == 0); - if (arg_running_as == MANAGER_SYSTEM) { + if (arg_running_as == SYSTEMD_SYSTEM) { /* Become a session leader if we aren't one yet. */ setsid(); @@ -1376,10 +1536,8 @@ int main(int argc, char *argv[]) { /* Reset the console, but only if this is really init and we * are freshly booted */ - if (arg_running_as == MANAGER_SYSTEM && arg_action == ACTION_RUN) { - console_setup(getpid() == 1 && !is_reexec); - make_null_stdio(); - } + if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN) + console_setup(getpid() == 1 && !skip_setup); /* Open the logging devices, if possible and necessary */ log_open(); @@ -1395,10 +1553,22 @@ int main(int argc, char *argv[]) { goto finish; } - log_full(arg_running_as == MANAGER_SYSTEM ? LOG_INFO : LOG_DEBUG, - PACKAGE_STRING " running in %s mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")", manager_running_as_to_string(arg_running_as)); + if (arg_running_as == SYSTEMD_SYSTEM) { + const char *virtualization = NULL; + + log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")"); - if (arg_running_as == MANAGER_SYSTEM && !is_reexec) { + detect_virtualization(&virtualization); + if (virtualization) + log_info("Detected virtualization '%s'.", virtualization); + + if (in_initrd()) + log_info("Running in initial RAM disk."); + + } else + log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")"); + + if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) { locale_setup(); if (arg_show_status || plymouth_running()) @@ -1414,9 +1584,38 @@ int main(int argc, char *argv[]) { test_cgroups(); } - if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0) + if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0) watchdog_set_timeout(&arg_runtime_watchdog); + if (arg_timer_slack_nsec != (nsec_t) -1) + if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0) + log_error("Failed to adjust timer slack: %m"); + + if (arg_capability_bounding_set_drop) { + r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true); + if (r < 0) { + log_error("Failed to drop capability bounding set: %s", strerror(-r)); + goto finish; + } + r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop); + if (r < 0) { + log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r)); + goto finish; + } + } + + if (arg_running_as == SYSTEMD_USER) { + /* Become reaper of our children */ + if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) { + log_warning("Failed to make us a subreaper: %m"); + if (errno == EINVAL) + log_info("Perhaps the kernel version is too old (< 3.4?)"); + } + } + + if (arg_running_as == SYSTEMD_SYSTEM) + bump_rlimit_nofile(&saved_rlimit_nofile); + r = manager_new(arg_running_as, &m); if (r < 0) { log_error("Failed to allocate manager object: %s", strerror(-r)); @@ -1424,16 +1623,13 @@ int main(int argc, char *argv[]) { } m->confirm_spawn = arg_confirm_spawn; -#ifdef HAVE_SYSV_COMPAT - m->sysv_console = arg_sysv_console; -#endif - m->mount_auto = arg_mount_auto; - m->swap_auto = arg_swap_auto; m->default_std_output = arg_default_std_output; m->default_std_error = arg_default_std_error; m->runtime_watchdog = arg_runtime_watchdog; m->shutdown_watchdog = arg_shutdown_watchdog; + manager_set_default_rlimits(m, arg_default_rlimit); + if (dual_timestamp_is_set(&initrd_timestamp)) m->initrd_timestamp = initrd_timestamp; @@ -1442,16 +1638,18 @@ int main(int argc, char *argv[]) { manager_set_show_status(m, arg_show_status); + /* Remember whether we should queue the default job */ + queue_default_job = !serialization || arg_switched_root; + before_startup = now(CLOCK_MONOTONIC); r = manager_startup(m, serialization, fds); if (r < 0) log_error("Failed to fully start up daemon: %s", strerror(-r)); + /* This will close all file descriptors that were opened, but + * not claimed by any unit. */ if (fds) { - /* This will close all file descriptors that were opened, but - * not claimed by any unit. */ - fdset_free(fds); fds = NULL; } @@ -1459,7 +1657,9 @@ int main(int argc, char *argv[]) { if (serialization) { fclose(serialization); serialization = NULL; - } else { + } + + if (queue_default_job) { DBusError error; Unit *target = NULL; Job *default_unit_job; @@ -1544,13 +1744,28 @@ int main(int argc, char *argv[]) { break; case MANAGER_REEXECUTE: - if (prepare_reexecute(m, &serialization, &fds) < 0) + + if (prepare_reexecute(m, &serialization, &fds, true) < 0) goto finish; reexecute = true; log_notice("Reexecuting."); goto finish; + case MANAGER_SWITCH_ROOT: + /* Steal the switch root parameters */ + switch_root_dir = m->switch_root; + switch_root_init = m->switch_root_init; + m->switch_root = m->switch_root_init = NULL; + + if (!switch_root_init) + if (prepare_reexecute(m, &serialization, &fds, false) < 0) + goto finish; + + reexecute = true; + log_notice("Switching root."); + goto finish; + case MANAGER_REBOOT: case MANAGER_POWEROFF: case MANAGER_HALT: @@ -1578,74 +1793,116 @@ finish: if (m) manager_free(m); + for (j = 0; j < RLIMIT_NLIMITS; j++) + free(arg_default_rlimit[j]); + free(arg_default_unit); strv_free(arg_default_controllers); free_join_controllers(); dbus_shutdown(); - label_finish(); if (reexecute) { - const char *args[15]; - unsigned i = 0; - char sfd[16]; + const char **args; + unsigned i, args_size; - assert(serialization); - assert(fds); + /* Close and disarm the watchdog, so that the new + * instance can reinitialize it, but doesn't get + * rebooted while we do that */ + watchdog_close(true); - args[i++] = SYSTEMD_BINARY_PATH; + /* Reset the RLIMIT_NOFILE to the kernel default, so + * that the new systemd can pass the kernel default to + * its child processes */ + if (saved_rlimit_nofile.rlim_cur > 0) + setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile); + + if (switch_root_dir) { + /* Kill all remaining processes from the + * initrd, but don't wait for them, so that we + * can handle the SIGCHLD for them after + * deserializing. */ + broadcast_signal(SIGTERM, false); + + /* And switch root */ + r = switch_root(switch_root_dir); + if (r < 0) + log_error("Failed to switch root, ignoring: %s", strerror(-r)); + } - args[i++] = "--log-level"; - args[i++] = log_level_to_string(log_get_max_level()); + args_size = MAX(6, argc+1); + args = newa(const char*, args_size); - args[i++] = "--log-target"; - args[i++] = log_target_to_string(log_get_target()); + if (!switch_root_init) { + char sfd[16]; - if (arg_running_as == MANAGER_SYSTEM) - args[i++] = "--system"; - else - args[i++] = "--user"; + /* First try to spawn ourselves with the right + * path, and with full serialization. We do + * this only if the user didn't specify an + * explicit init to spawn. */ - if (arg_dump_core) - args[i++] = "--dump-core"; + assert(serialization); + assert(fds); - if (arg_crash_shell) - args[i++] = "--crash-shell"; + snprintf(sfd, sizeof(sfd), "%i", fileno(serialization)); + char_array_0(sfd); - if (arg_confirm_spawn) - args[i++] = "--confirm-spawn"; + i = 0; + args[i++] = SYSTEMD_BINARY_PATH; + if (switch_root_dir) + args[i++] = "--switched-root"; + args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user"; + args[i++] = "--deserialize"; + args[i++] = sfd; + args[i++] = NULL; - if (arg_show_status) - args[i++] = "--show-status=1"; - else - args[i++] = "--show-status=0"; + assert(i <= args_size); + execv(args[0], (char* const*) args); + } -#ifdef HAVE_SYSV_COMPAT - if (arg_sysv_console) - args[i++] = "--sysv-console=1"; - else - args[i++] = "--sysv-console=0"; -#endif + /* Try the fallback, if there is any, without any + * serialization. We pass the original argv[] and + * envp[]. (Well, modulo the ordering changes due to + * getopt() in argv[], and some cleanups in envp[], + * but let's hope that doesn't matter.) */ + + if (serialization) { + fclose(serialization); + serialization = NULL; + } - snprintf(sfd, sizeof(sfd), "%i", fileno(serialization)); - char_array_0(sfd); + if (fds) { + fdset_free(fds); + fds = NULL; + } - args[i++] = "--deserialize"; - args[i++] = sfd; + /* Reopen the console */ + make_console_stdio(); + for (j = 1, i = 1; j < argc; j++) + args[i++] = argv[j]; args[i++] = NULL; + assert(i <= args_size); - assert(i <= ELEMENTSOF(args)); - - /* Close and disarm the watchdog, so that the new - * instance can reinitialize it, but doesn't get - * rebooted while we do that */ - watchdog_close(true); + if (switch_root_init) { + args[0] = switch_root_init; + execv(args[0], (char* const*) args); + log_warning("Failed to execute configured init, trying fallback: %m"); + } + args[0] = "/sbin/init"; execv(args[0], (char* const*) args); - log_error("Failed to reexecute: %m"); + if (errno == ENOENT) { + log_warning("No /sbin/init, trying fallback"); + + args[0] = "/bin/sh"; + args[1] = NULL; + execv(args[0], (char* const*) args); + log_error("Failed to execute /bin/sh, giving up: %m"); + } else + log_warning("Failed to execute /sbin/init, giving up: %m"); } if (serialization)