chiark / gitweb /
core: move ManagerRunningAs to shared
[elogind.git] / src / core / main.c
index 58780990c8de4a1aa105d3daff0a45f0adf431e3..04fc0b3b59fda78108d91920f519b35ab72f5f44 100644 (file)
@@ -32,6 +32,7 @@
 #include <sys/wait.h>
 #include <fcntl.h>
 #include <sys/prctl.h>
+#include <sys/mount.h>
 
 #include "manager.h"
 #include "log.h"
 #include "def.h"
 #include "virt.h"
 #include "watchdog.h"
+#include "path-util.h"
+#include "switch-root.h"
+#include "capability.h"
+#include "killall.h"
 
 #include "mount-setup.h"
 #include "loopback-setup.h"
 static enum {
         ACTION_RUN,
         ACTION_HELP,
+        ACTION_VERSION,
         ACTION_TEST,
         ACTION_DUMP_CONFIGURATION_ITEMS,
         ACTION_DONE
 } arg_action = ACTION_RUN;
 
 static char *arg_default_unit = NULL;
-static ManagerRunningAs arg_running_as = _MANAGER_RUNNING_AS_INVALID;
+static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
 
 static bool arg_dump_core = true;
 static bool arg_crash_shell = false;
 static int arg_crash_chvt = -1;
 static bool arg_confirm_spawn = false;
 static bool arg_show_status = true;
-#ifdef HAVE_SYSV_COMPAT
-static bool arg_sysv_console = true;
-#endif
+static bool arg_switched_root = false;
 static char **arg_default_controllers = NULL;
 static char ***arg_join_controllers = NULL;
 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
 static usec_t arg_runtime_watchdog = 0;
 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
+static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
+static uint64_t arg_capability_bounding_set_drop = 0;
+static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
 
 static FILE* serialization = NULL;
 
@@ -159,19 +166,14 @@ _noreturn_ static void crash(int sig) {
                 sa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART;
                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
 
-                if ((pid = fork()) < 0)
-                        log_error("Failed to fork off crash shell: %s", strerror(errno));
+                pid = fork();
+                if (pid < 0)
+                        log_error("Failed to fork off crash shell: %m");
                 else if (pid == 0) {
-                        int fd, r;
-
-                        if ((fd = acquire_terminal("/dev/console", false, true, true)) < 0)
-                                log_error("Failed to acquire terminal: %s", strerror(-fd));
-                        else if ((r = make_stdio(fd)) < 0)
-                                log_error("Failed to duplicate terminal fd: %s", strerror(-r));
-
+                        make_console_stdio();
                         execl("/bin/sh", "/bin/sh", NULL);
 
-                        log_error("execl() failed: %s", strerror(errno));
+                        log_error("execl() failed: %m");
                         _exit(1);
                 }
 
@@ -225,11 +227,13 @@ static int set_default_unit(const char *u) {
 
         assert(u);
 
-        if (!(c = strdup(u)))
+        c = strdup(u);
+        if (!c)
                 return -ENOMEM;
 
         free(arg_default_unit);
         arg_default_unit = c;
+
         return 0;
 }
 
@@ -251,10 +255,17 @@ static int parse_proc_cmdline_word(const char *word) {
 
         assert(word);
 
-        if (startswith(word, "systemd.unit="))
-                return set_default_unit(word + 13);
+        if (startswith(word, "systemd.unit=")) {
+
+                if (!in_initrd())
+                        return set_default_unit(word + 13);
+
+        } else if (startswith(word, "rd.systemd.unit=")) {
 
-        else if (startswith(word, "systemd.log_target=")) {
+                if (in_initrd())
+                        return set_default_unit(word + 16);
+
+        } else if (startswith(word, "systemd.log_target=")) {
 
                 if (log_set_target_from_string(word + 19) < 0)
                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
@@ -339,38 +350,28 @@ static int parse_proc_cmdline_word(const char *word) {
                 if (!eq) {
                         r = unsetenv(cenv);
                         if (r < 0)
-                                log_warning("unsetenv failed %s. Ignoring.", strerror(errno));
+                                log_warning("unsetenv failed %m. Ignoring.");
                 } else {
                         *eq = 0;
                         r = setenv(cenv, eq + 1, 1);
                         if (r < 0)
-                                log_warning("setenv failed %s. Ignoring.", strerror(errno));
+                                log_warning("setenv failed %m. Ignoring.");
                 }
                 free(cenv);
-#ifdef HAVE_SYSV_COMPAT
-        } else if (startswith(word, "systemd.sysv_console=")) {
-                int r;
 
-                if ((r = parse_boolean(word + 21)) < 0)
-                        log_warning("Failed to parse SysV console switch %s. Ignoring.", word + 20);
-                else
-                        arg_sysv_console = r;
-#endif
-
-        } else if (startswith(word, "systemd.")) {
+        } else if (startswith(word, "systemd.") ||
+                   (in_initrd() && startswith(word, "rd.systemd."))) {
 
                 log_warning("Unknown kernel switch %s. Ignoring.", word);
 
                 log_info("Supported kernel switches:\n"
                          "systemd.unit=UNIT                        Default unit to start\n"
+                         "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
                          "systemd.dump_core=0|1                    Dump core on crash\n"
                          "systemd.crash_shell=0|1                  Run shell on crash\n"
                          "systemd.crash_chvt=N                     Change to VT #N on crash\n"
                          "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
                          "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
-#ifdef HAVE_SYSV_COMPAT
-                         "systemd.sysv_console=0|1                 Connect output of SysV scripts to console\n"
-#endif
                          "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
                          "                                         Log target\n"
                          "systemd.log_level=LEVEL                  Log level\n"
@@ -379,14 +380,12 @@ static int parse_proc_cmdline_word(const char *word) {
                          "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
                          "                                         Set default log output for services\n"
                          "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
-                         "                                         Set default log error output for services\n");
+                         "                                         Set default log error output for services\n"
+                         "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
 
-        } else if (streq(word, "quiet")) {
+        } else if (streq(word, "quiet"))
                 arg_show_status = false;
-#ifdef HAVE_SYSV_COMPAT
-                arg_sysv_console = false;
-#endif
-        } else {
+        else if (!in_initrd()) {
                 unsigned i;
 
                 /* SysV compatibility */
@@ -496,14 +495,14 @@ static int config_parse_cpu_affinity2(
                 unsigned cpu;
 
                 if (!(t = strndup(w, l)))
-                        return -ENOMEM;
+                        return log_oom();
 
                 r = safe_atou(t, &cpu);
                 free(t);
 
                 if (!c)
                         if (!(c = cpu_set_malloc(&ncpus)))
-                                return -ENOMEM;
+                                return log_oom();
 
                 if (r < 0 || cpu >= ncpus) {
                         log_error("[%s:%u] Failed to parse CPU affinity: %s", filename, line, rvalue);
@@ -569,7 +568,7 @@ static int config_parse_join_controllers(
 
                 s = strndup(w, length);
                 if (!s)
-                        return -ENOMEM;
+                        return log_oom();
 
                 l = strv_split(s, ",");
                 free(s);
@@ -585,7 +584,7 @@ static int config_parse_join_controllers(
                         arg_join_controllers = new(char**, 2);
                         if (!arg_join_controllers) {
                                 strv_free(l);
-                                return -ENOMEM;
+                                return log_oom();
                         }
 
                         arg_join_controllers[0] = l;
@@ -599,7 +598,7 @@ static int config_parse_join_controllers(
                         t = new0(char**, n+2);
                         if (!t) {
                                 strv_free(l);
-                                return -ENOMEM;
+                                return log_oom();
                         }
 
                         n = 0;
@@ -613,7 +612,7 @@ static int config_parse_join_controllers(
                                         if (!c) {
                                                 strv_free(l);
                                                 strv_free_free(t);
-                                                return -ENOMEM;
+                                                return log_oom();
                                         }
 
                                         strv_free(l);
@@ -625,7 +624,7 @@ static int config_parse_join_controllers(
                                         if (!c) {
                                                 strv_free(l);
                                                 strv_free_free(t);
-                                                return -ENOMEM;
+                                                return log_oom();
                                         }
 
                                         t[n++] = c;
@@ -652,9 +651,6 @@ static int parse_config_file(void) {
                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
-#ifdef HAVE_SYSV_COMPAT
-                { "Manager", "SysVConsole",           config_parse_bool,         0, &arg_sysv_console        },
-#endif
                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
                 { "Manager", "DefaultControllers",    config_parse_strv,         0, &arg_default_controllers },
@@ -663,6 +659,24 @@ static int parse_config_file(void) {
                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
                 { "Manager", "RuntimeWatchdogSec",    config_parse_usec,         0, &arg_runtime_watchdog    },
                 { "Manager", "ShutdownWatchdogSec",   config_parse_usec,         0, &arg_shutdown_watchdog   },
+                { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
+                { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
+                { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
+                { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
+                { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
+                { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
+                { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
+                { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
+                { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
+                { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
+                { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
+                { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
+                { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
+                { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
+                { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
+                { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
+                { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
+                { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
                 { NULL, NULL, NULL, 0, NULL }
         };
 
@@ -670,7 +684,7 @@ static int parse_config_file(void) {
         const char *fn;
         int r;
 
-        fn = arg_running_as == MANAGER_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE;
+        fn = arg_running_as == SYSTEMD_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE;
         f = fopen(fn, "re");
         if (!f) {
                 if (errno == ENOENT)
@@ -713,10 +727,13 @@ static int parse_proc_cmdline(void) {
                 }
 
                 r = parse_proc_cmdline_word(word);
-                free(word);
-
-                if (r < 0)
+                if (r < 0) {
+                        log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
+                        free(word);
                         goto finish;
+                }
+
+                free(word);
         }
 
         r = 0;
@@ -737,13 +754,14 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_SYSTEM,
                 ARG_USER,
                 ARG_TEST,
+                ARG_VERSION,
                 ARG_DUMP_CONFIGURATION_ITEMS,
                 ARG_DUMP_CORE,
                 ARG_CRASH_SHELL,
                 ARG_CONFIRM_SPAWN,
                 ARG_SHOW_STATUS,
-                ARG_SYSV_CONSOLE,
                 ARG_DESERIALIZE,
+                ARG_SWITCHED_ROOT,
                 ARG_INTROSPECT,
                 ARG_DEFAULT_STD_OUTPUT,
                 ARG_DEFAULT_STD_ERROR
@@ -759,15 +777,14 @@ static int parse_argv(int argc, char *argv[]) {
                 { "user",                     no_argument,       NULL, ARG_USER                     },
                 { "test",                     no_argument,       NULL, ARG_TEST                     },
                 { "help",                     no_argument,       NULL, 'h'                          },
+                { "version",                  no_argument,       NULL, ARG_VERSION                  },
                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
-#ifdef HAVE_SYSV_COMPAT
-                { "sysv-console",             optional_argument, NULL, ARG_SYSV_CONSOLE             },
-#endif
                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
+                { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
@@ -855,17 +872,21 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_SYSTEM:
-                        arg_running_as = MANAGER_SYSTEM;
+                        arg_running_as = SYSTEMD_SYSTEM;
                         break;
 
                 case ARG_USER:
-                        arg_running_as = MANAGER_USER;
+                        arg_running_as = SYSTEMD_USER;
                         break;
 
                 case ARG_TEST:
                         arg_action = ACTION_TEST;
                         break;
 
+                case ARG_VERSION:
+                        arg_action = ACTION_VERSION;
+                        break;
+
                 case ARG_DUMP_CONFIGURATION_ITEMS:
                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
                         break;
@@ -906,17 +927,6 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_show_status = r;
                         break;
 
-#ifdef HAVE_SYSV_COMPAT
-                case ARG_SYSV_CONSOLE:
-                        r = optarg ? parse_boolean(optarg) : 1;
-                        if (r < 0) {
-                                log_error("Failed to parse SysV console boolean %s.", optarg);
-                                return r;
-                        }
-                        arg_sysv_console = r;
-                        break;
-#endif
-
                 case ARG_DESERIALIZE: {
                         int fd;
                         FILE *f;
@@ -939,6 +949,10 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_SWITCHED_ROOT:
+                        arg_switched_root = true;
+                        break;
+
                 case ARG_INTROSPECT: {
                         const char * const * i = NULL;
 
@@ -1006,8 +1020,10 @@ static int parse_argv(int argc, char *argv[]) {
                  * instead. */
 
                 for (a = argv; a < argv + argc; a++)
-                        if ((r = parse_proc_cmdline_word(*a)) < 0)
+                        if ((r = parse_proc_cmdline_word(*a)) < 0) {
+                                log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
                                 return r;
+                        }
         }
 
         return 0;
@@ -1028,9 +1044,6 @@ static int help(void) {
                "     --crash-shell[=0|1]         Run shell on crash\n"
                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
-#ifdef HAVE_SYSV_COMPAT
-               "     --sysv-console[=0|1]        Connect output of SysV scripts to console\n"
-#endif
                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
                "     --log-color[=0|1]           Highlight important log messages\n"
@@ -1042,7 +1055,15 @@ static int help(void) {
         return 0;
 }
 
-static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) {
+/* Prints the package string, the distribution name and the compiled-in
+ * feature list to stdout, one per line. Always returns 0. */
+static int version(void) {
+        static const char * const lines[] = {
+                PACKAGE_STRING,
+                DISTRIBUTION,
+                SYSTEMD_FEATURES
+        };
+        unsigned i;
+
+        for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
+                puts(lines[i]);
+
+        return 0;
+}
+
+static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize_jobs) {
         FILE *f = NULL;
         FDSet *fds = NULL;
         int r;
@@ -1054,18 +1075,21 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) {
         /* Make sure nothing is really destructed when we shut down */
         m->n_reloading ++;
 
-        if ((r = manager_open_serialization(m, &f)) < 0) {
+        r = manager_open_serialization(m, &f);
+        if (r < 0) {
                 log_error("Failed to create serialization file: %s", strerror(-r));
                 goto fail;
         }
 
-        if (!(fds = fdset_new())) {
+        fds = fdset_new();
+        if (!fds) {
                 r = -ENOMEM;
                 log_error("Failed to allocate fd set: %s", strerror(-r));
                 goto fail;
         }
 
-        if ((r = manager_serialize(m, f, fds)) < 0) {
+        r = manager_serialize(m, f, fds, serialize_jobs);
+        if (r < 0) {
                 log_error("Failed to serialize state: %s", strerror(-r));
                 goto fail;
         }
@@ -1075,12 +1099,14 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds) {
                 goto fail;
         }
 
-        if ((r = fd_cloexec(fileno(f), false)) < 0) {
+        r = fd_cloexec(fileno(f), false);
+        if (r < 0) {
                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
                 goto fail;
         }
 
-        if ((r = fdset_cloexec(fds, false)) < 0) {
+        r = fdset_cloexec(fds, false);
+        if (r < 0) {
                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
                 goto fail;
         }
@@ -1099,6 +1125,42 @@ fail:
         return r;
 }
 
+/* Raises our own RLIMIT_NOFILE while remembering the original limit.
+ *
+ * saved_rlimit: receives the RLIMIT_NOFILE value in effect on entry;
+ *               must not be NULL (asserted).
+ *
+ * If no default RLIMIT_NOFILE has been configured yet, i.e.
+ * arg_default_rlimit[RLIMIT_NOFILE] is NULL, a heap copy of the
+ * original limit is stored there.
+ *
+ * Returns 0 on success. On failure returns the negated errno from
+ * getrlimit(), the result of log_oom() (presumably -ENOMEM -- confirm
+ * against its definition), or the negative value reported by
+ * setrlimit_closest(). */
+static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
+        struct rlimit nl;
+        int r;
+
+        assert(saved_rlimit);
+
+        /* Save the original RLIMIT_NOFILE so that we can reset it
+         * later when transitioning from the initrd to the main
+         * systemd or suchlike. */
+        if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
+                /* Capture errno before logging: the logging call is
+                 * not guaranteed to leave it untouched, and returning
+                 * a clobbered value (possibly 0) would hide the
+                 * failure from the caller. */
+                r = -errno;
+                log_error("Reading RLIMIT_NOFILE failed: %m");
+                return r;
+        }
+
+        /* Make sure forked processes get the default kernel setting */
+        if (!arg_default_rlimit[RLIMIT_NOFILE]) {
+                struct rlimit *rl;
+
+                rl = newdup(struct rlimit, saved_rlimit, 1);
+                if (!rl)
+                        return log_oom();
+
+                arg_default_rlimit[RLIMIT_NOFILE] = rl;
+        }
+
+        /* Bump up the resource limit for ourselves substantially */
+        nl.rlim_cur = nl.rlim_max = 64*1024;
+        r = setrlimit_closest(RLIMIT_NOFILE, &nl);
+        if (r < 0) {
+                log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
+                return r;
+        }
+
+        return 0;
+}
+
 static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) {
         const char *e;
         unsigned long long a, b;
@@ -1175,10 +1237,13 @@ int main(int argc, char *argv[]) {
         const char *shutdown_verb = NULL;
         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
         static char systemd[] = "systemd";
-        bool is_reexec = false;
+        bool skip_setup = false;
         int j;
         bool loaded_policy = false;
         bool arm_reboot_watchdog = false;
+        bool queue_default_job = false;
+        char *switch_root_dir = NULL, *switch_root_init = NULL;
+        static struct rlimit saved_rlimit_nofile = { 0, 0 };
 
 #ifdef HAVE_SYSV_COMPAT
         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
@@ -1195,10 +1260,17 @@ int main(int argc, char *argv[]) {
         /* Determine if this is a reexecution or normal bootup. We do
          * the full command line parsing much later, so let's just
          * have a quick peek here. */
-
         for (j = 1; j < argc; j++)
                 if (streq(argv[j], "--deserialize")) {
-                        is_reexec = true;
+                        skip_setup = true;
+                        break;
+                }
+
+        /* If we have switched root, do all the special setup
+         * things */
+        for (j = 1; j < argc; j++)
+                if (streq(argv[j], "--switched-root")) {
+                        skip_setup = false;
                         break;
                 }
 
@@ -1213,53 +1285,100 @@ int main(int argc, char *argv[]) {
         saved_argc = argc;
 
         log_show_color(isatty(STDERR_FILENO) > 0);
-        log_show_location(false);
-        log_set_max_level(LOG_INFO);
 
-        if (getpid() == 1) {
-                arg_running_as = MANAGER_SYSTEM;
-                log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_JOURNAL : LOG_TARGET_JOURNAL_OR_KMSG);
+        if (getpid() == 1 && detect_container(NULL) <= 0) {
+
+                /* Running outside of a container as PID 1 */
+                arg_running_as = SYSTEMD_SYSTEM;
+                make_null_stdio();
+                log_set_target(LOG_TARGET_KMSG);
+                log_open();
+
+                if (in_initrd()) {
+                        char *rd_timestamp = NULL;
+
+                        dual_timestamp_get(&initrd_timestamp);
+                        asprintf(&rd_timestamp, "%llu %llu",
+                                 (unsigned long long) initrd_timestamp.realtime,
+                                 (unsigned long long) initrd_timestamp.monotonic);
+                        if (rd_timestamp) {
+                                setenv("RD_TIMESTAMP", rd_timestamp, 1);
+                                free(rd_timestamp);
+                        }
+                }
 
-                if (!is_reexec) {
+                if (!skip_setup) {
                         if (selinux_setup(&loaded_policy) < 0)
                                 goto finish;
                         if (ima_setup() < 0)
                                 goto finish;
                 }
 
-                log_open();
-
                 if (label_init(NULL) < 0)
                         goto finish;
 
-                if (!is_reexec)
+                if (!skip_setup) {
                         if (hwclock_is_localtime() > 0) {
                                 int min;
 
-                                r = hwclock_apply_localtime_delta(&min);
+                                /* The first-time call to settimeofday() does a time warp in the kernel */
+                                r = hwclock_set_timezone(&min);
                                 if (r < 0)
                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
                                 else
                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
+                        } else {
+                                /* Do dummy first-time call to seal the kernel's time warp magic */
+                                hwclock_reset_timezone();
+
+                                /* Tell the kernel our time zone */
+                                r = hwclock_set_timezone(NULL);
+                                if (r < 0)
+                                        log_error("Failed to set the kernel's time zone, ignoring: %s", strerror(-r));
                         }
+                }
+
+                /* Set the default for later on, but don't actually
+                 * open the logs like this for now. Note that if we
+                 * are transitioning from the initrd there might still
+                 * be a journal fd open, and we shouldn't attempt
+                 * opening that before we have parsed /proc/cmdline, which
+                 * might redirect output elsewhere. */
+                log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+
+        } else if (getpid() == 1) {
+
+                /* Running inside a container, as PID 1 */
+                arg_running_as = SYSTEMD_SYSTEM;
+                log_set_target(LOG_TARGET_CONSOLE);
+                log_open();
+
+                /* For later on, see above... */
+                log_set_target(LOG_TARGET_JOURNAL);
 
         } else {
-                arg_running_as = MANAGER_USER;
+
+                /* Running as user instance */
+                arg_running_as = SYSTEMD_USER;
                 log_set_target(LOG_TARGET_AUTO);
                 log_open();
         }
 
         /* Initialize default unit */
-        if (set_default_unit(SPECIAL_DEFAULT_TARGET) < 0)
+        r = set_default_unit(SPECIAL_DEFAULT_TARGET);
+        if (r < 0) {
+                log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
                 goto finish;
+        }
 
         /* By default, mount "cpu" and "cpuacct" together */
-        arg_join_controllers = new(char**, 2);
+        arg_join_controllers = new(char**, 3);
         if (!arg_join_controllers)
                 goto finish;
 
-        arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
-        arg_join_controllers[1] = NULL;
+        arg_join_controllers[0] = strv_new("cpu", "cpuacct", "cpuset", NULL);
+        arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
+        arg_join_controllers[2] = NULL;
 
         if (!arg_join_controllers[0])
                 goto finish;
@@ -1281,7 +1400,7 @@ int main(int argc, char *argv[]) {
         if (parse_config_file() < 0)
                 goto finish;
 
-        if (arg_running_as == MANAGER_SYSTEM)
+        if (arg_running_as == SYSTEMD_SYSTEM)
                 if (parse_proc_cmdline() < 0)
                         goto finish;
 
@@ -1295,7 +1414,7 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (arg_running_as == MANAGER_SYSTEM &&
+        if (arg_running_as == SYSTEMD_SYSTEM &&
             arg_action == ACTION_RUN &&
             running_in_chroot() > 0) {
                 log_error("Cannot be run in a chroot() environment.");
@@ -1305,6 +1424,9 @@ int main(int argc, char *argv[]) {
         if (arg_action == ACTION_HELP) {
                 retval = help();
                 goto finish;
+        } else if (arg_action == ACTION_VERSION) {
+                retval = version();
+                goto finish;
         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
                 unit_dump_config_items(stdout);
                 retval = EXIT_SUCCESS;
@@ -1338,13 +1460,14 @@ int main(int argc, char *argv[]) {
 #else
                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin",
 #endif
-               arg_running_as == MANAGER_SYSTEM);
+               arg_running_as == SYSTEMD_SYSTEM);
 
-        if (arg_running_as == MANAGER_SYSTEM) {
+        if (arg_running_as == SYSTEMD_SYSTEM) {
                 /* Parse the data passed to us. We leave this
                  * variables set, but the manager later on will not
                  * pass them on to our children. */
-                parse_initrd_timestamp(&initrd_timestamp);
+                if (!in_initrd())
+                        parse_initrd_timestamp(&initrd_timestamp);
 
                 /* Unset some environment variables passed in from the
                  * kernel that don't really make sense for us. */
@@ -1357,7 +1480,7 @@ int main(int argc, char *argv[]) {
                 unsetenv("SHLVL");
                 unsetenv("_");
 
-                /* When we are invoked by a tool chroot-like such as
+                /* When we are invoked by a chroot-like tool such as
                  * nspawn, these might be set, but make little sense
                  * to pass on */
                 unsetenv("USER");
@@ -1370,7 +1493,7 @@ int main(int argc, char *argv[]) {
         /* Move out of the way, so that we won't block unmounts */
         assert_se(chdir("/")  == 0);
 
-        if (arg_running_as == MANAGER_SYSTEM) {
+        if (arg_running_as == SYSTEMD_SYSTEM) {
                 /* Become a session leader if we aren't one yet. */
                 setsid();
 
@@ -1383,10 +1506,8 @@ int main(int argc, char *argv[]) {
 
         /* Reset the console, but only if this is really init and we
          * are freshly booted */
-        if (arg_running_as == MANAGER_SYSTEM && arg_action == ACTION_RUN) {
-                console_setup(getpid() == 1 && !is_reexec);
-                make_null_stdio();
-        }
+        if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
+                console_setup(getpid() == 1 && !skip_setup);
 
         /* Open the logging devices, if possible and necessary */
         log_open();
@@ -1402,10 +1523,22 @@ int main(int argc, char *argv[]) {
                         goto finish;
         }
 
-        log_full(arg_running_as == MANAGER_SYSTEM ? LOG_INFO : LOG_DEBUG,
-                 PACKAGE_STRING " running in %s mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")", manager_running_as_to_string(arg_running_as));
+        if (arg_running_as == SYSTEMD_SYSTEM) {
+                const char *virtualization = NULL;
+
+                log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")");
+
+                detect_virtualization(&virtualization);
+                if (virtualization)
+                        log_info("Detected virtualization '%s'.", virtualization);
+
+                if (in_initrd())
+                        log_info("Running in initial RAM disk.");
+
+        } else
+                log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")");
 
-        if (arg_running_as == MANAGER_SYSTEM && !is_reexec) {
+        if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
                 locale_setup();
 
                 if (arg_show_status || plymouth_running())
@@ -1421,9 +1554,38 @@ int main(int argc, char *argv[]) {
                 test_cgroups();
         }
 
-        if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0)
+        if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
                 watchdog_set_timeout(&arg_runtime_watchdog);
 
+        if (arg_timer_slack_nsec != (nsec_t) -1)
+                if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
+                        log_error("Failed to adjust timer slack: %m");
+
+        if (arg_capability_bounding_set_drop) {
+                r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
+                if (r < 0) {
+                        log_error("Failed to drop capability bounding set: %s", strerror(-r));
+                        goto finish;
+                }
+                r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
+                if (r < 0) {
+                        log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
+                        goto finish;
+                }
+        }
+
+        if (arg_running_as == SYSTEMD_USER) {
+                /* Become reaper of our children */
+                if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
+                        log_warning("Failed to make us a subreaper: %m");
+                        if (errno == EINVAL)
+                                log_info("Perhaps the kernel version is too old (< 3.4?)");
+                }
+        }
+
+        if (arg_running_as == SYSTEMD_SYSTEM)
+                bump_rlimit_nofile(&saved_rlimit_nofile);
+
         r = manager_new(arg_running_as, &m);
         if (r < 0) {
                 log_error("Failed to allocate manager object: %s", strerror(-r));
@@ -1431,14 +1593,13 @@ int main(int argc, char *argv[]) {
         }
 
         m->confirm_spawn = arg_confirm_spawn;
-#ifdef HAVE_SYSV_COMPAT
-        m->sysv_console = arg_sysv_console;
-#endif
         m->default_std_output = arg_default_std_output;
         m->default_std_error = arg_default_std_error;
         m->runtime_watchdog = arg_runtime_watchdog;
         m->shutdown_watchdog = arg_shutdown_watchdog;
 
+        manager_set_default_rlimits(m, arg_default_rlimit);
+
         if (dual_timestamp_is_set(&initrd_timestamp))
                 m->initrd_timestamp = initrd_timestamp;
 
@@ -1447,16 +1608,18 @@ int main(int argc, char *argv[]) {
 
         manager_set_show_status(m, arg_show_status);
 
+        /* Remember whether we should queue the default job */
+        queue_default_job = !serialization || arg_switched_root;
+
         before_startup = now(CLOCK_MONOTONIC);
 
         r = manager_startup(m, serialization, fds);
         if (r < 0)
                 log_error("Failed to fully start up daemon: %s", strerror(-r));
 
+        /* This will close all file descriptors that were opened, but
+         * not claimed by any unit. */
         if (fds) {
-                /* This will close all file descriptors that were opened, but
-                 * not claimed by any unit. */
-
                 fdset_free(fds);
                 fds = NULL;
         }
@@ -1464,7 +1627,9 @@ int main(int argc, char *argv[]) {
         if (serialization) {
                 fclose(serialization);
                 serialization = NULL;
-        } else {
+        }
+
+        if (queue_default_job) {
                 DBusError error;
                 Unit *target = NULL;
                 Job *default_unit_job;
@@ -1549,13 +1714,28 @@ int main(int argc, char *argv[]) {
                         break;
 
                 case MANAGER_REEXECUTE:
-                        if (prepare_reexecute(m, &serialization, &fds) < 0)
+
+                        if (prepare_reexecute(m, &serialization, &fds, true) < 0)
                                 goto finish;
 
                         reexecute = true;
                         log_notice("Reexecuting.");
                         goto finish;
 
+                case MANAGER_SWITCH_ROOT:
+                        /* Steal the switch root parameters */
+                        switch_root_dir = m->switch_root;
+                        switch_root_init = m->switch_root_init;
+                        m->switch_root = m->switch_root_init = NULL;
+
+                        if (!switch_root_init)
+                                if (prepare_reexecute(m, &serialization, &fds, false) < 0)
+                                        goto finish;
+
+                        reexecute = true;
+                        log_notice("Switching root.");
+                        goto finish;
+
                 case MANAGER_REBOOT:
                 case MANAGER_POWEROFF:
                 case MANAGER_HALT:
@@ -1583,74 +1763,116 @@ finish:
         if (m)
                 manager_free(m);
 
+        for (j = 0; j < RLIMIT_NLIMITS; j++)
+                free(arg_default_rlimit[j]);
+
         free(arg_default_unit);
         strv_free(arg_default_controllers);
         free_join_controllers();
 
         dbus_shutdown();
-
         label_finish();
 
         if (reexecute) {
-                const char *args[15];
-                unsigned i = 0;
-                char sfd[16];
+                const char **args;
+                unsigned i, args_size;
+
+                /* Close and disarm the watchdog, so that the new
+                 * instance can reinitialize it, but doesn't get
+                 * rebooted while we do that */
+                watchdog_close(true);
 
-                assert(serialization);
-                assert(fds);
+                /* Reset the RLIMIT_NOFILE to the kernel default, so
+                 * that the new systemd can pass the kernel default to
+                 * its child processes */
+                if (saved_rlimit_nofile.rlim_cur > 0)
+                        setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
+
+                if (switch_root_dir) {
+                        /* Kill all remaining processes from the
+                         * initrd, but don't wait for them, so that we
+                         * can handle the SIGCHLD for them after
+                         * deserializing. */
+                        broadcast_signal(SIGTERM, false);
+
+                        /* And switch root */
+                        r = switch_root(switch_root_dir);
+                        if (r < 0)
+                                log_error("Failed to switch root, ignoring: %s", strerror(-r));
+                }
 
-                args[i++] = SYSTEMD_BINARY_PATH;
+                args_size = MAX(6, argc+1);
+                args = newa(const char*, args_size);
 
-                args[i++] = "--log-level";
-                args[i++] = log_level_to_string(log_get_max_level());
+                if (!switch_root_init) {
+                        char sfd[16];
 
-                args[i++] = "--log-target";
-                args[i++] = log_target_to_string(log_get_target());
+                        /* First try to spawn ourselves with the right
+                         * path, and with full serialization. We do
+                         * this only if the user didn't specify an
+                         * explicit init to spawn. */
 
-                if (arg_running_as == MANAGER_SYSTEM)
-                        args[i++] = "--system";
-                else
-                        args[i++] = "--user";
+                        assert(serialization);
+                        assert(fds);
 
-                if (arg_dump_core)
-                        args[i++] = "--dump-core";
+                        snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
+                        char_array_0(sfd);
 
-                if (arg_crash_shell)
-                        args[i++] = "--crash-shell";
+                        i = 0;
+                        args[i++] = SYSTEMD_BINARY_PATH;
+                        if (switch_root_dir)
+                                args[i++] = "--switched-root";
+                        args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
+                        args[i++] = "--deserialize";
+                        args[i++] = sfd;
+                        args[i++] = NULL;
 
-                if (arg_confirm_spawn)
-                        args[i++] = "--confirm-spawn";
+                        assert(i <= args_size);
+                        execv(args[0], (char* const*) args);
+                }
 
-                if (arg_show_status)
-                        args[i++] = "--show-status=1";
-                else
-                        args[i++] = "--show-status=0";
+                /* Try the fallback, if there is any, without any
+                 * serialization. We pass the original argv[] and
+                 * envp[]. (Well, modulo the ordering changes due to
+                 * getopt() in argv[], and some cleanups in envp[],
+                 * but let's hope that doesn't matter.) */
 
-#ifdef HAVE_SYSV_COMPAT
-                if (arg_sysv_console)
-                        args[i++] = "--sysv-console=1";
-                else
-                        args[i++] = "--sysv-console=0";
-#endif
+                if (serialization) {
+                        fclose(serialization);
+                        serialization = NULL;
+                }
 
-                snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
-                char_array_0(sfd);
+                if (fds) {
+                        fdset_free(fds);
+                        fds = NULL;
+                }
 
-                args[i++] = "--deserialize";
-                args[i++] = sfd;
+                /* Reopen the console */
+                make_console_stdio();
 
+                for (j = 1, i = 1; j < argc; j++)
+                        args[i++] = argv[j];
                 args[i++] = NULL;
+                assert(i <= args_size);
 
-                assert(i <= ELEMENTSOF(args));
-
-                /* Close and disarm the watchdog, so that the new
-                 * instance can reinitialize it, but doesn't get
-                 * rebooted while we do that */
-                watchdog_close(true);
+                if (switch_root_init) {
+                        args[0] = switch_root_init;
+                        execv(args[0], (char* const*) args);
+                        log_warning("Failed to execute configured init, trying fallback: %m");
+                }
 
+                args[0] = "/sbin/init";
                 execv(args[0], (char* const*) args);
 
-                log_error("Failed to reexecute: %m");
+                if (errno == ENOENT) {
+                        log_warning("No /sbin/init, trying fallback");
+
+                        args[0] = "/bin/sh";
+                        args[1] = NULL;
+                        execv(args[0], (char* const*) args);
+                        log_error("Failed to execute /bin/sh, giving up: %m");
+                } else
+                        log_warning("Failed to execute /sbin/init, giving up: %m");
         }
 
         if (serialization)