chiark / gitweb /
core: add a system-wide SystemCallArchitectures= setting
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85 static char *arg_default_unit = NULL;
86 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
87 static bool arg_dump_core = true;
88 static bool arg_crash_shell = false;
89 static int arg_crash_chvt = -1;
90 static bool arg_confirm_spawn = false;
91 static ShowStatus arg_show_status = SHOW_STATUS_UNSET;
92 static bool arg_switched_root = false;
93 static char ***arg_join_controllers = NULL;
94 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
95 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
96 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
97 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
98 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
99 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
100 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
101 static usec_t arg_runtime_watchdog = 0;
102 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
103 static char **arg_default_environment = NULL;
104 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
105 static uint64_t arg_capability_bounding_set_drop = 0;
106 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
107 static Set* arg_syscall_archs = NULL;
108 static FILE* arg_serialization = NULL;
109
/* Signal handler that intentionally does nothing; crash() installs it for
 * SIGCHLD before forking the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
111
112 noreturn static void crash(int sig) {
113
114         if (getpid() != 1)
115                 /* Pass this on immediately, if this is not PID 1 */
116                 raise(sig);
117         else if (!arg_dump_core)
118                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
119         else {
120                 struct sigaction sa = {
121                         .sa_handler = nop_handler,
122                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
123                 };
124                 pid_t pid;
125
126                 /* We want to wait for the core process, hence let's enable SIGCHLD */
127                 sigaction(SIGCHLD, &sa, NULL);
128
129                 pid = fork();
130                 if (pid < 0)
131                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
132
133                 else if (pid == 0) {
134                         struct rlimit rl = {};
135
136                         /* Enable default signal handler for core dump */
137                         zero(sa);
138                         sa.sa_handler = SIG_DFL;
139                         sigaction(sig, &sa, NULL);
140
141                         /* Don't limit the core dump size */
142                         rl.rlim_cur = RLIM_INFINITY;
143                         rl.rlim_max = RLIM_INFINITY;
144                         setrlimit(RLIMIT_CORE, &rl);
145
146                         /* Just to be sure... */
147                         chdir("/");
148
149                         /* Raise the signal again */
150                         raise(sig);
151
152                         assert_not_reached("We shouldn't be here...");
153                         _exit(1);
154
155                 } else {
156                         siginfo_t status;
157                         int r;
158
159                         /* Order things nicely. */
160                         r = wait_for_terminate(pid, &status);
161                         if (r < 0)
162                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
163                         else if (status.si_code != CLD_DUMPED)
164                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
165                         else
166                                 log_error("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
167                 }
168         }
169
170         if (arg_crash_chvt)
171                 chvt(arg_crash_chvt);
172
173         if (arg_crash_shell) {
174                 struct sigaction sa = {
175                         .sa_handler = SIG_IGN,
176                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
177                 };
178                 pid_t pid;
179
180                 log_info("Executing crash shell in 10s...");
181                 sleep(10);
182
183                 /* Let the kernel reap children for us */
184                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
185
186                 pid = fork();
187                 if (pid < 0)
188                         log_error("Failed to fork off crash shell: %m");
189                 else if (pid == 0) {
190                         make_console_stdio();
191                         execl("/bin/sh", "/bin/sh", NULL);
192
193                         log_error("execl() failed: %m");
194                         _exit(1);
195                 }
196
197                 log_info("Successfully spawned crash shell as pid "PID_FMT".", pid);
198         }
199
200         log_info("Freezing execution.");
201         freeze();
202 }
203
204 static void install_crash_handler(void) {
205         struct sigaction sa = {
206                 .sa_handler = crash,
207                 .sa_flags = SA_NODEFER,
208         };
209
210         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
211 }
212
213 static int console_setup(bool do_reset) {
214         int tty_fd, r;
215
216         /* If we are init, we connect stdin/stdout/stderr to /dev/null
217          * and make sure we don't have a controlling tty. */
218
219         release_terminal();
220
221         if (!do_reset)
222                 return 0;
223
224         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
225         if (tty_fd < 0) {
226                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
227                 return -tty_fd;
228         }
229
230         /* We don't want to force text mode.
231          * plymouth may be showing pictures already from initrd. */
232         r = reset_terminal_fd(tty_fd, false);
233         if (r < 0)
234                 log_error("Failed to reset /dev/console: %s", strerror(-r));
235
236         close_nointr_nofail(tty_fd);
237         return r;
238 }
239
240 static int set_default_unit(const char *u) {
241         char *c;
242
243         assert(u);
244
245         c = strdup(u);
246         if (!c)
247                 return -ENOMEM;
248
249         free(arg_default_unit);
250         arg_default_unit = c;
251
252         return 0;
253 }
254
255 static int parse_proc_cmdline_word(const char *word) {
256
257         static const char * const rlmap[] = {
258                 "emergency", SPECIAL_EMERGENCY_TARGET,
259                 "-b",        SPECIAL_EMERGENCY_TARGET,
260                 "single",    SPECIAL_RESCUE_TARGET,
261                 "-s",        SPECIAL_RESCUE_TARGET,
262                 "s",         SPECIAL_RESCUE_TARGET,
263                 "S",         SPECIAL_RESCUE_TARGET,
264                 "1",         SPECIAL_RESCUE_TARGET,
265                 "2",         SPECIAL_RUNLEVEL2_TARGET,
266                 "3",         SPECIAL_RUNLEVEL3_TARGET,
267                 "4",         SPECIAL_RUNLEVEL4_TARGET,
268                 "5",         SPECIAL_RUNLEVEL5_TARGET,
269         };
270
271         assert(word);
272
273         if (startswith(word, "systemd.unit=")) {
274
275                 if (!in_initrd())
276                         return set_default_unit(word + 13);
277
278         } else if (startswith(word, "rd.systemd.unit=")) {
279
280                 if (in_initrd())
281                         return set_default_unit(word + 16);
282
283         } else if (startswith(word, "systemd.log_target=")) {
284
285                 if (log_set_target_from_string(word + 19) < 0)
286                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
287
288         } else if (startswith(word, "systemd.log_level=")) {
289
290                 if (log_set_max_level_from_string(word + 18) < 0)
291                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
292
293         } else if (startswith(word, "systemd.log_color=")) {
294
295                 if (log_show_color_from_string(word + 18) < 0)
296                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
297
298         } else if (startswith(word, "systemd.log_location=")) {
299
300                 if (log_show_location_from_string(word + 21) < 0)
301                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
302
303         } else if (startswith(word, "systemd.dump_core=")) {
304                 int r;
305
306                 if ((r = parse_boolean(word + 18)) < 0)
307                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
308                 else
309                         arg_dump_core = r;
310
311         } else if (startswith(word, "systemd.crash_shell=")) {
312                 int r;
313
314                 if ((r = parse_boolean(word + 20)) < 0)
315                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
316                 else
317                         arg_crash_shell = r;
318
319         } else if (startswith(word, "systemd.confirm_spawn=")) {
320                 int r;
321
322                 if ((r = parse_boolean(word + 22)) < 0)
323                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
324                 else
325                         arg_confirm_spawn = r;
326
327         } else if (startswith(word, "systemd.crash_chvt=")) {
328                 int k;
329
330                 if (safe_atoi(word + 19, &k) < 0)
331                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
332                 else
333                         arg_crash_chvt = k;
334
335         } else if (startswith(word, "systemd.show_status=")) {
336                 int r;
337
338                 r = parse_show_status(word + 20, &arg_show_status);
339                 if (r < 0)
340                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
341         } else if (startswith(word, "systemd.default_standard_output=")) {
342                 int r;
343
344                 if ((r = exec_output_from_string(word + 32)) < 0)
345                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
346                 else
347                         arg_default_std_output = r;
348         } else if (startswith(word, "systemd.default_standard_error=")) {
349                 int r;
350
351                 if ((r = exec_output_from_string(word + 31)) < 0)
352                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
353                 else
354                         arg_default_std_error = r;
355         } else if (startswith(word, "systemd.setenv=")) {
356                 _cleanup_free_ char *cenv = NULL;
357
358                 cenv = strdup(word + 15);
359                 if (!cenv)
360                         return -ENOMEM;
361
362                 if (env_assignment_is_valid(cenv)) {
363                         char **env;
364
365                         env = strv_env_set(arg_default_environment, cenv);
366                         if (env)
367                                 arg_default_environment = env;
368                         else
369                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
370                 } else
371                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
372
373         } else if (startswith(word, "systemd.") ||
374                    (in_initrd() && startswith(word, "rd.systemd."))) {
375
376                 const char *c;
377
378                 /* Ignore systemd.journald.xyz and friends */
379                 c = word;
380                 if (startswith(c, "rd."))
381                         c += 3;
382                 if (startswith(c, "systemd."))
383                         c += 8;
384                 if (c[strcspn(c, ".=")] != '.')  {
385
386                         log_warning("Unknown kernel switch %s. Ignoring.", word);
387
388                         log_info("Supported kernel switches:\n"
389                                  "systemd.unit=UNIT                        Default unit to start\n"
390                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
391                                  "systemd.dump_core=0|1                    Dump core on crash\n"
392                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
393                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
394                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
395                                  "systemd.show_status=0|1|auto             Show status updates on the console during bootup\n"
396                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
397                                  "                                         Log target\n"
398                                  "systemd.log_level=LEVEL                  Log level\n"
399                                  "systemd.log_color=0|1                    Highlight important log messages\n"
400                                  "systemd.log_location=0|1                 Include code location in log messages\n"
401                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
402                                  "                                         Set default log output for services\n"
403                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
404                                  "                                         Set default log error output for services\n"
405                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
406                 }
407
408         } else if (streq(word, "quiet")) {
409                 if (arg_show_status == SHOW_STATUS_UNSET)
410                         arg_show_status = SHOW_STATUS_AUTO;
411         } else if (streq(word, "debug")) {
412                 /* Log to kmsg, the journal socket will fill up before the
413                  * journal is started and tools running during that time
414                  * will block with every log message for for 60 seconds,
415                  * before they give up. */
416                 log_set_max_level(LOG_DEBUG);
417                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG);
418         } else if (!in_initrd()) {
419                 unsigned i;
420
421                 /* SysV compatibility */
422                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
423                         if (streq(word, rlmap[i]))
424                                 return set_default_unit(rlmap[i+1]);
425         }
426
427         return 0;
428 }
429
/* Generates a configuration parser callback called 'name' that passes
 * the right-hand side of the assignment to 'func'. If 'func' returns a
 * negative value a syntax error mentioning 'descr' is logged; the
 * callback itself always returns 0, i.e. a bad value is never fatal for
 * the parse. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

/* Setters for the logging related options of the configuration file */
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
461
462 static int config_parse_cpu_affinity2(const char *unit,
463                                       const char *filename,
464                                       unsigned line,
465                                       const char *section,
466                                       unsigned section_line,
467                                       const char *lvalue,
468                                       int ltype,
469                                       const char *rvalue,
470                                       void *data,
471                                       void *userdata) {
472
473         char *w;
474         size_t l;
475         char *state;
476         cpu_set_t *c = NULL;
477         unsigned ncpus = 0;
478
479         assert(filename);
480         assert(lvalue);
481         assert(rvalue);
482
483         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
484                 char *t;
485                 int r;
486                 unsigned cpu;
487
488                 if (!(t = strndup(w, l)))
489                         return log_oom();
490
491                 r = safe_atou(t, &cpu);
492                 free(t);
493
494                 if (!c)
495                         if (!(c = cpu_set_malloc(&ncpus)))
496                                 return log_oom();
497
498                 if (r < 0 || cpu >= ncpus) {
499                         log_syntax(unit, LOG_ERR, filename, line, -r,
500                                    "Failed to parse CPU affinity '%s'", rvalue);
501                         CPU_FREE(c);
502                         return -EBADMSG;
503                 }
504
505                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
506         }
507
508         if (c) {
509                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
510                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
511
512                 CPU_FREE(c);
513         }
514
515         return 0;
516 }
517
/* Frees a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted
 * and ignored. */
static void strv_free_free(char ***l) {
        char ***entry = l;

        if (!l)
                return;

        while (*entry) {
                strv_free(*entry);
                entry++;
        }

        free(l);
}
529
530 static void free_join_controllers(void) {
531         strv_free_free(arg_join_controllers);
532         arg_join_controllers = NULL;
533 }
534
535 static int config_parse_join_controllers(const char *unit,
536                                          const char *filename,
537                                          unsigned line,
538                                          const char *section,
539                                          unsigned section_line,
540                                          const char *lvalue,
541                                          int ltype,
542                                          const char *rvalue,
543                                          void *data,
544                                          void *userdata) {
545
546         unsigned n = 0;
547         char *state, *w;
548         size_t length;
549
550         assert(filename);
551         assert(lvalue);
552         assert(rvalue);
553
554         free_join_controllers();
555
556         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
557                 char *s, **l;
558
559                 s = strndup(w, length);
560                 if (!s)
561                         return log_oom();
562
563                 l = strv_split(s, ",");
564                 free(s);
565
566                 strv_uniq(l);
567
568                 if (strv_length(l) <= 1) {
569                         strv_free(l);
570                         continue;
571                 }
572
573                 if (!arg_join_controllers) {
574                         arg_join_controllers = new(char**, 2);
575                         if (!arg_join_controllers) {
576                                 strv_free(l);
577                                 return log_oom();
578                         }
579
580                         arg_join_controllers[0] = l;
581                         arg_join_controllers[1] = NULL;
582
583                         n = 1;
584                 } else {
585                         char ***a;
586                         char ***t;
587
588                         t = new0(char**, n+2);
589                         if (!t) {
590                                 strv_free(l);
591                                 return log_oom();
592                         }
593
594                         n = 0;
595
596                         for (a = arg_join_controllers; *a; a++) {
597
598                                 if (strv_overlap(*a, l)) {
599                                         if (strv_extend_strv(&l, *a) < 0) {
600                                                 strv_free(l);
601                                                 strv_free_free(t);
602                                                 return log_oom();
603                                         }
604
605                                 } else {
606                                         char **c;
607
608                                         c = strv_copy(*a);
609                                         if (!c) {
610                                                 strv_free(l);
611                                                 strv_free_free(t);
612                                                 return log_oom();
613                                         }
614
615                                         t[n++] = c;
616                                 }
617                         }
618
619                         t[n++] = strv_uniq(l);
620
621                         strv_free_free(arg_join_controllers);
622                         arg_join_controllers = t;
623                 }
624         }
625
626         return 0;
627 }
628
629 static int parse_config_file(void) {
630
631         const ConfigTableItem items[] = {
632                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
633                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
634                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
635                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
636                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
637                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
638                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
639                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
640                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
641                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
642                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
643                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
644                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
645                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
646                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
647                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
648                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
649                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
650                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
651                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
652                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
653                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
654                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
655                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
656                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
657                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
658                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
659                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
660                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
661                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
662                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
663                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
664                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
665                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
666                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
667                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
668                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
669                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
670                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
671                 {}
672         };
673
674         _cleanup_fclose_ FILE *f;
675         const char *fn;
676         int r;
677
678         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
679         f = fopen(fn, "re");
680         if (!f) {
681                 if (errno == ENOENT)
682                         return 0;
683
684                 log_warning("Failed to open configuration file '%s': %m", fn);
685                 return 0;
686         }
687
688         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
689         if (r < 0)
690                 log_warning("Failed to parse configuration file: %s", strerror(-r));
691
692         return 0;
693 }
694
695 static int parse_proc_cmdline(void) {
696         _cleanup_free_ char *line = NULL;
697         char *w, *state;
698         size_t l;
699         int r;
700
701         r = proc_cmdline(&line);
702         if (r < 0)
703                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
704         if (r <= 0)
705                 return 0;
706
707         FOREACH_WORD_QUOTED(w, l, line, state) {
708                 _cleanup_free_ char *word;
709
710                 word = strndup(w, l);
711                 if (!word)
712                         return log_oom();
713
714                 r = parse_proc_cmdline_word(word);
715                 if (r < 0) {
716                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
717                         return r;
718                 }
719         }
720
721         return 0;
722 }
723
724 static int parse_argv(int argc, char *argv[]) {
725
726         enum {
727                 ARG_LOG_LEVEL = 0x100,
728                 ARG_LOG_TARGET,
729                 ARG_LOG_COLOR,
730                 ARG_LOG_LOCATION,
731                 ARG_UNIT,
732                 ARG_SYSTEM,
733                 ARG_USER,
734                 ARG_TEST,
735                 ARG_VERSION,
736                 ARG_DUMP_CONFIGURATION_ITEMS,
737                 ARG_DUMP_CORE,
738                 ARG_CRASH_SHELL,
739                 ARG_CONFIRM_SPAWN,
740                 ARG_SHOW_STATUS,
741                 ARG_DESERIALIZE,
742                 ARG_SWITCHED_ROOT,
743                 ARG_DEFAULT_STD_OUTPUT,
744                 ARG_DEFAULT_STD_ERROR
745         };
746
747         static const struct option options[] = {
748                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
749                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
750                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
751                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
752                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
753                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
754                 { "user",                     no_argument,       NULL, ARG_USER                     },
755                 { "test",                     no_argument,       NULL, ARG_TEST                     },
756                 { "help",                     no_argument,       NULL, 'h'                          },
757                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
758                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
759                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
760                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
761                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
762                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
763                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
764                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
765                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
766                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
767                 { NULL,                       0,                 NULL, 0                            }
768         };
769
770         int c, r;
771
772         assert(argc >= 1);
773         assert(argv);
774
775         if (getpid() == 1)
776                 opterr = 0;
777
778         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
779
780                 switch (c) {
781
782                 case ARG_LOG_LEVEL:
783                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
784                                 log_error("Failed to parse log level %s.", optarg);
785                                 return r;
786                         }
787
788                         break;
789
790                 case ARG_LOG_TARGET:
791
792                         if ((r = log_set_target_from_string(optarg)) < 0) {
793                                 log_error("Failed to parse log target %s.", optarg);
794                                 return r;
795                         }
796
797                         break;
798
799                 case ARG_LOG_COLOR:
800
801                         if (optarg) {
802                                 if ((r = log_show_color_from_string(optarg)) < 0) {
803                                         log_error("Failed to parse log color setting %s.", optarg);
804                                         return r;
805                                 }
806                         } else
807                                 log_show_color(true);
808
809                         break;
810
811                 case ARG_LOG_LOCATION:
812
813                         if (optarg) {
814                                 if ((r = log_show_location_from_string(optarg)) < 0) {
815                                         log_error("Failed to parse log location setting %s.", optarg);
816                                         return r;
817                                 }
818                         } else
819                                 log_show_location(true);
820
821                         break;
822
823                 case ARG_DEFAULT_STD_OUTPUT:
824
825                         if ((r = exec_output_from_string(optarg)) < 0) {
826                                 log_error("Failed to parse default standard output setting %s.", optarg);
827                                 return r;
828                         } else
829                                 arg_default_std_output = r;
830                         break;
831
832                 case ARG_DEFAULT_STD_ERROR:
833
834                         if ((r = exec_output_from_string(optarg)) < 0) {
835                                 log_error("Failed to parse default standard error output setting %s.", optarg);
836                                 return r;
837                         } else
838                                 arg_default_std_error = r;
839                         break;
840
841                 case ARG_UNIT:
842
843                         if ((r = set_default_unit(optarg)) < 0) {
844                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
845                                 return r;
846                         }
847
848                         break;
849
850                 case ARG_SYSTEM:
851                         arg_running_as = SYSTEMD_SYSTEM;
852                         break;
853
854                 case ARG_USER:
855                         arg_running_as = SYSTEMD_USER;
856                         break;
857
858                 case ARG_TEST:
859                         arg_action = ACTION_TEST;
860                         break;
861
862                 case ARG_VERSION:
863                         arg_action = ACTION_VERSION;
864                         break;
865
866                 case ARG_DUMP_CONFIGURATION_ITEMS:
867                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
868                         break;
869
870                 case ARG_DUMP_CORE:
871                         r = optarg ? parse_boolean(optarg) : 1;
872                         if (r < 0) {
873                                 log_error("Failed to parse dump core boolean %s.", optarg);
874                                 return r;
875                         }
876                         arg_dump_core = r;
877                         break;
878
879                 case ARG_CRASH_SHELL:
880                         r = optarg ? parse_boolean(optarg) : 1;
881                         if (r < 0) {
882                                 log_error("Failed to parse crash shell boolean %s.", optarg);
883                                 return r;
884                         }
885                         arg_crash_shell = r;
886                         break;
887
888                 case ARG_CONFIRM_SPAWN:
889                         r = optarg ? parse_boolean(optarg) : 1;
890                         if (r < 0) {
891                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
892                                 return r;
893                         }
894                         arg_confirm_spawn = r;
895                         break;
896
897                 case ARG_SHOW_STATUS:
898                         if (optarg) {
899                                 r = parse_show_status(optarg, &arg_show_status);
900                                 if (r < 0) {
901                                         log_error("Failed to parse show status boolean %s.", optarg);
902                                         return r;
903                                 }
904                         } else
905                                 arg_show_status = SHOW_STATUS_YES;
906                         break;
907
908                 case ARG_DESERIALIZE: {
909                         int fd;
910                         FILE *f;
911
912                         r = safe_atoi(optarg, &fd);
913                         if (r < 0 || fd < 0) {
914                                 log_error("Failed to parse deserialize option %s.", optarg);
915                                 return r < 0 ? r : -EINVAL;
916                         }
917
918                         fd_cloexec(fd, true);
919
920                         f = fdopen(fd, "r");
921                         if (!f) {
922                                 log_error("Failed to open serialization fd: %m");
923                                 return -errno;
924                         }
925
926                         if (arg_serialization)
927                                 fclose(arg_serialization);
928
929                         arg_serialization = f;
930
931                         break;
932                 }
933
934                 case ARG_SWITCHED_ROOT:
935                         arg_switched_root = true;
936                         break;
937
938                 case 'h':
939                         arg_action = ACTION_HELP;
940                         break;
941
942                 case 'D':
943                         log_set_max_level(LOG_DEBUG);
944                         break;
945
946                 case 'b':
947                 case 's':
948                 case 'z':
949                         /* Just to eat away the sysvinit kernel
950                          * cmdline args without getopt() error
951                          * messages that we'll parse in
952                          * parse_proc_cmdline_word() or ignore. */
953
954                 case '?':
955                 default:
956                         if (getpid() != 1) {
957                                 log_error("Unknown option code %c", c);
958                                 return -EINVAL;
959                         }
960
961                         break;
962                 }
963
964         if (optind < argc && getpid() != 1) {
965                 /* Hmm, when we aren't run as init system
966                  * let's complain about excess arguments */
967
968                 log_error("Excess arguments.");
969                 return -EINVAL;
970         }
971
972         if (detect_container(NULL) > 0) {
973                 char **a;
974
975                 /* All /proc/cmdline arguments the kernel didn't
976                  * understand it passed to us. We're not really
977                  * interested in that usually since /proc/cmdline is
978                  * more interesting and complete. With one exception:
979                  * if we are run in a container /proc/cmdline is not
980                  * relevant for the container, hence we rely on argv[]
981                  * instead. */
982
983                 for (a = argv; a < argv + argc; a++) {
984                         r = parse_proc_cmdline_word(*a);
985                         if (r < 0) {
986                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
987                                 return r;
988                         }
989                 }
990         }
991
992         return 0;
993 }
994
995 static int help(void) {
996
997         printf("%s [OPTIONS...]\n\n"
998                "Starts up and maintains the system or user services.\n\n"
999                "  -h --help                      Show this help\n"
1000                "     --test                      Determine startup sequence, dump it and exit\n"
1001                "     --dump-configuration-items  Dump understood unit configuration items\n"
1002                "     --unit=UNIT                 Set default unit\n"
1003                "     --system                    Run a system instance, even if PID != 1\n"
1004                "     --user                      Run a user instance\n"
1005                "     --dump-core[=0|1]           Dump core on crash\n"
1006                "     --crash-shell[=0|1]         Run shell on crash\n"
1007                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1008                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1009                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1010                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1011                "     --log-color[=0|1]           Highlight important log messages\n"
1012                "     --log-location[=0|1]        Include code location in log messages\n"
1013                "     --default-standard-output=  Set default standard output for services\n"
1014                "     --default-standard-error=   Set default standard error output for services\n",
1015                program_invocation_short_name);
1016
1017         return 0;
1018 }
1019
1020 static int version(void) {
1021         puts(PACKAGE_STRING);
1022         puts(SYSTEMD_FEATURES);
1023
1024         return 0;
1025 }
1026
1027 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1028         FILE *f = NULL;
1029         FDSet *fds = NULL;
1030         int r;
1031
1032         assert(m);
1033         assert(_f);
1034         assert(_fds);
1035
1036         r = manager_open_serialization(m, &f);
1037         if (r < 0) {
1038                 log_error("Failed to create serialization file: %s", strerror(-r));
1039                 goto fail;
1040         }
1041
1042         /* Make sure nothing is really destructed when we shut down */
1043         m->n_reloading ++;
1044         bus_manager_send_reloading(m, true);
1045
1046         fds = fdset_new();
1047         if (!fds) {
1048                 r = -ENOMEM;
1049                 log_error("Failed to allocate fd set: %s", strerror(-r));
1050                 goto fail;
1051         }
1052
1053         r = manager_serialize(m, f, fds, switching_root);
1054         if (r < 0) {
1055                 log_error("Failed to serialize state: %s", strerror(-r));
1056                 goto fail;
1057         }
1058
1059         if (fseeko(f, 0, SEEK_SET) < 0) {
1060                 log_error("Failed to rewind serialization fd: %m");
1061                 goto fail;
1062         }
1063
1064         r = fd_cloexec(fileno(f), false);
1065         if (r < 0) {
1066                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1067                 goto fail;
1068         }
1069
1070         r = fdset_cloexec(fds, false);
1071         if (r < 0) {
1072                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1073                 goto fail;
1074         }
1075
1076         *_f = f;
1077         *_fds = fds;
1078
1079         return 0;
1080
1081 fail:
1082         fdset_free(fds);
1083
1084         if (f)
1085                 fclose(f);
1086
1087         return r;
1088 }
1089
1090 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1091         struct rlimit nl;
1092         int r;
1093
1094         assert(saved_rlimit);
1095
1096         /* Save the original RLIMIT_NOFILE so that we can reset it
1097          * later when transitioning from the initrd to the main
1098          * systemd or suchlike. */
1099         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1100                 log_error("Reading RLIMIT_NOFILE failed: %m");
1101                 return -errno;
1102         }
1103
1104         /* Make sure forked processes get the default kernel setting */
1105         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1106                 struct rlimit *rl;
1107
1108                 rl = newdup(struct rlimit, saved_rlimit, 1);
1109                 if (!rl)
1110                         return log_oom();
1111
1112                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1113         }
1114
1115         /* Bump up the resource limit for ourselves substantially */
1116         nl.rlim_cur = nl.rlim_max = 64*1024;
1117         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1118         if (r < 0) {
1119                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1120                 return r;
1121         }
1122
1123         return 0;
1124 }
1125
/* Warns if /etc/mtab is not a symlink pointing to /proc/self/mounts or
 * /proc/mounts. Purely advisory: nothing is changed or returned. */
static void test_mtab(void) {
        bool acceptable = false;
        char *target;

        /* readlink_malloc() only succeeds if /etc/mtab can be read as a
         * symlink; in that case we own the returned string. */
        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                acceptable = streq(target, "/proc/self/mounts") || streq(target, "/proc/mounts");
                free(target);
        }

        if (!acceptable)
                log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                            "This is not supported anymore. "
                            "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1145
/* Warns if /usr looks like a separate file system that has not been
 * mounted yet, which is detected by /usr being an empty directory.
 * Purely advisory: nothing is changed or returned. */
static void test_usr(void) {

        /* Anything but a positive answer (directory has entries, or the
         * check itself failed) means there is nothing to complain about. */
        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1157
/* Checks for /proc/cgroups. If it is missing, logs a warning that
 * control groups are unavailable and delays for 10 seconds before
 * returning, so that the message has a chance of being seen. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1172
1173 static int initialize_join_controllers(void) {
1174         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1175          * + "net_prio". We'd like to add "cpuset" to the mix, but
1176          * "cpuset" does't really work for groups with no initialized
1177          * attributes. */
1178
1179         arg_join_controllers = new(char**, 3);
1180         if (!arg_join_controllers)
1181                 return -ENOMEM;
1182
1183         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1184         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1185         arg_join_controllers[2] = NULL;
1186
1187         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1188                 free_join_controllers();
1189                 return -ENOMEM;
1190         }
1191
1192         return 0;
1193 }
1194
1195 static int enforce_syscall_archs(Set *archs) {
1196 #ifdef HAVE_SECCOMP
1197         scmp_filter_ctx *seccomp;
1198         Iterator i;
1199         void *id;
1200         int r;
1201
1202         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1203         if (!seccomp)
1204                 return log_oom();
1205
1206         SET_FOREACH(id, arg_syscall_archs, i) {
1207                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1208                 if (r == -EEXIST)
1209                         continue;
1210                 if (r < 0) {
1211                         log_error("Failed to add architecture to seccomp: %s", strerror(-r));
1212                         goto finish;
1213                 }
1214         }
1215
1216         r = seccomp_load(seccomp);
1217         if (r < 0)
1218                 log_error("Failed to add install architecture seccomp: %s", strerror(-r));
1219
1220 finish:
1221         seccomp_release(seccomp);
1222         return r;
1223 #else
1224         return 0;
1225 #endif
1226 }
1227
1228 int main(int argc, char *argv[]) {
1229         Manager *m = NULL;
1230         int r, retval = EXIT_FAILURE;
1231         usec_t before_startup, after_startup;
1232         char timespan[FORMAT_TIMESPAN_MAX];
1233         FDSet *fds = NULL;
1234         bool reexecute = false;
1235         const char *shutdown_verb = NULL;
1236         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1237         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1238         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1239         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1240         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1241         static char systemd[] = "systemd";
1242         bool skip_setup = false;
1243         unsigned j;
1244         bool loaded_policy = false;
1245         bool arm_reboot_watchdog = false;
1246         bool queue_default_job = false;
1247         char *switch_root_dir = NULL, *switch_root_init = NULL;
1248         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1249
1250 #ifdef HAVE_SYSV_COMPAT
1251         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1252                 /* This is compatibility support for SysV, where
1253                  * calling init as a user is identical to telinit. */
1254
1255                 errno = -ENOENT;
1256                 execv(SYSTEMCTL_BINARY_PATH, argv);
1257                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1258                 return 1;
1259         }
1260 #endif
1261
1262         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1263         dual_timestamp_get(&userspace_timestamp);
1264
1265         /* Determine if this is a reexecution or normal bootup. We do
1266          * the full command line parsing much later, so let's just
1267          * have a quick peek here. */
1268         if (strv_find(argv+1, "--deserialize"))
1269                 skip_setup = true;
1270
1271         /* If we have switched root, do all the special setup
1272          * things */
1273         if (strv_find(argv+1, "--switched-root"))
1274                 skip_setup = false;
1275
1276         /* If we get started via the /sbin/init symlink then we are
1277            called 'init'. After a subsequent reexecution we are then
1278            called 'systemd'. That is confusing, hence let's call us
1279            systemd right-away. */
1280         program_invocation_short_name = systemd;
1281         prctl(PR_SET_NAME, systemd);
1282
1283         saved_argv = argv;
1284         saved_argc = argc;
1285
1286         log_show_color(isatty(STDERR_FILENO) > 0);
1287
1288         /* Disable the umask logic */
1289         if (getpid() == 1)
1290                 umask(0);
1291
1292         if (getpid() == 1 && detect_container(NULL) <= 0) {
1293
1294                 /* Running outside of a container as PID 1 */
1295                 arg_running_as = SYSTEMD_SYSTEM;
1296                 make_null_stdio();
1297                 log_set_target(LOG_TARGET_KMSG);
1298                 log_open();
1299
1300                 if (in_initrd())
1301                         initrd_timestamp = userspace_timestamp;
1302
1303                 if (!skip_setup) {
1304                         mount_setup_early();
1305                         dual_timestamp_get(&security_start_timestamp);
1306                         if (selinux_setup(&loaded_policy) < 0)
1307                                 goto finish;
1308                         if (ima_setup() < 0)
1309                                 goto finish;
1310                         if (smack_setup() < 0)
1311                                 goto finish;
1312                         dual_timestamp_get(&security_finish_timestamp);
1313                 }
1314
1315                 if (label_init(NULL) < 0)
1316                         goto finish;
1317
1318                 if (!skip_setup) {
1319                         if (hwclock_is_localtime() > 0) {
1320                                 int min;
1321
1322                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1323                                 r = hwclock_set_timezone(&min);
1324                                 if (r < 0)
1325                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1326                                 else
1327                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1328                         } else if (!in_initrd()) {
1329                                 /*
1330                                  * Do dummy first-time call to seal the kernel's time warp magic
1331                                  *
1332                                  * Do not call this this from inside the initrd. The initrd might not
1333                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1334                                  * that way. In such case, we need to delay the time-warp or the sealing
1335                                  * until we reach the real system.
1336                                  */
1337                                 hwclock_reset_timezone();
1338
1339                                 /* Tell the kernel our timezone */
1340                                 r = hwclock_set_timezone(NULL);
1341                                 if (r < 0)
1342                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1343                         }
1344                 }
1345
1346                 /* Set the default for later on, but don't actually
1347                  * open the logs like this for now. Note that if we
1348                  * are transitioning from the initrd there might still
1349                  * be journal fd open, and we shouldn't attempt
1350                  * opening that before we parsed /proc/cmdline which
1351                  * might redirect output elsewhere. */
1352                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1353
1354         } else if (getpid() == 1) {
1355                 /* Running inside a container, as PID 1 */
1356                 arg_running_as = SYSTEMD_SYSTEM;
1357                 log_set_target(LOG_TARGET_CONSOLE);
1358                 log_close_console(); /* force reopen of /dev/console */
1359                 log_open();
1360
1361                 /* For the later on, see above... */
1362                 log_set_target(LOG_TARGET_JOURNAL);
1363
1364                 /* clear the kernel timestamp,
1365                  * because we are in a container */
1366                 kernel_timestamp.monotonic = 0ULL;
1367                 kernel_timestamp.realtime = 0ULL;
1368
1369         } else {
1370                 /* Running as user instance */
1371                 arg_running_as = SYSTEMD_USER;
1372                 log_set_target(LOG_TARGET_AUTO);
1373                 log_open();
1374
1375                 /* clear the kernel timestamp,
1376                  * because we are not PID 1 */
1377                 kernel_timestamp.monotonic = 0ULL;
1378                 kernel_timestamp.realtime = 0ULL;
1379         }
1380
1381         /* Initialize default unit */
1382         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1383         if (r < 0) {
1384                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1385                 goto finish;
1386         }
1387
1388         r = initialize_join_controllers();
1389         if (r < 0)
1390                 goto finish;
1391
1392         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1393          * /proc/$PID/fd is available. */
1394         if (getpid() == 1) {
1395                 r = mount_setup(loaded_policy);
1396                 if (r < 0)
1397                         goto finish;
1398         }
1399
1400         /* Reset all signal handlers. */
1401         assert_se(reset_all_signal_handlers() == 0);
1402
1403         ignore_signals(SIGNALS_IGNORE, -1);
1404
1405         if (parse_config_file() < 0)
1406                 goto finish;
1407
1408         if (arg_running_as == SYSTEMD_SYSTEM)
1409                 if (parse_proc_cmdline() < 0)
1410                         goto finish;
1411
1412         log_parse_environment();
1413
1414         if (parse_argv(argc, argv) < 0)
1415                 goto finish;
1416
1417         if (arg_action == ACTION_TEST &&
1418             geteuid() == 0) {
1419                 log_error("Don't run test mode as root.");
1420                 goto finish;
1421         }
1422
1423         if (arg_running_as == SYSTEMD_USER &&
1424             arg_action == ACTION_RUN &&
1425             sd_booted() <= 0) {
1426                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1427                 goto finish;
1428         }
1429
1430         if (arg_running_as == SYSTEMD_SYSTEM &&
1431             arg_action == ACTION_RUN &&
1432             running_in_chroot() > 0) {
1433                 log_error("Cannot be run in a chroot() environment.");
1434                 goto finish;
1435         }
1436
1437         if (arg_action == ACTION_HELP) {
1438                 retval = help();
1439                 goto finish;
1440         } else if (arg_action == ACTION_VERSION) {
1441                 retval = version();
1442                 goto finish;
1443         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1444                 unit_dump_config_items(stdout);
1445                 retval = EXIT_SUCCESS;
1446                 goto finish;
1447         } else if (arg_action == ACTION_DONE) {
1448                 retval = EXIT_SUCCESS;
1449                 goto finish;
1450         }
1451
1452         if (arg_running_as == SYSTEMD_USER &&
1453             !getenv("XDG_RUNTIME_DIR")) {
1454                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1455                 goto finish;
1456         }
1457
1458         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1459
1460         /* Close logging fds, in order not to confuse fdset below */
1461         log_close();
1462
1463         /* Remember open file descriptors for later deserialization */
1464         r = fdset_new_fill(&fds);
1465         if (r < 0) {
1466                 log_error("Failed to allocate fd set: %s", strerror(-r));
1467                 goto finish;
1468         } else
1469                 fdset_cloexec(fds, true);
1470
1471         if (arg_serialization)
1472                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1473
1474         if (arg_running_as == SYSTEMD_SYSTEM)
1475                 /* Become a session leader if we aren't one yet. */
1476                 setsid();
1477
1478         /* Move out of the way, so that we won't block unmounts */
1479         assert_se(chdir("/")  == 0);
1480
1481         /* Reset the console, but only if this is really init and we
1482          * are freshly booted */
1483         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1484                 console_setup(getpid() == 1 && !skip_setup);
1485
1486         /* Open the logging devices, if possible and necessary */
1487         log_open();
1488
1489         /* Make sure we leave a core dump without panicking the
1490          * kernel. */
1491         if (getpid() == 1) {
1492                 install_crash_handler();
1493
1494                 r = mount_cgroup_controllers(arg_join_controllers);
1495                 if (r < 0)
1496                         goto finish;
1497         }
1498
1499         if (arg_running_as == SYSTEMD_SYSTEM) {
1500                 const char *virtualization = NULL;
1501
1502                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1503
1504                 detect_virtualization(&virtualization);
1505                 if (virtualization)
1506                         log_info("Detected virtualization '%s'.", virtualization);
1507
1508                 if (in_initrd())
1509                         log_info("Running in initial RAM disk.");
1510
1511         } else {
1512                 _cleanup_free_ char *t = uid_to_name(getuid());
1513                 log_debug(PACKAGE_STRING " running in user mode for user "PID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1514                           getuid(), t);
1515         }
1516
1517         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1518                 if (arg_show_status > 0 || plymouth_running())
1519                         status_welcome();
1520
1521 #ifdef HAVE_KMOD
1522                 if (detect_container(NULL) <= 0)
1523                         kmod_setup();
1524 #endif
1525                 hostname_setup();
1526                 machine_id_setup();
1527                 loopback_setup();
1528
1529                 test_mtab();
1530                 test_usr();
1531                 test_cgroups();
1532         }
1533
1534         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1535                 watchdog_set_timeout(&arg_runtime_watchdog);
1536
1537         if (arg_timer_slack_nsec != (nsec_t) -1)
1538                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1539                         log_error("Failed to adjust timer slack: %m");
1540
1541         if (arg_capability_bounding_set_drop) {
1542                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1543                 if (r < 0) {
1544                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1545                         goto finish;
1546                 }
1547                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1548                 if (r < 0) {
1549                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1550                         goto finish;
1551                 }
1552         }
1553
1554         if (arg_syscall_archs) {
1555                 r = enforce_syscall_archs(arg_syscall_archs);
1556                 if (r < 0)
1557                         goto finish;
1558         }
1559
1560         if (arg_running_as == SYSTEMD_USER) {
1561                 /* Become reaper of our children */
1562                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1563                         log_warning("Failed to make us a subreaper: %m");
1564                         if (errno == EINVAL)
1565                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1566                 }
1567         }
1568
1569         if (arg_running_as == SYSTEMD_SYSTEM)
1570                 bump_rlimit_nofile(&saved_rlimit_nofile);
1571
1572         r = manager_new(arg_running_as, &m);
1573         if (r < 0) {
1574                 log_error("Failed to allocate manager object: %s", strerror(-r));
1575                 goto finish;
1576         }
1577
1578         m->confirm_spawn = arg_confirm_spawn;
1579         m->default_std_output = arg_default_std_output;
1580         m->default_std_error = arg_default_std_error;
1581         m->default_restart_usec = arg_default_restart_usec;
1582         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1583         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1584         m->default_start_limit_interval = arg_default_start_limit_interval;
1585         m->default_start_limit_burst = arg_default_start_limit_burst;
1586         m->runtime_watchdog = arg_runtime_watchdog;
1587         m->shutdown_watchdog = arg_shutdown_watchdog;
1588         m->userspace_timestamp = userspace_timestamp;
1589         m->kernel_timestamp = kernel_timestamp;
1590         m->initrd_timestamp = initrd_timestamp;
1591         m->security_start_timestamp = security_start_timestamp;
1592         m->security_finish_timestamp = security_finish_timestamp;
1593
1594         manager_set_default_rlimits(m, arg_default_rlimit);
1595
1596         if (arg_default_environment)
1597                 manager_environment_add(m, NULL, arg_default_environment);
1598
1599         if (arg_show_status == SHOW_STATUS_UNSET)
1600                 arg_show_status = SHOW_STATUS_YES;
1601         manager_set_show_status(m, arg_show_status);
1602
1603         /* Remember whether we should queue the default job */
1604         queue_default_job = !arg_serialization || arg_switched_root;
1605
1606         before_startup = now(CLOCK_MONOTONIC);
1607
1608         r = manager_startup(m, arg_serialization, fds);
1609         if (r < 0)
1610                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1611
1612         /* This will close all file descriptors that were opened, but
1613          * not claimed by any unit. */
1614         fdset_free(fds);
1615         fds = NULL;
1616
1617         if (arg_serialization) {
1618                 fclose(arg_serialization);
1619                 arg_serialization = NULL;
1620         }
1621
1622         if (queue_default_job) {
1623                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1624                 Unit *target = NULL;
1625                 Job *default_unit_job;
1626
1627                 log_debug("Activating default unit: %s", arg_default_unit);
1628
1629                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1630                 if (r < 0)
1631                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1632                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1633                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1634                 else if (target->load_state == UNIT_MASKED)
1635                         log_error("Default target masked.");
1636
1637                 if (!target || target->load_state != UNIT_LOADED) {
1638                         log_info("Trying to load rescue target...");
1639
1640                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1641                         if (r < 0) {
1642                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1643                                 goto finish;
1644                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1645                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1646                                 goto finish;
1647                         } else if (target->load_state == UNIT_MASKED) {
1648                                 log_error("Rescue target masked.");
1649                                 goto finish;
1650                         }
1651                 }
1652
1653                 assert(target->load_state == UNIT_LOADED);
1654
1655                 if (arg_action == ACTION_TEST) {
1656                         printf("-> By units:\n");
1657                         manager_dump_units(m, stdout, "\t");
1658                 }
1659
1660                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1661                 if (r == -EPERM) {
1662                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1663
1664                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1665                         if (r < 0) {
1666                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1667                                 goto finish;
1668                         }
1669                 } else if (r < 0) {
1670                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1671                         goto finish;
1672                 }
1673
1674                 m->default_unit_job_id = default_unit_job->id;
1675
1676                 after_startup = now(CLOCK_MONOTONIC);
1677                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1678                          "Loaded units and determined initial transaction in %s.",
1679                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1680
1681                 if (arg_action == ACTION_TEST) {
1682                         printf("-> By jobs:\n");
1683                         manager_dump_jobs(m, stdout, "\t");
1684                         retval = EXIT_SUCCESS;
1685                         goto finish;
1686                 }
1687         }
1688
1689         for (;;) {
1690                 r = manager_loop(m);
1691                 if (r < 0) {
1692                         log_error("Failed to run mainloop: %s", strerror(-r));
1693                         goto finish;
1694                 }
1695
1696                 switch (m->exit_code) {
1697
1698                 case MANAGER_EXIT:
1699                         retval = EXIT_SUCCESS;
1700                         log_debug("Exit.");
1701                         goto finish;
1702
1703                 case MANAGER_RELOAD:
1704                         log_info("Reloading.");
1705                         r = manager_reload(m);
1706                         if (r < 0)
1707                                 log_error("Failed to reload: %s", strerror(-r));
1708                         break;
1709
1710                 case MANAGER_REEXECUTE:
1711
1712                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0)
1713                                 goto finish;
1714
1715                         reexecute = true;
1716                         log_notice("Reexecuting.");
1717                         goto finish;
1718
1719                 case MANAGER_SWITCH_ROOT:
1720                         /* Steal the switch root parameters */
1721                         switch_root_dir = m->switch_root;
1722                         switch_root_init = m->switch_root_init;
1723                         m->switch_root = m->switch_root_init = NULL;
1724
1725                         if (!switch_root_init)
1726                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0)
1727                                         goto finish;
1728
1729                         reexecute = true;
1730                         log_notice("Switching root.");
1731                         goto finish;
1732
1733                 case MANAGER_REBOOT:
1734                 case MANAGER_POWEROFF:
1735                 case MANAGER_HALT:
1736                 case MANAGER_KEXEC: {
1737                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1738                                 [MANAGER_REBOOT] = "reboot",
1739                                 [MANAGER_POWEROFF] = "poweroff",
1740                                 [MANAGER_HALT] = "halt",
1741                                 [MANAGER_KEXEC] = "kexec"
1742                         };
1743
1744                         assert_se(shutdown_verb = table[m->exit_code]);
1745                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1746
1747                         log_notice("Shutting down.");
1748                         goto finish;
1749                 }
1750
1751                 default:
1752                         assert_not_reached("Unknown exit code.");
1753                 }
1754         }
1755
1756 finish:
1757         if (m) {
1758                 manager_free(m);
1759                 m = NULL;
1760         }
1761
1762         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1763                 free(arg_default_rlimit[j]);
1764                 arg_default_rlimit[j] = NULL;
1765         }
1766
1767         free(arg_default_unit);
1768         arg_default_unit = NULL;
1769
1770         free_join_controllers();
1771
1772         strv_free(arg_default_environment);
1773         arg_default_environment = NULL;
1774
1775         set_free(arg_syscall_archs);
1776         arg_syscall_archs = NULL;
1777
1778         label_finish();
1779
1780         if (reexecute) {
1781                 const char **args;
1782                 unsigned i, args_size;
1783
1784                 /* Close and disarm the watchdog, so that the new
1785                  * instance can reinitialize it, but doesn't get
1786                  * rebooted while we do that */
1787                 watchdog_close(true);
1788
1789                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1790                  * that the new systemd can pass the kernel default to
1791                  * its child processes */
1792                 if (saved_rlimit_nofile.rlim_cur > 0)
1793                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1794
1795                 if (switch_root_dir) {
1796                         /* Kill all remaining processes from the
1797                          * initrd, but don't wait for them, so that we
1798                          * can handle the SIGCHLD for them after
1799                          * deserializing. */
1800                         broadcast_signal(SIGTERM, false, true);
1801
1802                         /* And switch root */
1803                         r = switch_root(switch_root_dir);
1804                         if (r < 0)
1805                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1806                 }
1807
1808                 args_size = MAX(6, argc+1);
1809                 args = newa(const char*, args_size);
1810
1811                 if (!switch_root_init) {
1812                         char sfd[16];
1813
1814                         /* First try to spawn ourselves with the right
1815                          * path, and with full serialization. We do
1816                          * this only if the user didn't specify an
1817                          * explicit init to spawn. */
1818
1819                         assert(arg_serialization);
1820                         assert(fds);
1821
1822                         snprintf(sfd, sizeof(sfd), "%i", fileno(arg_serialization));
1823                         char_array_0(sfd);
1824
1825                         i = 0;
1826                         args[i++] = SYSTEMD_BINARY_PATH;
1827                         if (switch_root_dir)
1828                                 args[i++] = "--switched-root";
1829                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1830                         args[i++] = "--deserialize";
1831                         args[i++] = sfd;
1832                         args[i++] = NULL;
1833
1834                         /* do not pass along the environment we inherit from the kernel or initrd */
1835                         if (switch_root_dir)
1836                                 clearenv();
1837
1838                         assert(i <= args_size);
1839                         execv(args[0], (char* const*) args);
1840                 }
1841
1842                 /* Try the fallback, if there is any, without any
1843                  * serialization. We pass the original argv[] and
1844                  * envp[]. (Well, modulo the ordering changes due to
1845                  * getopt() in argv[], and some cleanups in envp[],
1846                  * but let's hope that doesn't matter.) */
1847
1848                 if (arg_serialization) {
1849                         fclose(arg_serialization);
1850                         arg_serialization = NULL;
1851                 }
1852
1853                 if (fds) {
1854                         fdset_free(fds);
1855                         fds = NULL;
1856                 }
1857
1858                 /* Reopen the console */
1859                 make_console_stdio();
1860
1861                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1862                         args[i++] = argv[j];
1863                 args[i++] = NULL;
1864                 assert(i <= args_size);
1865
1866                 if (switch_root_init) {
1867                         args[0] = switch_root_init;
1868                         execv(args[0], (char* const*) args);
1869                         log_warning("Failed to execute configured init, trying fallback: %m");
1870                 }
1871
1872                 args[0] = "/sbin/init";
1873                 execv(args[0], (char* const*) args);
1874
1875                 if (errno == ENOENT) {
1876                         log_warning("No /sbin/init, trying fallback");
1877
1878                         args[0] = "/bin/sh";
1879                         args[1] = NULL;
1880                         execv(args[0], (char* const*) args);
1881                         log_error("Failed to execute /bin/sh, giving up: %m");
1882                 } else
1883                         log_warning("Failed to execute /sbin/init, giving up: %m");
1884         }
1885
1886         if (arg_serialization) {
1887                 fclose(arg_serialization);
1888                 arg_serialization = NULL;
1889         }
1890
1891         if (fds) {
1892                 fdset_free(fds);
1893                 fds = NULL;
1894         }
1895
1896 #ifdef HAVE_VALGRIND_VALGRIND_H
1897         /* If we are PID 1 and running under valgrind, then let's exit
1898          * here explicitly. valgrind will only generate nice output on
1899          * exit(), not on exec(), hence let's do the former not the
1900          * latter here. */
1901         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1902                 return 0;
1903 #endif
1904
1905         if (shutdown_verb) {
1906                 const char * command_line[] = {
1907                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1908                         shutdown_verb,
1909                         NULL
1910                 };
1911                 _cleanup_strv_free_ char **env_block = NULL;
1912                 env_block = strv_copy(environ);
1913
1914                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1915                         char *e;
1916
1917                         /* If we reboot let's set the shutdown
1918                          * watchdog and tell the shutdown binary to
1919                          * repeatedly ping it */
1920                         watchdog_set_timeout(&arg_shutdown_watchdog);
1921                         watchdog_close(false);
1922
1923                         /* Tell the binary how often to ping, ignore failure */
1924                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1925                                 strv_push(&env_block, e);
1926                 } else
1927                         watchdog_close(true);
1928
1929                 /* Avoid the creation of new processes forked by the
1930                  * kernel; at this point, we will not listen to the
1931                  * signals anyway */
1932                 if (detect_container(NULL) <= 0)
1933                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1934
1935                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1936                 log_error("Failed to execute shutdown binary, freezing: %m");
1937         }
1938
1939         if (getpid() == 1)
1940                 freeze();
1941
1942         return retval;
1943 }