chiark / gitweb /
e2e1399564afc8c0c01769a82644c33399dfcdd8
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38 #ifdef HAVE_SECCOMP
39 #include <seccomp.h>
40 #endif
41
42 #include "sd-daemon.h"
43 #include "sd-messages.h"
44 #include "sd-bus.h"
45 #include "log.h"
46 #include "fdset.h"
47 #include "special.h"
48 #include "conf-parser.h"
49 #include "missing.h"
50 #include "label.h"
51 #include "pager.h"
52 #include "build.h"
53 #include "strv.h"
54 #include "def.h"
55 #include "virt.h"
56 #include "architecture.h"
57 #include "watchdog.h"
58 #include "path-util.h"
59 #include "switch-root.h"
60 #include "capability.h"
61 #include "killall.h"
62 #include "env-util.h"
63 #include "clock-util.h"
64 #include "fileio.h"
65 #include "bus-error.h"
66 #include "bus-util.h"
67 #include "selinux-util.h"
68 #include "manager.h"
69 #include "dbus-manager.h"
70 #include "load-fragment.h"
71
72 #include "mount-setup.h"
73 #include "loopback-setup.h"
74 #include "hostname-setup.h"
75 #include "machine-id-setup.h"
76 #include "selinux-setup.h"
77 #include "ima-setup.h"
78 #include "smack-setup.h"
79 #include "kmod-setup.h"
80
81 static enum {
82         ACTION_RUN,
83         ACTION_HELP,
84         ACTION_VERSION,
85         ACTION_TEST,
86         ACTION_DUMP_CONFIGURATION_ITEMS,
87         ACTION_DONE
88 } arg_action = ACTION_RUN;
89 static char *arg_default_unit = NULL;
90 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
91 static bool arg_dump_core = true;
92 static bool arg_crash_shell = false;
93 static int arg_crash_chvt = -1;
94 static bool arg_confirm_spawn = false;
95 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
96 static bool arg_switched_root = false;
97 static int arg_no_pager = -1;
98 static char ***arg_join_controllers = NULL;
99 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
100 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
101 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
102 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
103 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
104 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
105 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
106 static usec_t arg_runtime_watchdog = 0;
107 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
108 static char **arg_default_environment = NULL;
109 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
110 static uint64_t arg_capability_bounding_set_drop = 0;
111 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
112 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
113 static Set* arg_syscall_archs = NULL;
114 static FILE* arg_serialization = NULL;
115 static bool arg_default_cpu_accounting = false;
116 static bool arg_default_blockio_accounting = false;
117 static bool arg_default_memory_accounting = false;
118
/* Signal handler that intentionally does nothing. crash() installs it for
 * SIGCHLD so that the signal is no longer ignored while it waits for the
 * core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
120
121 static void pager_open_if_enabled(void) {
122
123         if (arg_no_pager <= 0)
124                 return;
125
126         pager_open(false);
127 }
128
129 noreturn static void crash(int sig) {
130
131         if (getpid() != 1)
132                 /* Pass this on immediately, if this is not PID 1 */
133                 raise(sig);
134         else if (!arg_dump_core)
135                 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
136         else {
137                 struct sigaction sa = {
138                         .sa_handler = nop_handler,
139                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
140                 };
141                 pid_t pid;
142
143                 /* We want to wait for the core process, hence let's enable SIGCHLD */
144                 sigaction(SIGCHLD, &sa, NULL);
145
146                 pid = raw_clone(SIGCHLD, NULL);
147                 if (pid < 0)
148                         log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
149
150                 else if (pid == 0) {
151                         struct rlimit rl = {};
152
153                         /* Enable default signal handler for core dump */
154                         zero(sa);
155                         sa.sa_handler = SIG_DFL;
156                         sigaction(sig, &sa, NULL);
157
158                         /* Don't limit the core dump size */
159                         rl.rlim_cur = RLIM_INFINITY;
160                         rl.rlim_max = RLIM_INFINITY;
161                         setrlimit(RLIMIT_CORE, &rl);
162
163                         /* Just to be sure... */
164                         chdir("/");
165
166                         /* Raise the signal again */
167                         pid = raw_getpid();
168                         kill(pid, sig); /* raise() would kill the parent */
169
170                         assert_not_reached("We shouldn't be here...");
171                         _exit(1);
172                 } else {
173                         siginfo_t status;
174                         int r;
175
176                         /* Order things nicely. */
177                         r = wait_for_terminate(pid, &status);
178                         if (r < 0)
179                                 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
180                         else if (status.si_code != CLD_DUMPED)
181                                 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
182                                               signal_to_string(sig),
183                                               pid, sigchld_code_to_string(status.si_code),
184                                               status.si_status,
185                                               strna(status.si_code == CLD_EXITED
186                                                     ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
187                                                     : signal_to_string(status.si_status)));
188                         else
189                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
190                 }
191         }
192
193         if (arg_crash_chvt)
194                 chvt(arg_crash_chvt);
195
196         if (arg_crash_shell) {
197                 struct sigaction sa = {
198                         .sa_handler = SIG_IGN,
199                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
200                 };
201                 pid_t pid;
202
203                 log_info("Executing crash shell in 10s...");
204                 sleep(10);
205
206                 /* Let the kernel reap children for us */
207                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
208
209                 pid = raw_clone(SIGCHLD, NULL);
210                 if (pid < 0)
211                         log_emergency_errno(errno, "Failed to fork off crash shell: %m");
212                 else if (pid == 0) {
213                         make_console_stdio();
214                         execle("/bin/sh", "/bin/sh", NULL, environ);
215
216                         log_emergency_errno(errno, "execle() failed: %m");
217                         _exit(1);
218                 } else
219                         log_info("Successfully spawned crash shell as PID "PID_FMT".", pid);
220         }
221
222         log_emergency("Freezing execution.");
223         freeze();
224 }
225
226 static void install_crash_handler(void) {
227         struct sigaction sa = {
228                 .sa_handler = crash,
229                 .sa_flags = SA_NODEFER,
230         };
231
232         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
233 }
234
235 static int console_setup(void) {
236         _cleanup_close_ int tty_fd = -1;
237         int r;
238
239         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
240         if (tty_fd < 0)
241                 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
242
243         /* We don't want to force text mode.  plymouth may be showing
244          * pictures already from initrd. */
245         r = reset_terminal_fd(tty_fd, false);
246         if (r < 0)
247                 return log_error_errno(r, "Failed to reset /dev/console: %m");
248
249         return 0;
250 }
251
252 static int set_default_unit(const char *u) {
253         char *c;
254
255         assert(u);
256
257         c = strdup(u);
258         if (!c)
259                 return -ENOMEM;
260
261         free(arg_default_unit);
262         arg_default_unit = c;
263
264         return 0;
265 }
266
267 static int parse_proc_cmdline_item(const char *key, const char *value) {
268
269         static const char * const rlmap[] = {
270                 "emergency", SPECIAL_EMERGENCY_TARGET,
271                 "-b",        SPECIAL_EMERGENCY_TARGET,
272                 "rescue",    SPECIAL_RESCUE_TARGET,
273                 "single",    SPECIAL_RESCUE_TARGET,
274                 "-s",        SPECIAL_RESCUE_TARGET,
275                 "s",         SPECIAL_RESCUE_TARGET,
276                 "S",         SPECIAL_RESCUE_TARGET,
277                 "1",         SPECIAL_RESCUE_TARGET,
278                 "2",         SPECIAL_RUNLEVEL2_TARGET,
279                 "3",         SPECIAL_RUNLEVEL3_TARGET,
280                 "4",         SPECIAL_RUNLEVEL4_TARGET,
281                 "5",         SPECIAL_RUNLEVEL5_TARGET,
282         };
283         int r;
284
285         assert(key);
286
287         if (streq(key, "systemd.unit") && value) {
288
289                 if (!in_initrd())
290                         return set_default_unit(value);
291
292         } else if (streq(key, "rd.systemd.unit") && value) {
293
294                 if (in_initrd())
295                         return set_default_unit(value);
296
297         } else if (streq(key, "systemd.dump_core") && value) {
298
299                 r = parse_boolean(value);
300                 if (r < 0)
301                         log_warning("Failed to parse dump core switch %s. Ignoring.", value);
302                 else
303                         arg_dump_core = r;
304
305         } else if (streq(key, "systemd.crash_shell") && value) {
306
307                 r = parse_boolean(value);
308                 if (r < 0)
309                         log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
310                 else
311                         arg_crash_shell = r;
312
313         } else if (streq(key, "systemd.crash_chvt") && value) {
314
315                 if (safe_atoi(value, &r) < 0)
316                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
317                 else
318                         arg_crash_chvt = r;
319
320         } else if (streq(key, "systemd.confirm_spawn") && value) {
321
322                 r = parse_boolean(value);
323                 if (r < 0)
324                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", value);
325                 else
326                         arg_confirm_spawn = r;
327
328         } else if (streq(key, "systemd.show_status") && value) {
329
330                 r = parse_show_status(value, &arg_show_status);
331                 if (r < 0)
332                         log_warning("Failed to parse show status switch %s. Ignoring.", value);
333
334         } else if (streq(key, "systemd.default_standard_output") && value) {
335
336                 r = exec_output_from_string(value);
337                 if (r < 0)
338                         log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
339                 else
340                         arg_default_std_output = r;
341
342         } else if (streq(key, "systemd.default_standard_error") && value) {
343
344                 r = exec_output_from_string(value);
345                 if (r < 0)
346                         log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
347                 else
348                         arg_default_std_error = r;
349
350         } else if (streq(key, "systemd.setenv") && value) {
351
352                 if (env_assignment_is_valid(value)) {
353                         char **env;
354
355                         env = strv_env_set(arg_default_environment, value);
356                         if (env)
357                                 arg_default_environment = env;
358                         else
359                                 log_warning_errno(ENOMEM, "Setting environment variable '%s' failed, ignoring: %m", value);
360                 } else
361                         log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
362
363         } else if (streq(key, "quiet") && !value) {
364
365                 log_set_max_level(LOG_NOTICE);
366
367                 if (arg_show_status == _SHOW_STATUS_UNSET)
368                         arg_show_status = SHOW_STATUS_AUTO;
369
370         } else if (streq(key, "debug") && !value) {
371
372                 /* Note that log_parse_environment() handles 'debug'
373                  * too, and sets the log level to LOG_DEBUG. */
374
375                 if (detect_container(NULL) > 0)
376                         log_set_target(LOG_TARGET_CONSOLE);
377
378         } else if (!in_initrd() && !value) {
379                 unsigned i;
380
381                 /* SysV compatibility */
382                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
383                         if (streq(key, rlmap[i]))
384                                 return set_default_unit(rlmap[i+1]);
385         }
386
387         return 0;
388 }
389
/* Generates a config-parser callback called 'name' that hands the right-hand
 * side of the assignment to 'func' (a function taking the string and
 * returning a negative errno-style value on failure).
 *
 * A failure is reported through log_syntax(), using 'descr' to say what kind
 * of value was rejected, and is otherwise ignored: the generated callback
 * always returns 0. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        /* note the space separating descr from the quoted value */ \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
421
422 static int config_parse_cpu_affinity2(
423                 const char *unit,
424                 const char *filename,
425                 unsigned line,
426                 const char *section,
427                 unsigned section_line,
428                 const char *lvalue,
429                 int ltype,
430                 const char *rvalue,
431                 void *data,
432                 void *userdata) {
433
434         const char *word, *state;
435         size_t l;
436         cpu_set_t *c = NULL;
437         unsigned ncpus = 0;
438
439         assert(filename);
440         assert(lvalue);
441         assert(rvalue);
442
443         FOREACH_WORD_QUOTED(word, l, rvalue, state) {
444                 char *t;
445                 int r;
446                 unsigned cpu;
447
448                 if (!(t = strndup(word, l)))
449                         return log_oom();
450
451                 r = safe_atou(t, &cpu);
452                 free(t);
453
454                 if (!c)
455                         if (!(c = cpu_set_malloc(&ncpus)))
456                                 return log_oom();
457
458                 if (r < 0 || cpu >= ncpus) {
459                         log_syntax(unit, LOG_ERR, filename, line, -r,
460                                    "Failed to parse CPU affinity '%s'", rvalue);
461                         CPU_FREE(c);
462                         return -EBADMSG;
463                 }
464
465                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
466         }
467         if (!isempty(state))
468                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
469                            "Trailing garbage, ignoring.");
470
471         if (c) {
472                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
473                         log_unit_warning(unit, "Failed to set CPU affinity: %m");
474
475                 CPU_FREE(c);
476         }
477
478         return 0;
479 }
480
481 static int config_parse_show_status(
482                 const char* unit,
483                 const char *filename,
484                 unsigned line,
485                 const char *section,
486                 unsigned section_line,
487                 const char *lvalue,
488                 int ltype,
489                 const char *rvalue,
490                 void *data,
491                 void *userdata) {
492
493         int k;
494         ShowStatus *b = data;
495
496         assert(filename);
497         assert(lvalue);
498         assert(rvalue);
499         assert(data);
500
501         k = parse_show_status(rvalue, b);
502         if (k < 0) {
503                 log_syntax(unit, LOG_ERR, filename, line, -k,
504                            "Failed to parse show status setting, ignoring: %s", rvalue);
505                 return 0;
506         }
507
508         return 0;
509 }
510
/* Frees a NULL-terminated array of string vectors: each element is released
 * with strv_free(), then the array itself. A NULL argument is a no-op. */
static void strv_free_free(char ***l) {
        size_t idx;

        if (l == NULL)
                return;

        for (idx = 0; l[idx] != NULL; idx++)
                strv_free(l[idx]);

        free(l);
}
522
523 static void free_join_controllers(void) {
524         strv_free_free(arg_join_controllers);
525         arg_join_controllers = NULL;
526 }
527
528 static int config_parse_join_controllers(const char *unit,
529                                          const char *filename,
530                                          unsigned line,
531                                          const char *section,
532                                          unsigned section_line,
533                                          const char *lvalue,
534                                          int ltype,
535                                          const char *rvalue,
536                                          void *data,
537                                          void *userdata) {
538
539         unsigned n = 0;
540         const char *word, *state;
541         size_t length;
542
543         assert(filename);
544         assert(lvalue);
545         assert(rvalue);
546
547         free_join_controllers();
548
549         FOREACH_WORD_QUOTED(word, length, rvalue, state) {
550                 char *s, **l;
551
552                 s = strndup(word, length);
553                 if (!s)
554                         return log_oom();
555
556                 l = strv_split(s, ",");
557                 free(s);
558
559                 strv_uniq(l);
560
561                 if (strv_length(l) <= 1) {
562                         strv_free(l);
563                         continue;
564                 }
565
566                 if (!arg_join_controllers) {
567                         arg_join_controllers = new(char**, 2);
568                         if (!arg_join_controllers) {
569                                 strv_free(l);
570                                 return log_oom();
571                         }
572
573                         arg_join_controllers[0] = l;
574                         arg_join_controllers[1] = NULL;
575
576                         n = 1;
577                 } else {
578                         char ***a;
579                         char ***t;
580
581                         t = new0(char**, n+2);
582                         if (!t) {
583                                 strv_free(l);
584                                 return log_oom();
585                         }
586
587                         n = 0;
588
589                         for (a = arg_join_controllers; *a; a++) {
590
591                                 if (strv_overlap(*a, l)) {
592                                         if (strv_extend_strv(&l, *a) < 0) {
593                                                 strv_free(l);
594                                                 strv_free_free(t);
595                                                 return log_oom();
596                                         }
597
598                                 } else {
599                                         char **c;
600
601                                         c = strv_copy(*a);
602                                         if (!c) {
603                                                 strv_free(l);
604                                                 strv_free_free(t);
605                                                 return log_oom();
606                                         }
607
608                                         t[n++] = c;
609                                 }
610                         }
611
612                         t[n++] = strv_uniq(l);
613
614                         strv_free_free(arg_join_controllers);
615                         arg_join_controllers = t;
616                 }
617         }
618         if (!isempty(state))
619                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
620                            "Trailing garbage, ignoring.");
621
622         return 0;
623 }
624
625 static int parse_config_file(void) {
626
627         const ConfigTableItem items[] = {
628                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
629                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
630                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
631                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
632                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
633                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
634                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
635                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
636                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
637                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
638                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
639                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
640                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
641 #ifdef HAVE_SECCOMP
642                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
643 #endif
644                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
645                 { "Manager", "DefaultTimerAccuracySec",   config_parse_sec,              0, &arg_default_timer_accuracy_usec       },
646                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
647                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
648                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
649                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
650                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
651                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
652                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
653                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
654                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
655                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
656                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
657                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
658                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
659                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
660                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
661                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
662                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
663                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
664                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
665                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
666                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
667                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
668                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
669                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
670                 { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
671                 { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
672                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
673                 {}
674         };
675
676         const char *fn, *conf_dirs_nulstr;
677
678         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
679         conf_dirs_nulstr = arg_running_as == SYSTEMD_SYSTEM ? CONF_DIRS_NULSTR("systemd/system.conf") : CONF_DIRS_NULSTR("systemd/user.conf");
680         config_parse_many(fn, conf_dirs_nulstr, "Manager\0",
681                           config_item_table_lookup, items, false, NULL);
682
683         return 0;
684 }
685
686 static int parse_argv(int argc, char *argv[]) {
687
688         enum {
689                 ARG_LOG_LEVEL = 0x100,
690                 ARG_LOG_TARGET,
691                 ARG_LOG_COLOR,
692                 ARG_LOG_LOCATION,
693                 ARG_UNIT,
694                 ARG_SYSTEM,
695                 ARG_USER,
696                 ARG_TEST,
697                 ARG_NO_PAGER,
698                 ARG_VERSION,
699                 ARG_DUMP_CONFIGURATION_ITEMS,
700                 ARG_DUMP_CORE,
701                 ARG_CRASH_SHELL,
702                 ARG_CONFIRM_SPAWN,
703                 ARG_SHOW_STATUS,
704                 ARG_DESERIALIZE,
705                 ARG_SWITCHED_ROOT,
706                 ARG_DEFAULT_STD_OUTPUT,
707                 ARG_DEFAULT_STD_ERROR
708         };
709
710         static const struct option options[] = {
711                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
712                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
713                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
714                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
715                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
716                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
717                 { "user",                     no_argument,       NULL, ARG_USER                     },
718                 { "test",                     no_argument,       NULL, ARG_TEST                     },
719                 { "no-pager",                 no_argument,       NULL, ARG_NO_PAGER                 },
720                 { "help",                     no_argument,       NULL, 'h'                          },
721                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
722                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
723                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
724                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
725                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
726                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
727                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
728                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
729                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
730                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
731                 {}
732         };
733
734         int c, r;
735
736         assert(argc >= 1);
737         assert(argv);
738
739         if (getpid() == 1)
740                 opterr = 0;
741
742         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
743
744                 switch (c) {
745
746                 case ARG_LOG_LEVEL:
747                         r = log_set_max_level_from_string(optarg);
748                         if (r < 0) {
749                                 log_error("Failed to parse log level %s.", optarg);
750                                 return r;
751                         }
752
753                         break;
754
755                 case ARG_LOG_TARGET:
756                         r = log_set_target_from_string(optarg);
757                         if (r < 0) {
758                                 log_error("Failed to parse log target %s.", optarg);
759                                 return r;
760                         }
761
762                         break;
763
764                 case ARG_LOG_COLOR:
765
766                         if (optarg) {
767                                 r = log_show_color_from_string(optarg);
768                                 if (r < 0) {
769                                         log_error("Failed to parse log color setting %s.", optarg);
770                                         return r;
771                                 }
772                         } else
773                                 log_show_color(true);
774
775                         break;
776
777                 case ARG_LOG_LOCATION:
778                         if (optarg) {
779                                 r = log_show_location_from_string(optarg);
780                                 if (r < 0) {
781                                         log_error("Failed to parse log location setting %s.", optarg);
782                                         return r;
783                                 }
784                         } else
785                                 log_show_location(true);
786
787                         break;
788
789                 case ARG_DEFAULT_STD_OUTPUT:
790                         r = exec_output_from_string(optarg);
791                         if (r < 0) {
792                                 log_error("Failed to parse default standard output setting %s.", optarg);
793                                 return r;
794                         } else
795                                 arg_default_std_output = r;
796                         break;
797
798                 case ARG_DEFAULT_STD_ERROR:
799                         r = exec_output_from_string(optarg);
800                         if (r < 0) {
801                                 log_error("Failed to parse default standard error output setting %s.", optarg);
802                                 return r;
803                         } else
804                                 arg_default_std_error = r;
805                         break;
806
807                 case ARG_UNIT:
808
809                         r = set_default_unit(optarg);
810                         if (r < 0)
811                                 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
812
813                         break;
814
815                 case ARG_SYSTEM:
816                         arg_running_as = SYSTEMD_SYSTEM;
817                         break;
818
819                 case ARG_USER:
820                         arg_running_as = SYSTEMD_USER;
821                         break;
822
823                 case ARG_TEST:
824                         arg_action = ACTION_TEST;
825                         if (arg_no_pager < 0)
826                                 arg_no_pager = true;
827                         break;
828
829                 case ARG_NO_PAGER:
830                         arg_no_pager = true;
831                         break;
832
833                 case ARG_VERSION:
834                         arg_action = ACTION_VERSION;
835                         break;
836
837                 case ARG_DUMP_CONFIGURATION_ITEMS:
838                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
839                         break;
840
841                 case ARG_DUMP_CORE:
842                         r = optarg ? parse_boolean(optarg) : 1;
843                         if (r < 0) {
844                                 log_error("Failed to parse dump core boolean %s.", optarg);
845                                 return r;
846                         }
847                         arg_dump_core = r;
848                         break;
849
850                 case ARG_CRASH_SHELL:
851                         r = optarg ? parse_boolean(optarg) : 1;
852                         if (r < 0) {
853                                 log_error("Failed to parse crash shell boolean %s.", optarg);
854                                 return r;
855                         }
856                         arg_crash_shell = r;
857                         break;
858
859                 case ARG_CONFIRM_SPAWN:
860                         r = optarg ? parse_boolean(optarg) : 1;
861                         if (r < 0) {
862                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
863                                 return r;
864                         }
865                         arg_confirm_spawn = r;
866                         break;
867
868                 case ARG_SHOW_STATUS:
869                         if (optarg) {
870                                 r = parse_show_status(optarg, &arg_show_status);
871                                 if (r < 0) {
872                                         log_error("Failed to parse show status boolean %s.", optarg);
873                                         return r;
874                                 }
875                         } else
876                                 arg_show_status = SHOW_STATUS_YES;
877                         break;
878
879                 case ARG_DESERIALIZE: {
880                         int fd;
881                         FILE *f;
882
883                         r = safe_atoi(optarg, &fd);
884                         if (r < 0 || fd < 0) {
885                                 log_error("Failed to parse deserialize option %s.", optarg);
886                                 return r < 0 ? r : -EINVAL;
887                         }
888
889                         fd_cloexec(fd, true);
890
891                         f = fdopen(fd, "r");
892                         if (!f)
893                                 return log_error_errno(errno, "Failed to open serialization fd: %m");
894
895                         if (arg_serialization)
896                                 fclose(arg_serialization);
897
898                         arg_serialization = f;
899
900                         break;
901                 }
902
903                 case ARG_SWITCHED_ROOT:
904                         arg_switched_root = true;
905                         break;
906
907                 case 'h':
908                         arg_action = ACTION_HELP;
909                         if (arg_no_pager < 0)
910                                 arg_no_pager = true;
911                         break;
912
913                 case 'D':
914                         log_set_max_level(LOG_DEBUG);
915                         break;
916
917                 case 'b':
918                 case 's':
919                 case 'z':
920                         /* Just to eat away the sysvinit kernel
921                          * cmdline args without getopt() error
922                          * messages that we'll parse in
923                          * parse_proc_cmdline_word() or ignore. */
924
925                 case '?':
926                         if (getpid() != 1)
927                                 return -EINVAL;
928                         else
929                                 return 0;
930
931                 default:
932                         assert_not_reached("Unhandled option code.");
933                 }
934
935         if (optind < argc && getpid() != 1) {
936                 /* Hmm, when we aren't run as init system
937                  * let's complain about excess arguments */
938
939                 log_error("Excess arguments.");
940                 return -EINVAL;
941         }
942
943         return 0;
944 }
945
946 static int help(void) {
947
948         printf("%s [OPTIONS...]\n\n"
949                "Starts up and maintains the system or user services.\n\n"
950                "  -h --help                      Show this help\n"
951                "     --test                      Determine startup sequence, dump it and exit\n"
952                "     --no-pager                  Do not pipe output into a pager\n"
953                "     --dump-configuration-items  Dump understood unit configuration items\n"
954                "     --unit=UNIT                 Set default unit\n"
955                "     --system                    Run a system instance, even if PID != 1\n"
956                "     --user                      Run a user instance\n"
957                "     --dump-core[=0|1]           Dump core on crash\n"
958                "     --crash-shell[=0|1]         Run shell on crash\n"
959                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
960                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
961                "     --log-target=TARGET         Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
962                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
963                "     --log-color[=0|1]           Highlight important log messages\n"
964                "     --log-location[=0|1]        Include code location in log messages\n"
965                "     --default-standard-output=  Set default standard output for services\n"
966                "     --default-standard-error=   Set default standard error output for services\n",
967                program_invocation_short_name);
968
969         return 0;
970 }
971
972 static int version(void) {
973         puts(PACKAGE_STRING);
974         puts(SYSTEMD_FEATURES);
975
976         return 0;
977 }
978
979 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
980         FILE *f = NULL;
981         FDSet *fds = NULL;
982         int r;
983
984         assert(m);
985         assert(_f);
986         assert(_fds);
987
988         r = manager_open_serialization(m, &f);
989         if (r < 0) {
990                 log_error_errno(r, "Failed to create serialization file: %m");
991                 goto fail;
992         }
993
994         /* Make sure nothing is really destructed when we shut down */
995         m->n_reloading ++;
996         bus_manager_send_reloading(m, true);
997
998         fds = fdset_new();
999         if (!fds) {
1000                 r = -ENOMEM;
1001                 log_error_errno(r, "Failed to allocate fd set: %m");
1002                 goto fail;
1003         }
1004
1005         r = manager_serialize(m, f, fds, switching_root);
1006         if (r < 0) {
1007                 log_error_errno(r, "Failed to serialize state: %m");
1008                 goto fail;
1009         }
1010
1011         if (fseeko(f, 0, SEEK_SET) < 0) {
1012                 log_error_errno(errno, "Failed to rewind serialization fd: %m");
1013                 goto fail;
1014         }
1015
1016         r = fd_cloexec(fileno(f), false);
1017         if (r < 0) {
1018                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1019                 goto fail;
1020         }
1021
1022         r = fdset_cloexec(fds, false);
1023         if (r < 0) {
1024                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1025                 goto fail;
1026         }
1027
1028         *_f = f;
1029         *_fds = fds;
1030
1031         return 0;
1032
1033 fail:
1034         fdset_free(fds);
1035
1036         if (f)
1037                 fclose(f);
1038
1039         return r;
1040 }
1041
1042 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1043         struct rlimit nl;
1044         int r;
1045
1046         assert(saved_rlimit);
1047
1048         /* Save the original RLIMIT_NOFILE so that we can reset it
1049          * later when transitioning from the initrd to the main
1050          * systemd or suchlike. */
1051         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1052                 return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
1053
1054         /* Make sure forked processes get the default kernel setting */
1055         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1056                 struct rlimit *rl;
1057
1058                 rl = newdup(struct rlimit, saved_rlimit, 1);
1059                 if (!rl)
1060                         return log_oom();
1061
1062                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1063         }
1064
1065         /* Bump up the resource limit for ourselves substantially */
1066         nl.rlim_cur = nl.rlim_max = 64*1024;
1067         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1068         if (r < 0)
1069                 return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
1070
1071         return 0;
1072 }
1073
1074 static void test_mtab(void) {
1075
1076         static const char ok[] =
1077                 "/proc/self/mounts\0"
1078                 "/proc/mounts\0"
1079                 "../proc/self/mounts\0"
1080                 "../proc/mounts\0";
1081
1082         _cleanup_free_ char *p = NULL;
1083         int r;
1084
1085         /* Check that /etc/mtab is a symlink to the right place or
1086          * non-existing. But certainly not a file, or a symlink to
1087          * some weird place... */
1088
1089         r = readlink_malloc("/etc/mtab", &p);
1090         if (r == -ENOENT)
1091                 return;
1092         if (r >= 0 && nulstr_contains(ok, p))
1093                 return;
1094
1095         log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
1096                     "This is not supported anymore. "
1097                     "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
1098 }
1099
/* Logs a warning if dir_is_empty() reports /usr as empty, which is
 * taken as a sign that /usr lives on a separate, not yet mounted
 * file system. Errors from dir_is_empty() are ignored silently. */
static void test_usr(void) {

        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1111
1112 static int initialize_join_controllers(void) {
1113         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1114          * + "net_prio". We'd like to add "cpuset" to the mix, but
1115          * "cpuset" doesn't really work for groups with no initialized
1116          * attributes. */
1117
1118         arg_join_controllers = new(char**, 3);
1119         if (!arg_join_controllers)
1120                 return -ENOMEM;
1121
1122         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1123         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1124         arg_join_controllers[2] = NULL;
1125
1126         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1127                 free_join_controllers();
1128                 return -ENOMEM;
1129         }
1130
1131         return 0;
1132 }
1133
1134 static int enforce_syscall_archs(Set *archs) {
1135 #ifdef HAVE_SECCOMP
1136         scmp_filter_ctx *seccomp;
1137         Iterator i;
1138         void *id;
1139         int r;
1140
1141         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1142         if (!seccomp)
1143                 return log_oom();
1144
1145         SET_FOREACH(id, arg_syscall_archs, i) {
1146                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1147                 if (r == -EEXIST)
1148                         continue;
1149                 if (r < 0) {
1150                         log_error_errno(r, "Failed to add architecture to seccomp: %m");
1151                         goto finish;
1152                 }
1153         }
1154
1155         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1156         if (r < 0) {
1157                 log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
1158                 goto finish;
1159         }
1160
1161         r = seccomp_load(seccomp);
1162         if (r < 0)
1163                 log_error_errno(r, "Failed to add install architecture seccomp: %m");
1164
1165 finish:
1166         seccomp_release(seccomp);
1167         return r;
1168 #else
1169         return 0;
1170 #endif
1171 }
1172
1173 static int status_welcome(void) {
1174         _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1175         int r;
1176
1177         r = parse_env_file("/etc/os-release", NEWLINE,
1178                            "PRETTY_NAME", &pretty_name,
1179                            "ANSI_COLOR", &ansi_color,
1180                            NULL);
1181         if (r == -ENOENT) {
1182                 r = parse_env_file("/usr/lib/os-release", NEWLINE,
1183                                    "PRETTY_NAME", &pretty_name,
1184                                    "ANSI_COLOR", &ansi_color,
1185                                    NULL);
1186         }
1187
1188         if (r < 0 && r != -ENOENT)
1189                 log_warning_errno(r, "Failed to read os-release file: %m");
1190
1191         return status_printf(NULL, false, false,
1192                              "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1193                              isempty(ansi_color) ? "1" : ansi_color,
1194                              isempty(pretty_name) ? "Linux" : pretty_name);
1195 }
1196
/* Writes the value of the $container environment variable to
 * /run/systemd/container. Does nothing and returns 0 if the variable
 * is unset or empty; otherwise returns the result of
 * write_string_file(). */
static int write_container_id(void) {
        const char *id = getenv("container");

        return isempty(id) ? 0 : write_string_file("/run/systemd/container", id);
}
1206
1207 int main(int argc, char *argv[]) {
1208         Manager *m = NULL;
1209         int r, retval = EXIT_FAILURE;
1210         usec_t before_startup, after_startup;
1211         char timespan[FORMAT_TIMESPAN_MAX];
1212         FDSet *fds = NULL;
1213         bool reexecute = false;
1214         const char *shutdown_verb = NULL;
1215         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1216         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1217         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1218         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1219         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1220         static char systemd[] = "systemd";
1221         bool skip_setup = false;
1222         unsigned j;
1223         bool loaded_policy = false;
1224         bool arm_reboot_watchdog = false;
1225         bool queue_default_job = false;
1226         bool empty_etc = false;
1227         char *switch_root_dir = NULL, *switch_root_init = NULL;
1228         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1229         const char *error_message = NULL;
1230
1231 #ifdef HAVE_SYSV_COMPAT
1232         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1233                 /* This is compatibility support for SysV, where
1234                  * calling init as a user is identical to telinit. */
1235
1236                 errno = -ENOENT;
1237                 execv(SYSTEMCTL_BINARY_PATH, argv);
1238                 log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1239                 return 1;
1240         }
1241 #endif
1242
1243         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1244         dual_timestamp_get(&userspace_timestamp);
1245
1246         /* Determine if this is a reexecution or normal bootup. We do
1247          * the full command line parsing much later, so let's just
1248          * have a quick peek here. */
1249         if (strv_find(argv+1, "--deserialize"))
1250                 skip_setup = true;
1251
1252         /* If we have switched root, do all the special setup
1253          * things */
1254         if (strv_find(argv+1, "--switched-root"))
1255                 skip_setup = false;
1256
1257         /* If we get started via the /sbin/init symlink then we are
1258            called 'init'. After a subsequent reexecution we are then
1259            called 'systemd'. That is confusing, hence let's call us
1260            systemd right-away. */
1261         program_invocation_short_name = systemd;
1262         prctl(PR_SET_NAME, systemd);
1263
1264         saved_argv = argv;
1265         saved_argc = argc;
1266
1267         log_show_color(isatty(STDERR_FILENO) > 0);
1268         log_set_upgrade_syslog_to_journal(true);
1269
1270         /* Disable the umask logic */
1271         if (getpid() == 1)
1272                 umask(0);
1273
1274         if (getpid() == 1 && detect_container(NULL) <= 0) {
1275
1276                 /* Running outside of a container as PID 1 */
1277                 arg_running_as = SYSTEMD_SYSTEM;
1278                 make_null_stdio();
1279                 log_set_target(LOG_TARGET_KMSG);
1280                 log_open();
1281
1282                 if (in_initrd())
1283                         initrd_timestamp = userspace_timestamp;
1284
1285                 if (!skip_setup) {
1286                         mount_setup_early();
1287                         dual_timestamp_get(&security_start_timestamp);
1288                         if (mac_selinux_setup(&loaded_policy) < 0) {
1289                                 error_message = "Failed to load SELinux policy";
1290                                 goto finish;
1291                         } else if (ima_setup() < 0) {
1292                                 error_message = "Failed to load IMA policy";
1293                                 goto finish;
1294                         } else if (mac_smack_setup(&loaded_policy) < 0) {
1295                                 error_message = "Failed to load SMACK policy";
1296                                 goto finish;
1297                         }
1298                         dual_timestamp_get(&security_finish_timestamp);
1299                 }
1300
1301                 if (mac_selinux_init(NULL) < 0) {
1302                         error_message = "Failed to initialize SELinux policy";
1303                         goto finish;
1304                 }
1305
1306                 if (!skip_setup) {
1307                         if (clock_is_localtime() > 0) {
1308                                 int min;
1309
1310                                 /*
1311                                  * The very first call of settimeofday() also does a time warp in the kernel.
1312                                  *
1313                                  * In the rtc-in-local time mode, we set the kernel's timezone, and rely on
1314                                  * external tools to take care of maintaining the RTC and do all adjustments.
1315                                  * This matches the behavior of Windows, which leaves the RTC alone if the
1316                                  * registry tells that the RTC runs in UTC.
1317                                  */
1318                                 r = clock_set_timezone(&min);
1319                                 if (r < 0)
1320                                         log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1321                                 else
1322                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1323                         } else if (!in_initrd()) {
1324                                 /*
1325                                  * Do a dummy very first call to seal the kernel's time warp magic.
1326                                  *
1327                                  * Do not call this this from inside the initrd. The initrd might not
1328                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1329                                  * that way. In such case, we need to delay the time-warp or the sealing
1330                                  * until we reach the real system.
1331                                  *
1332                                  * Do no set the kernel's timezone. The concept of local time cannot
1333                                  * be supported reliably, the time will jump or be incorrect at every daylight
1334                                  * saving time change. All kernel local time concepts will be treated
1335                                  * as UTC that way.
1336                                  */
1337                                 clock_reset_timewarp();
1338                         }
1339                 }
1340
1341                 /* Set the default for later on, but don't actually
1342                  * open the logs like this for now. Note that if we
1343                  * are transitioning from the initrd there might still
1344                  * be journal fd open, and we shouldn't attempt
1345                  * opening that before we parsed /proc/cmdline which
1346                  * might redirect output elsewhere. */
1347                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1348
1349         } else if (getpid() == 1) {
1350                 /* Running inside a container, as PID 1 */
1351                 arg_running_as = SYSTEMD_SYSTEM;
1352                 log_set_target(LOG_TARGET_CONSOLE);
1353                 log_close_console(); /* force reopen of /dev/console */
1354                 log_open();
1355
1356                 /* For the later on, see above... */
1357                 log_set_target(LOG_TARGET_JOURNAL);
1358
1359                 /* clear the kernel timestamp,
1360                  * because we are in a container */
1361                 kernel_timestamp.monotonic = 0ULL;
1362                 kernel_timestamp.realtime = 0ULL;
1363
1364         } else {
1365                 /* Running as user instance */
1366                 arg_running_as = SYSTEMD_USER;
1367                 log_set_target(LOG_TARGET_AUTO);
1368                 log_open();
1369
1370                 /* clear the kernel timestamp,
1371                  * because we are not PID 1 */
1372                 kernel_timestamp.monotonic = 0ULL;
1373                 kernel_timestamp.realtime = 0ULL;
1374         }
1375
1376         /* Initialize default unit */
1377         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1378         if (r < 0) {
1379                 log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
1380                 error_message = "Failed to set default unit";
1381                 goto finish;
1382         }
1383
1384         r = initialize_join_controllers();
1385         if (r < 0) {
1386                 error_message = "Failed to initalize cgroup controllers";
1387                 goto finish;
1388         }
1389
1390         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1391          * /proc/$PID/fd is available. */
1392         if (getpid() == 1) {
1393
1394                 /* Load the kernel modules early, so that we kdbus.ko is loaded before kdbusfs shall be mounted */
1395                 if (!skip_setup)
1396                         kmod_setup();
1397
1398                 r = mount_setup(loaded_policy);
1399                 if (r < 0) {
1400                         error_message = "Failed to mount API filesystems";
1401                         goto finish;
1402                 }
1403         }
1404
1405         /* Reset all signal handlers. */
1406         assert_se(reset_all_signal_handlers() == 0);
1407
1408         ignore_signals(SIGNALS_IGNORE, -1);
1409
1410         if (parse_config_file() < 0) {
1411                 error_message = "Failed to parse config file";
1412                 goto finish;
1413         }
1414
1415         if (arg_running_as == SYSTEMD_SYSTEM) {
1416                 r = parse_proc_cmdline(parse_proc_cmdline_item);
1417                 if (r < 0)
1418                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1419         }
1420
1421         /* Note that this also parses bits from the kernel command
1422          * line, including "debug". */
1423         log_parse_environment();
1424
1425         if (parse_argv(argc, argv) < 0) {
1426                 error_message = "Failed to parse commandline arguments";
1427                 goto finish;
1428         }
1429
1430         if (arg_action == ACTION_TEST &&
1431             geteuid() == 0) {
1432                 log_error("Don't run test mode as root.");
1433                 goto finish;
1434         }
1435
1436         if (arg_running_as == SYSTEMD_USER &&
1437             arg_action == ACTION_RUN &&
1438             sd_booted() <= 0) {
1439                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1440                 goto finish;
1441         }
1442
1443         if (arg_running_as == SYSTEMD_SYSTEM &&
1444             arg_action == ACTION_RUN &&
1445             running_in_chroot() > 0) {
1446                 log_error("Cannot be run in a chroot() environment.");
1447                 goto finish;
1448         }
1449
1450         if (arg_action == ACTION_TEST)
1451                 skip_setup = true;
1452
1453         pager_open_if_enabled();
1454
1455         if (arg_action == ACTION_HELP) {
1456                 retval = help();
1457                 goto finish;
1458         } else if (arg_action == ACTION_VERSION) {
1459                 retval = version();
1460                 goto finish;
1461         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1462                 unit_dump_config_items(stdout);
1463                 retval = EXIT_SUCCESS;
1464                 goto finish;
1465         } else if (arg_action == ACTION_DONE) {
1466                 retval = EXIT_SUCCESS;
1467                 goto finish;
1468         }
1469
1470         if (arg_running_as == SYSTEMD_USER &&
1471             !getenv("XDG_RUNTIME_DIR")) {
1472                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1473                 goto finish;
1474         }
1475
1476         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1477
1478         /* Close logging fds, in order not to confuse fdset below */
1479         log_close();
1480
1481         /* Remember open file descriptors for later deserialization */
1482         r = fdset_new_fill(&fds);
1483         if (r < 0) {
1484                 log_emergency_errno(r, "Failed to allocate fd set: %m");
1485                 error_message = "Failed to allocate fd set";
1486                 goto finish;
1487         } else
1488                 fdset_cloexec(fds, true);
1489
1490         if (arg_serialization)
1491                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1492
1493         if (arg_running_as == SYSTEMD_SYSTEM)
1494                 /* Become a session leader if we aren't one yet. */
1495                 setsid();
1496
1497         /* Move out of the way, so that we won't block unmounts */
1498         assert_se(chdir("/")  == 0);
1499
1500         /* Reset the console, but only if this is really init and we
1501          * are freshly booted */
1502         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN) {
1503
1504                 /* If we are init, we connect stdin/stdout/stderr to
1505                  * /dev/null and make sure we don't have a controlling
1506                  * tty. */
1507                 release_terminal();
1508
1509                 if (getpid() == 1 && !skip_setup)
1510                         console_setup();
1511         }
1512
1513         /* Open the logging devices, if possible and necessary */
1514         log_open();
1515
1516         if (arg_show_status == _SHOW_STATUS_UNSET)
1517                 arg_show_status = SHOW_STATUS_YES;
1518
1519         /* Make sure we leave a core dump without panicking the
1520          * kernel. */
1521         if (getpid() == 1) {
1522                 install_crash_handler();
1523
1524                 r = mount_cgroup_controllers(arg_join_controllers);
1525                 if (r < 0)
1526                         goto finish;
1527         }
1528
1529         if (arg_running_as == SYSTEMD_SYSTEM) {
1530                 const char *virtualization = NULL;
1531
1532                 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1533                          arg_action == ACTION_TEST ? "test " : "" );
1534
1535                 detect_virtualization(&virtualization);
1536                 if (virtualization)
1537                         log_info("Detected virtualization '%s'.", virtualization);
1538
1539                 write_container_id();
1540
1541                 log_info("Detected architecture '%s'.", architecture_to_string(uname_architecture()));
1542
1543                 if (in_initrd())
1544                         log_info("Running in initial RAM disk.");
1545
1546                 /* Let's check whether /etc is already populated. We
1547                  * don't actually really check for that, but use
1548                  * /etc/machine-id as flag file. This allows container
1549                  * managers and installers to provision a couple of
1550                  * files already. If the container manager wants to
1551                  * provision the machine ID itself it should pass
1552                  * $container_uuid to PID 1. */
1553
1554                 empty_etc = access("/etc/machine-id", F_OK) < 0;
1555                 if (empty_etc)
1556                         log_info("Running with unpopulated /etc.");
1557         } else {
1558                 _cleanup_free_ char *t;
1559
1560                 t = uid_to_name(getuid());
1561                 log_debug(PACKAGE_STRING " running in %suser mode for user "UID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1562                           arg_action == ACTION_TEST ? " test" : "", getuid(), t);
1563         }
1564
1565         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1566                 if (arg_show_status > 0 || plymouth_running())
1567                         status_welcome();
1568
1569                 hostname_setup();
1570                 machine_id_setup(NULL);
1571                 loopback_setup();
1572
1573                 test_mtab();
1574                 test_usr();
1575         }
1576
1577         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1578                 watchdog_set_timeout(&arg_runtime_watchdog);
1579
1580         if (arg_timer_slack_nsec != NSEC_INFINITY)
1581                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1582                         log_error_errno(errno, "Failed to adjust timer slack: %m");
1583
1584         if (arg_capability_bounding_set_drop) {
1585                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1586                 if (r < 0) {
1587                         log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1588                         error_message = "Failed to drop capability bounding set of usermode helpers";
1589                         goto finish;
1590                 }
1591                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1592                 if (r < 0) {
1593                         log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1594                         error_message = "Failed to drop capability bounding set";
1595                         goto finish;
1596                 }
1597         }
1598
1599         if (arg_syscall_archs) {
1600                 r = enforce_syscall_archs(arg_syscall_archs);
1601                 if (r < 0) {
1602                         error_message = "Failed to set syscall architectures";
1603                         goto finish;
1604                 }
1605         }
1606
1607         if (arg_running_as == SYSTEMD_USER) {
1608                 /* Become reaper of our children */
1609                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1610                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
1611                         if (errno == EINVAL)
1612                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1613                 }
1614         }
1615
1616         if (arg_running_as == SYSTEMD_SYSTEM) {
1617                 bump_rlimit_nofile(&saved_rlimit_nofile);
1618
1619                 if (empty_etc) {
1620                         r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_FULL, false, NULL, 0);
1621                         if (r < 0)
1622                                 log_warning_errno(r, "Failed to populate /etc with preset unit settings, ignoring: %m");
1623                         else
1624                                 log_info("Populated /etc with preset unit settings.");
1625                 }
1626         }
1627
1628         r = manager_new(arg_running_as, arg_action == ACTION_TEST, &m);
1629         if (r < 0) {
1630                 log_emergency_errno(r, "Failed to allocate manager object: %m");
1631                 error_message = "Failed to allocate manager object";
1632                 goto finish;
1633         }
1634
1635         m->confirm_spawn = arg_confirm_spawn;
1636         m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
1637         m->default_std_output = arg_default_std_output;
1638         m->default_std_error = arg_default_std_error;
1639         m->default_restart_usec = arg_default_restart_usec;
1640         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1641         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1642         m->default_start_limit_interval = arg_default_start_limit_interval;
1643         m->default_start_limit_burst = arg_default_start_limit_burst;
1644         m->default_cpu_accounting = arg_default_cpu_accounting;
1645         m->default_blockio_accounting = arg_default_blockio_accounting;
1646         m->default_memory_accounting = arg_default_memory_accounting;
1647         m->runtime_watchdog = arg_runtime_watchdog;
1648         m->shutdown_watchdog = arg_shutdown_watchdog;
1649
1650         m->userspace_timestamp = userspace_timestamp;
1651         m->kernel_timestamp = kernel_timestamp;
1652         m->initrd_timestamp = initrd_timestamp;
1653         m->security_start_timestamp = security_start_timestamp;
1654         m->security_finish_timestamp = security_finish_timestamp;
1655
1656         manager_set_default_rlimits(m, arg_default_rlimit);
1657         manager_environment_add(m, NULL, arg_default_environment);
1658         manager_set_show_status(m, arg_show_status);
1659         manager_set_first_boot(m, empty_etc);
1660
1661         /* Remember whether we should queue the default job */
1662         queue_default_job = !arg_serialization || arg_switched_root;
1663
1664         before_startup = now(CLOCK_MONOTONIC);
1665
1666         r = manager_startup(m, arg_serialization, fds);
1667         if (r < 0)
1668                 log_error_errno(r, "Failed to fully start up daemon: %m");
1669
1670         /* This will close all file descriptors that were opened, but
1671          * not claimed by any unit. */
1672         fdset_free(fds);
1673         fds = NULL;
1674
1675         if (arg_serialization) {
1676                 fclose(arg_serialization);
1677                 arg_serialization = NULL;
1678         }
1679
1680         if (queue_default_job) {
1681                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1682                 Unit *target = NULL;
1683                 Job *default_unit_job;
1684
1685                 log_debug("Activating default unit: %s", arg_default_unit);
1686
1687                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1688                 if (r < 0)
1689                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1690                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1691                         log_error_errno(target->load_error, "Failed to load default target: %m");
1692                 else if (target->load_state == UNIT_MASKED)
1693                         log_error("Default target masked.");
1694
1695                 if (!target || target->load_state != UNIT_LOADED) {
1696                         log_info("Trying to load rescue target...");
1697
1698                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1699                         if (r < 0) {
1700                                 log_emergency("Failed to load rescue target: %s", bus_error_message(&error, r));
1701                                 error_message = "Failed to load rescue target";
1702                                 goto finish;
1703                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1704                                 log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1705                                 error_message = "Failed to load rescue target";
1706                                 goto finish;
1707                         } else if (target->load_state == UNIT_MASKED) {
1708                                 log_emergency("Rescue target masked.");
1709                                 error_message = "Rescue target masked";
1710                                 goto finish;
1711                         }
1712                 }
1713
1714                 assert(target->load_state == UNIT_LOADED);
1715
1716                 if (arg_action == ACTION_TEST) {
1717                         printf("-> By units:\n");
1718                         manager_dump_units(m, stdout, "\t");
1719                 }
1720
1721                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1722                 if (r == -EPERM) {
1723                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1724
1725                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1726                         if (r < 0) {
1727                                 log_emergency("Failed to start default target: %s", bus_error_message(&error, r));
1728                                 error_message = "Failed to start default target";
1729                                 goto finish;
1730                         }
1731                 } else if (r < 0) {
1732                         log_emergency("Failed to isolate default target: %s", bus_error_message(&error, r));
1733                         error_message = "Failed to isolate default target";
1734                         goto finish;
1735                 }
1736
1737                 m->default_unit_job_id = default_unit_job->id;
1738
1739                 after_startup = now(CLOCK_MONOTONIC);
1740                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1741                          "Loaded units and determined initial transaction in %s.",
1742                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
1743
1744                 if (arg_action == ACTION_TEST) {
1745                         printf("-> By jobs:\n");
1746                         manager_dump_jobs(m, stdout, "\t");
1747                         retval = EXIT_SUCCESS;
1748                         goto finish;
1749                 }
1750         }
1751
1752         for (;;) {
1753                 r = manager_loop(m);
1754                 if (r < 0) {
1755                         log_emergency_errno(r, "Failed to run main loop: %m");
1756                         error_message = "Failed to run main loop";
1757                         goto finish;
1758                 }
1759
1760                 switch (m->exit_code) {
1761
1762                 case MANAGER_EXIT:
1763                         retval = EXIT_SUCCESS;
1764                         log_debug("Exit.");
1765                         goto finish;
1766
1767                 case MANAGER_RELOAD:
1768                         log_info("Reloading.");
1769                         r = manager_reload(m);
1770                         if (r < 0)
1771                                 log_error_errno(r, "Failed to reload: %m");
1772                         break;
1773
1774                 case MANAGER_REEXECUTE:
1775
1776                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0) {
1777                                 error_message = "Failed to prepare for reexection";
1778                                 goto finish;
1779                         }
1780
1781                         reexecute = true;
1782                         log_notice("Reexecuting.");
1783                         goto finish;
1784
1785                 case MANAGER_SWITCH_ROOT:
1786                         /* Steal the switch root parameters */
1787                         switch_root_dir = m->switch_root;
1788                         switch_root_init = m->switch_root_init;
1789                         m->switch_root = m->switch_root_init = NULL;
1790
1791                         if (!switch_root_init)
1792                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0) {
1793                                         error_message = "Failed to prepare for reexection";
1794                                         goto finish;
1795                                 }
1796
1797                         reexecute = true;
1798                         log_notice("Switching root.");
1799                         goto finish;
1800
1801                 case MANAGER_REBOOT:
1802                 case MANAGER_POWEROFF:
1803                 case MANAGER_HALT:
1804                 case MANAGER_KEXEC: {
1805                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1806                                 [MANAGER_REBOOT] = "reboot",
1807                                 [MANAGER_POWEROFF] = "poweroff",
1808                                 [MANAGER_HALT] = "halt",
1809                                 [MANAGER_KEXEC] = "kexec"
1810                         };
1811
1812                         assert_se(shutdown_verb = table[m->exit_code]);
1813                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1814
1815                         log_notice("Shutting down.");
1816                         goto finish;
1817                 }
1818
1819                 default:
1820                         assert_not_reached("Unknown exit code.");
1821                 }
1822         }
1823
1824 finish:
1825         pager_close();
1826
1827         m = manager_free(m);
1828
1829         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1830                 free(arg_default_rlimit[j]);
1831                 arg_default_rlimit[j] = NULL;
1832         }
1833
1834         free(arg_default_unit);
1835         arg_default_unit = NULL;
1836
1837         free_join_controllers();
1838
1839         strv_free(arg_default_environment);
1840         arg_default_environment = NULL;
1841
1842         set_free(arg_syscall_archs);
1843         arg_syscall_archs = NULL;
1844
1845         mac_selinux_finish();
1846
1847         if (reexecute) {
1848                 const char **args;
1849                 unsigned i, args_size;
1850
1851                 /* Close and disarm the watchdog, so that the new
1852                  * instance can reinitialize it, but doesn't get
1853                  * rebooted while we do that */
1854                 watchdog_close(true);
1855
1856                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1857                  * that the new systemd can pass the kernel default to
1858                  * its child processes */
1859                 if (saved_rlimit_nofile.rlim_cur > 0)
1860                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1861
1862                 if (switch_root_dir) {
1863                         /* Kill all remaining processes from the
1864                          * initrd, but don't wait for them, so that we
1865                          * can handle the SIGCHLD for them after
1866                          * deserializing. */
1867                         broadcast_signal(SIGTERM, false, true);
1868
1869                         /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1870                         r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1871                         if (r < 0)
1872                                 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1873                 }
1874
1875                 args_size = MAX(6, argc+1);
1876                 args = newa(const char*, args_size);
1877
1878                 if (!switch_root_init) {
1879                         char sfd[16];
1880
1881                         /* First try to spawn ourselves with the right
1882                          * path, and with full serialization. We do
1883                          * this only if the user didn't specify an
1884                          * explicit init to spawn. */
1885
1886                         assert(arg_serialization);
1887                         assert(fds);
1888
1889                         snprintf(sfd, sizeof(sfd), "%i", fileno(arg_serialization));
1890                         char_array_0(sfd);
1891
1892                         i = 0;
1893                         args[i++] = SYSTEMD_BINARY_PATH;
1894                         if (switch_root_dir)
1895                                 args[i++] = "--switched-root";
1896                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1897                         args[i++] = "--deserialize";
1898                         args[i++] = sfd;
1899                         args[i++] = NULL;
1900
1901                         /* do not pass along the environment we inherit from the kernel or initrd */
1902                         if (switch_root_dir)
1903                                 clearenv();
1904
1905                         assert(i <= args_size);
1906                         execv(args[0], (char* const*) args);
1907                 }
1908
1909                 /* Try the fallback, if there is any, without any
1910                  * serialization. We pass the original argv[] and
1911                  * envp[]. (Well, modulo the ordering changes due to
1912                  * getopt() in argv[], and some cleanups in envp[],
1913                  * but let's hope that doesn't matter.) */
1914
1915                 if (arg_serialization) {
1916                         fclose(arg_serialization);
1917                         arg_serialization = NULL;
1918                 }
1919
1920                 if (fds) {
1921                         fdset_free(fds);
1922                         fds = NULL;
1923                 }
1924
1925                 /* Reopen the console */
1926                 make_console_stdio();
1927
1928                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1929                         args[i++] = argv[j];
1930                 args[i++] = NULL;
1931                 assert(i <= args_size);
1932
1933                 /* Reenable any blocked signals, especially important
1934                  * if we switch from initial ramdisk to init=... */
1935                 reset_all_signal_handlers();
1936                 reset_signal_mask();
1937
1938                 if (switch_root_init) {
1939                         args[0] = switch_root_init;
1940                         execv(args[0], (char* const*) args);
1941                         log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1942                 }
1943
1944                 args[0] = "/sbin/init";
1945                 execv(args[0], (char* const*) args);
1946
1947                 if (errno == ENOENT) {
1948                         log_warning("No /sbin/init, trying fallback");
1949
1950                         args[0] = "/bin/sh";
1951                         args[1] = NULL;
1952                         execv(args[0], (char* const*) args);
1953                         log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1954                 } else
1955                         log_warning_errno(errno, "Failed to execute /sbin/init, giving up: %m");
1956         }
1957
1958         if (arg_serialization) {
1959                 fclose(arg_serialization);
1960                 arg_serialization = NULL;
1961         }
1962
1963         if (fds) {
1964                 fdset_free(fds);
1965                 fds = NULL;
1966         }
1967
1968 #ifdef HAVE_VALGRIND_VALGRIND_H
1969         /* If we are PID 1 and running under valgrind, then let's exit
1970          * here explicitly. valgrind will only generate nice output on
1971          * exit(), not on exec(), hence let's do the former not the
1972          * latter here. */
1973         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1974                 return 0;
1975 #endif
1976
1977         if (shutdown_verb) {
1978                 char log_level[DECIMAL_STR_MAX(int) + 1];
1979                 const char* command_line[9] = {
1980                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1981                         shutdown_verb,
1982                         "--log-level", log_level,
1983                         "--log-target",
1984                 };
1985                 unsigned pos = 5;
1986                 _cleanup_strv_free_ char **env_block = NULL;
1987
1988                 assert(command_line[pos] == NULL);
1989                 env_block = strv_copy(environ);
1990
1991                 snprintf(log_level, sizeof(log_level), "%d", log_get_max_level());
1992
1993                 switch (log_get_target()) {
1994                 case LOG_TARGET_KMSG:
1995                 case LOG_TARGET_JOURNAL_OR_KMSG:
1996                 case LOG_TARGET_SYSLOG_OR_KMSG:
1997                         command_line[pos++] = "kmsg";
1998                         break;
1999
2000                 case LOG_TARGET_CONSOLE:
2001                 default:
2002                         command_line[pos++] = "console";
2003                         break;
2004                 };
2005
2006                 if (log_get_show_color())
2007                         command_line[pos++] = "--log-color";
2008
2009                 if (log_get_show_location())
2010                         command_line[pos++] = "--log-location";
2011
2012                 assert(pos < ELEMENTSOF(command_line));
2013
2014                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
2015                         char *e;
2016
2017                         /* If we reboot let's set the shutdown
2018                          * watchdog and tell the shutdown binary to
2019                          * repeatedly ping it */
2020                         watchdog_set_timeout(&arg_shutdown_watchdog);
2021                         watchdog_close(false);
2022
2023                         /* Tell the binary how often to ping, ignore failure */
2024                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
2025                                 strv_push(&env_block, e);
2026                 } else
2027                         watchdog_close(true);
2028
2029                 /* Avoid the creation of new processes forked by the
2030                  * kernel; at this point, we will not listen to the
2031                  * signals anyway */
2032                 if (detect_container(NULL) <= 0)
2033                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
2034
2035                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
2036                 log_error_errno(errno, "Failed to execute shutdown binary, %s: %m",
2037                           getpid() == 1 ? "freezing" : "quitting");
2038         }
2039
2040         if (getpid() == 1) {
2041                 if (error_message)
2042                         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2043                                               ANSI_HIGHLIGHT_RED_ON "!!!!!!" ANSI_HIGHLIGHT_OFF,
2044                                               "%s, freezing.", error_message);
2045                 freeze();
2046         }
2047
2048         return retval;
2049 }