chiark / gitweb /
core: make RuntimeDirectory honor SELinux labels
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38 #ifdef HAVE_SECCOMP
39 #include <seccomp.h>
40 #endif
41
42 #include "sd-daemon.h"
43 #include "sd-messages.h"
44 #include "sd-bus.h"
45 #include "log.h"
46 #include "fdset.h"
47 #include "special.h"
48 #include "conf-parser.h"
49 #include "missing.h"
50 #include "label.h"
51 #include "pager.h"
52 #include "build.h"
53 #include "strv.h"
54 #include "def.h"
55 #include "virt.h"
56 #include "architecture.h"
57 #include "watchdog.h"
58 #include "path-util.h"
59 #include "switch-root.h"
60 #include "capability.h"
61 #include "killall.h"
62 #include "env-util.h"
63 #include "clock-util.h"
64 #include "fileio.h"
65 #include "bus-error.h"
66 #include "bus-util.h"
67 #include "selinux-util.h"
68 #include "manager.h"
69 #include "dbus-manager.h"
70 #include "load-fragment.h"
71
72 #include "mount-setup.h"
73 #include "loopback-setup.h"
74 #include "hostname-setup.h"
75 #include "machine-id-setup.h"
76 #include "selinux-setup.h"
77 #include "ima-setup.h"
78 #include "smack-setup.h"
79 #include "kmod-setup.h"
80
81 static enum {
82         ACTION_RUN,
83         ACTION_HELP,
84         ACTION_VERSION,
85         ACTION_TEST,
86         ACTION_DUMP_CONFIGURATION_ITEMS,
87         ACTION_DONE
88 } arg_action = ACTION_RUN;
89 static char *arg_default_unit = NULL;
90 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
91 static bool arg_dump_core = true;
92 static bool arg_crash_shell = false;
93 static int arg_crash_chvt = -1;
94 static bool arg_confirm_spawn = false;
95 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
96 static bool arg_switched_root = false;
97 static int arg_no_pager = -1;
98 static char ***arg_join_controllers = NULL;
99 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
100 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
101 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
102 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
103 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
104 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
105 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
106 static usec_t arg_runtime_watchdog = 0;
107 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
108 static char **arg_default_environment = NULL;
109 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
110 static uint64_t arg_capability_bounding_set_drop = 0;
111 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
112 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
113 static Set* arg_syscall_archs = NULL;
114 static FILE* arg_serialization = NULL;
115 static bool arg_default_cpu_accounting = false;
116 static bool arg_default_blockio_accounting = false;
117 static bool arg_default_memory_accounting = false;
118
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD before forking the core dump child. */
static void nop_handler(int sig) {
        (void) sig;
}
120
121 static void pager_open_if_enabled(void) {
122
123         if (arg_no_pager <= 0)
124                 return;
125
126         pager_open(false);
127 }
128
129 noreturn static void crash(int sig) {
130
131         if (getpid() != 1)
132                 /* Pass this on immediately, if this is not PID 1 */
133                 raise(sig);
134         else if (!arg_dump_core)
135                 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
136         else {
137                 struct sigaction sa = {
138                         .sa_handler = nop_handler,
139                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
140                 };
141                 pid_t pid;
142
143                 /* We want to wait for the core process, hence let's enable SIGCHLD */
144                 sigaction(SIGCHLD, &sa, NULL);
145
146                 pid = raw_clone(SIGCHLD, NULL);
147                 if (pid < 0)
148                         log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
149
150                 else if (pid == 0) {
151                         struct rlimit rl = {};
152
153                         /* Enable default signal handler for core dump */
154                         zero(sa);
155                         sa.sa_handler = SIG_DFL;
156                         sigaction(sig, &sa, NULL);
157
158                         /* Don't limit the core dump size */
159                         rl.rlim_cur = RLIM_INFINITY;
160                         rl.rlim_max = RLIM_INFINITY;
161                         setrlimit(RLIMIT_CORE, &rl);
162
163                         /* Just to be sure... */
164                         chdir("/");
165
166                         /* Raise the signal again */
167                         pid = raw_getpid();
168                         kill(pid, sig); /* raise() would kill the parent */
169
170                         assert_not_reached("We shouldn't be here...");
171                         _exit(1);
172                 } else {
173                         siginfo_t status;
174                         int r;
175
176                         /* Order things nicely. */
177                         r = wait_for_terminate(pid, &status);
178                         if (r < 0)
179                                 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
180                         else if (status.si_code != CLD_DUMPED)
181                                 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
182                                               signal_to_string(sig),
183                                               pid, sigchld_code_to_string(status.si_code),
184                                               status.si_status,
185                                               strna(status.si_code == CLD_EXITED
186                                                     ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
187                                                     : signal_to_string(status.si_status)));
188                         else
189                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
190                 }
191         }
192
193         if (arg_crash_chvt)
194                 chvt(arg_crash_chvt);
195
196         if (arg_crash_shell) {
197                 struct sigaction sa = {
198                         .sa_handler = SIG_IGN,
199                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
200                 };
201                 pid_t pid;
202
203                 log_info("Executing crash shell in 10s...");
204                 sleep(10);
205
206                 /* Let the kernel reap children for us */
207                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
208
209                 pid = raw_clone(SIGCHLD, NULL);
210                 if (pid < 0)
211                         log_emergency_errno(errno, "Failed to fork off crash shell: %m");
212                 else if (pid == 0) {
213                         make_console_stdio();
214                         execle("/bin/sh", "/bin/sh", NULL, environ);
215
216                         log_emergency_errno(errno, "execle() failed: %m");
217                         _exit(1);
218                 } else
219                         log_info("Successfully spawned crash shell as PID "PID_FMT".", pid);
220         }
221
222         log_emergency("Freezing execution.");
223         freeze();
224 }
225
226 static void install_crash_handler(void) {
227         static const struct sigaction sa = {
228                 .sa_handler = crash,
229                 .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
230         };
231         int r;
232
233         /* We ignore the return value here, since, we don't mind if we
234          * cannot set up a crash handler */
235         r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
236         if (r < 0)
237                 log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
238 }
239
240 static int console_setup(void) {
241         _cleanup_close_ int tty_fd = -1;
242         int r;
243
244         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
245         if (tty_fd < 0)
246                 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
247
248         /* We don't want to force text mode.  plymouth may be showing
249          * pictures already from initrd. */
250         r = reset_terminal_fd(tty_fd, false);
251         if (r < 0)
252                 return log_error_errno(r, "Failed to reset /dev/console: %m");
253
254         return 0;
255 }
256
257 static int set_default_unit(const char *u) {
258         char *c;
259
260         assert(u);
261
262         c = strdup(u);
263         if (!c)
264                 return -ENOMEM;
265
266         free(arg_default_unit);
267         arg_default_unit = c;
268
269         return 0;
270 }
271
272 static int parse_proc_cmdline_item(const char *key, const char *value) {
273
274         static const char * const rlmap[] = {
275                 "emergency", SPECIAL_EMERGENCY_TARGET,
276                 "-b",        SPECIAL_EMERGENCY_TARGET,
277                 "rescue",    SPECIAL_RESCUE_TARGET,
278                 "single",    SPECIAL_RESCUE_TARGET,
279                 "-s",        SPECIAL_RESCUE_TARGET,
280                 "s",         SPECIAL_RESCUE_TARGET,
281                 "S",         SPECIAL_RESCUE_TARGET,
282                 "1",         SPECIAL_RESCUE_TARGET,
283                 "2",         SPECIAL_RUNLEVEL2_TARGET,
284                 "3",         SPECIAL_RUNLEVEL3_TARGET,
285                 "4",         SPECIAL_RUNLEVEL4_TARGET,
286                 "5",         SPECIAL_RUNLEVEL5_TARGET,
287         };
288         int r;
289
290         assert(key);
291
292         if (streq(key, "systemd.unit") && value) {
293
294                 if (!in_initrd())
295                         return set_default_unit(value);
296
297         } else if (streq(key, "rd.systemd.unit") && value) {
298
299                 if (in_initrd())
300                         return set_default_unit(value);
301
302         } else if (streq(key, "systemd.dump_core") && value) {
303
304                 r = parse_boolean(value);
305                 if (r < 0)
306                         log_warning("Failed to parse dump core switch %s. Ignoring.", value);
307                 else
308                         arg_dump_core = r;
309
310         } else if (streq(key, "systemd.crash_shell") && value) {
311
312                 r = parse_boolean(value);
313                 if (r < 0)
314                         log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
315                 else
316                         arg_crash_shell = r;
317
318         } else if (streq(key, "systemd.crash_chvt") && value) {
319
320                 if (safe_atoi(value, &r) < 0)
321                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
322                 else
323                         arg_crash_chvt = r;
324
325         } else if (streq(key, "systemd.confirm_spawn") && value) {
326
327                 r = parse_boolean(value);
328                 if (r < 0)
329                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", value);
330                 else
331                         arg_confirm_spawn = r;
332
333         } else if (streq(key, "systemd.show_status") && value) {
334
335                 r = parse_show_status(value, &arg_show_status);
336                 if (r < 0)
337                         log_warning("Failed to parse show status switch %s. Ignoring.", value);
338
339         } else if (streq(key, "systemd.default_standard_output") && value) {
340
341                 r = exec_output_from_string(value);
342                 if (r < 0)
343                         log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
344                 else
345                         arg_default_std_output = r;
346
347         } else if (streq(key, "systemd.default_standard_error") && value) {
348
349                 r = exec_output_from_string(value);
350                 if (r < 0)
351                         log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
352                 else
353                         arg_default_std_error = r;
354
355         } else if (streq(key, "systemd.setenv") && value) {
356
357                 if (env_assignment_is_valid(value)) {
358                         char **env;
359
360                         env = strv_env_set(arg_default_environment, value);
361                         if (env)
362                                 arg_default_environment = env;
363                         else
364                                 log_warning_errno(ENOMEM, "Setting environment variable '%s' failed, ignoring: %m", value);
365                 } else
366                         log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
367
368         } else if (streq(key, "quiet") && !value) {
369
370                 if (arg_show_status == _SHOW_STATUS_UNSET)
371                         arg_show_status = SHOW_STATUS_AUTO;
372
373         } else if (streq(key, "debug") && !value) {
374
375                 /* Note that log_parse_environment() handles 'debug'
376                  * too, and sets the log level to LOG_DEBUG. */
377
378                 if (detect_container(NULL) > 0)
379                         log_set_target(LOG_TARGET_CONSOLE);
380
381         } else if (!in_initrd() && !value) {
382                 unsigned i;
383
384                 /* SysV compatibility */
385                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
386                         if (streq(key, rlmap[i]))
387                                 return set_default_unit(rlmap[i+1]);
388         }
389
390         return 0;
391 }
392
393 #define DEFINE_SETTER(name, func, descr)                              \
394         static int name(const char *unit,                             \
395                         const char *filename,                         \
396                         unsigned line,                                \
397                         const char *section,                          \
398                         unsigned section_line,                        \
399                         const char *lvalue,                           \
400                         int ltype,                                    \
401                         const char *rvalue,                           \
402                         void *data,                                   \
403                         void *userdata) {                             \
404                                                                       \
405                 int r;                                                \
406                                                                       \
407                 assert(filename);                                     \
408                 assert(lvalue);                                       \
409                 assert(rvalue);                                       \
410                                                                       \
411                 r = func(rvalue);                                     \
412                 if (r < 0)                                            \
413                         log_syntax(unit, LOG_ERR, filename, line, -r, \
414                                    "Invalid " descr "'%s': %s",       \
415                                    rvalue, strerror(-r));             \
416                                                                       \
417                 return 0;                                             \
418         }
419
420 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
421 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
422 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
423 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
424
425 static int config_parse_cpu_affinity2(
426                 const char *unit,
427                 const char *filename,
428                 unsigned line,
429                 const char *section,
430                 unsigned section_line,
431                 const char *lvalue,
432                 int ltype,
433                 const char *rvalue,
434                 void *data,
435                 void *userdata) {
436
437         const char *word, *state;
438         size_t l;
439         cpu_set_t *c = NULL;
440         unsigned ncpus = 0;
441
442         assert(filename);
443         assert(lvalue);
444         assert(rvalue);
445
446         FOREACH_WORD_QUOTED(word, l, rvalue, state) {
447                 char *t;
448                 int r;
449                 unsigned cpu;
450
451                 if (!(t = strndup(word, l)))
452                         return log_oom();
453
454                 r = safe_atou(t, &cpu);
455                 free(t);
456
457                 if (!c)
458                         if (!(c = cpu_set_malloc(&ncpus)))
459                                 return log_oom();
460
461                 if (r < 0 || cpu >= ncpus) {
462                         log_syntax(unit, LOG_ERR, filename, line, -r,
463                                    "Failed to parse CPU affinity '%s'", rvalue);
464                         CPU_FREE(c);
465                         return -EBADMSG;
466                 }
467
468                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
469         }
470         if (!isempty(state))
471                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
472                            "Trailing garbage, ignoring.");
473
474         if (c) {
475                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
476                         log_unit_warning(unit, "Failed to set CPU affinity: %m");
477
478                 CPU_FREE(c);
479         }
480
481         return 0;
482 }
483
484 static int config_parse_show_status(
485                 const char* unit,
486                 const char *filename,
487                 unsigned line,
488                 const char *section,
489                 unsigned section_line,
490                 const char *lvalue,
491                 int ltype,
492                 const char *rvalue,
493                 void *data,
494                 void *userdata) {
495
496         int k;
497         ShowStatus *b = data;
498
499         assert(filename);
500         assert(lvalue);
501         assert(rvalue);
502         assert(data);
503
504         k = parse_show_status(rvalue, b);
505         if (k < 0) {
506                 log_syntax(unit, LOG_ERR, filename, line, -k,
507                            "Failed to parse show status setting, ignoring: %s", rvalue);
508                 return 0;
509         }
510
511         return 0;
512 }
513
/* Frees a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        size_t k = 0;

        if (!l)
                return;

        while (l[k]) {
                strv_free(l[k]);
                k++;
        }

        free(l);
}
525
526 static void free_join_controllers(void) {
527         strv_free_free(arg_join_controllers);
528         arg_join_controllers = NULL;
529 }
530
531 static int config_parse_join_controllers(const char *unit,
532                                          const char *filename,
533                                          unsigned line,
534                                          const char *section,
535                                          unsigned section_line,
536                                          const char *lvalue,
537                                          int ltype,
538                                          const char *rvalue,
539                                          void *data,
540                                          void *userdata) {
541
542         unsigned n = 0;
543         const char *word, *state;
544         size_t length;
545
546         assert(filename);
547         assert(lvalue);
548         assert(rvalue);
549
550         free_join_controllers();
551
552         FOREACH_WORD_QUOTED(word, length, rvalue, state) {
553                 char *s, **l;
554
555                 s = strndup(word, length);
556                 if (!s)
557                         return log_oom();
558
559                 l = strv_split(s, ",");
560                 free(s);
561
562                 strv_uniq(l);
563
564                 if (strv_length(l) <= 1) {
565                         strv_free(l);
566                         continue;
567                 }
568
569                 if (!arg_join_controllers) {
570                         arg_join_controllers = new(char**, 2);
571                         if (!arg_join_controllers) {
572                                 strv_free(l);
573                                 return log_oom();
574                         }
575
576                         arg_join_controllers[0] = l;
577                         arg_join_controllers[1] = NULL;
578
579                         n = 1;
580                 } else {
581                         char ***a;
582                         char ***t;
583
584                         t = new0(char**, n+2);
585                         if (!t) {
586                                 strv_free(l);
587                                 return log_oom();
588                         }
589
590                         n = 0;
591
592                         for (a = arg_join_controllers; *a; a++) {
593
594                                 if (strv_overlap(*a, l)) {
595                                         if (strv_extend_strv(&l, *a) < 0) {
596                                                 strv_free(l);
597                                                 strv_free_free(t);
598                                                 return log_oom();
599                                         }
600
601                                 } else {
602                                         char **c;
603
604                                         c = strv_copy(*a);
605                                         if (!c) {
606                                                 strv_free(l);
607                                                 strv_free_free(t);
608                                                 return log_oom();
609                                         }
610
611                                         t[n++] = c;
612                                 }
613                         }
614
615                         t[n++] = strv_uniq(l);
616
617                         strv_free_free(arg_join_controllers);
618                         arg_join_controllers = t;
619                 }
620         }
621         if (!isempty(state))
622                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
623                            "Trailing garbage, ignoring.");
624
625         return 0;
626 }
627
628 static int parse_config_file(void) {
629
630         const ConfigTableItem items[] = {
631                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
632                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
633                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
634                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
635                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
636                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
637                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
638                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
639                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
640                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
641                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
642                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
643                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
644 #ifdef HAVE_SECCOMP
645                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
646 #endif
647                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
648                 { "Manager", "DefaultTimerAccuracySec",   config_parse_sec,              0, &arg_default_timer_accuracy_usec       },
649                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
650                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
651                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
652                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
653                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
654                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
655                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
656                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
657                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
658                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
659                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
660                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
661                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
662                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
663                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
664                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
665                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
666                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
667                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
668                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
669                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
670                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
671                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
672                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
673                 { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
674                 { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
675                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
676                 {}
677         };
678
679         const char *fn, *conf_dirs_nulstr;
680
681         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
682         conf_dirs_nulstr = arg_running_as == SYSTEMD_SYSTEM ? CONF_DIRS_NULSTR("systemd/system.conf") : CONF_DIRS_NULSTR("systemd/user.conf");
683         config_parse_many(fn, conf_dirs_nulstr, "Manager\0",
684                           config_item_table_lookup, items, false, NULL);
685
686         return 0;
687 }
688
689 static int parse_argv(int argc, char *argv[]) {
690
691         enum {
692                 ARG_LOG_LEVEL = 0x100,
693                 ARG_LOG_TARGET,
694                 ARG_LOG_COLOR,
695                 ARG_LOG_LOCATION,
696                 ARG_UNIT,
697                 ARG_SYSTEM,
698                 ARG_USER,
699                 ARG_TEST,
700                 ARG_NO_PAGER,
701                 ARG_VERSION,
702                 ARG_DUMP_CONFIGURATION_ITEMS,
703                 ARG_DUMP_CORE,
704                 ARG_CRASH_SHELL,
705                 ARG_CONFIRM_SPAWN,
706                 ARG_SHOW_STATUS,
707                 ARG_DESERIALIZE,
708                 ARG_SWITCHED_ROOT,
709                 ARG_DEFAULT_STD_OUTPUT,
710                 ARG_DEFAULT_STD_ERROR
711         };
712
713         static const struct option options[] = {
714                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
715                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
716                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
717                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
718                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
719                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
720                 { "user",                     no_argument,       NULL, ARG_USER                     },
721                 { "test",                     no_argument,       NULL, ARG_TEST                     },
722                 { "no-pager",                 no_argument,       NULL, ARG_NO_PAGER                 },
723                 { "help",                     no_argument,       NULL, 'h'                          },
724                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
725                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
726                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
727                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
728                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
729                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
730                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
731                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
732                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
733                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
734                 {}
735         };
736
737         int c, r;
738
739         assert(argc >= 1);
740         assert(argv);
741
742         if (getpid() == 1)
743                 opterr = 0;
744
745         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
746
747                 switch (c) {
748
749                 case ARG_LOG_LEVEL:
750                         r = log_set_max_level_from_string(optarg);
751                         if (r < 0) {
752                                 log_error("Failed to parse log level %s.", optarg);
753                                 return r;
754                         }
755
756                         break;
757
758                 case ARG_LOG_TARGET:
759                         r = log_set_target_from_string(optarg);
760                         if (r < 0) {
761                                 log_error("Failed to parse log target %s.", optarg);
762                                 return r;
763                         }
764
765                         break;
766
767                 case ARG_LOG_COLOR:
768
769                         if (optarg) {
770                                 r = log_show_color_from_string(optarg);
771                                 if (r < 0) {
772                                         log_error("Failed to parse log color setting %s.", optarg);
773                                         return r;
774                                 }
775                         } else
776                                 log_show_color(true);
777
778                         break;
779
780                 case ARG_LOG_LOCATION:
781                         if (optarg) {
782                                 r = log_show_location_from_string(optarg);
783                                 if (r < 0) {
784                                         log_error("Failed to parse log location setting %s.", optarg);
785                                         return r;
786                                 }
787                         } else
788                                 log_show_location(true);
789
790                         break;
791
792                 case ARG_DEFAULT_STD_OUTPUT:
793                         r = exec_output_from_string(optarg);
794                         if (r < 0) {
795                                 log_error("Failed to parse default standard output setting %s.", optarg);
796                                 return r;
797                         } else
798                                 arg_default_std_output = r;
799                         break;
800
801                 case ARG_DEFAULT_STD_ERROR:
802                         r = exec_output_from_string(optarg);
803                         if (r < 0) {
804                                 log_error("Failed to parse default standard error output setting %s.", optarg);
805                                 return r;
806                         } else
807                                 arg_default_std_error = r;
808                         break;
809
810                 case ARG_UNIT:
811
812                         r = set_default_unit(optarg);
813                         if (r < 0)
814                                 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
815
816                         break;
817
818                 case ARG_SYSTEM:
819                         arg_running_as = SYSTEMD_SYSTEM;
820                         break;
821
822                 case ARG_USER:
823                         arg_running_as = SYSTEMD_USER;
824                         break;
825
826                 case ARG_TEST:
827                         arg_action = ACTION_TEST;
828                         if (arg_no_pager < 0)
829                                 arg_no_pager = true;
830                         break;
831
832                 case ARG_NO_PAGER:
833                         arg_no_pager = true;
834                         break;
835
836                 case ARG_VERSION:
837                         arg_action = ACTION_VERSION;
838                         break;
839
840                 case ARG_DUMP_CONFIGURATION_ITEMS:
841                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
842                         break;
843
844                 case ARG_DUMP_CORE:
845                         r = optarg ? parse_boolean(optarg) : 1;
846                         if (r < 0) {
847                                 log_error("Failed to parse dump core boolean %s.", optarg);
848                                 return r;
849                         }
850                         arg_dump_core = r;
851                         break;
852
853                 case ARG_CRASH_SHELL:
854                         r = optarg ? parse_boolean(optarg) : 1;
855                         if (r < 0) {
856                                 log_error("Failed to parse crash shell boolean %s.", optarg);
857                                 return r;
858                         }
859                         arg_crash_shell = r;
860                         break;
861
862                 case ARG_CONFIRM_SPAWN:
863                         r = optarg ? parse_boolean(optarg) : 1;
864                         if (r < 0) {
865                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
866                                 return r;
867                         }
868                         arg_confirm_spawn = r;
869                         break;
870
871                 case ARG_SHOW_STATUS:
872                         if (optarg) {
873                                 r = parse_show_status(optarg, &arg_show_status);
874                                 if (r < 0) {
875                                         log_error("Failed to parse show status boolean %s.", optarg);
876                                         return r;
877                                 }
878                         } else
879                                 arg_show_status = SHOW_STATUS_YES;
880                         break;
881
882                 case ARG_DESERIALIZE: {
883                         int fd;
884                         FILE *f;
885
886                         r = safe_atoi(optarg, &fd);
887                         if (r < 0 || fd < 0) {
888                                 log_error("Failed to parse deserialize option %s.", optarg);
889                                 return r < 0 ? r : -EINVAL;
890                         }
891
892                         fd_cloexec(fd, true);
893
894                         f = fdopen(fd, "r");
895                         if (!f)
896                                 return log_error_errno(errno, "Failed to open serialization fd: %m");
897
898                         if (arg_serialization)
899                                 fclose(arg_serialization);
900
901                         arg_serialization = f;
902
903                         break;
904                 }
905
906                 case ARG_SWITCHED_ROOT:
907                         arg_switched_root = true;
908                         break;
909
910                 case 'h':
911                         arg_action = ACTION_HELP;
912                         if (arg_no_pager < 0)
913                                 arg_no_pager = true;
914                         break;
915
916                 case 'D':
917                         log_set_max_level(LOG_DEBUG);
918                         break;
919
920                 case 'b':
921                 case 's':
922                 case 'z':
923                         /* Just to eat away the sysvinit kernel
924                          * cmdline args without getopt() error
925                          * messages that we'll parse in
926                          * parse_proc_cmdline_word() or ignore. */
927
928                 case '?':
929                         if (getpid() != 1)
930                                 return -EINVAL;
931                         else
932                                 return 0;
933
934                 default:
935                         assert_not_reached("Unhandled option code.");
936                 }
937
938         if (optind < argc && getpid() != 1) {
939                 /* Hmm, when we aren't run as init system
940                  * let's complain about excess arguments */
941
942                 log_error("Excess arguments.");
943                 return -EINVAL;
944         }
945
946         return 0;
947 }
948
949 static int help(void) {
950
951         printf("%s [OPTIONS...]\n\n"
952                "Starts up and maintains the system or user services.\n\n"
953                "  -h --help                      Show this help\n"
954                "     --test                      Determine startup sequence, dump it and exit\n"
955                "     --no-pager                  Do not pipe output into a pager\n"
956                "     --dump-configuration-items  Dump understood unit configuration items\n"
957                "     --unit=UNIT                 Set default unit\n"
958                "     --system                    Run a system instance, even if PID != 1\n"
959                "     --user                      Run a user instance\n"
960                "     --dump-core[=0|1]           Dump core on crash\n"
961                "     --crash-shell[=0|1]         Run shell on crash\n"
962                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
963                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
964                "     --log-target=TARGET         Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
965                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
966                "     --log-color[=0|1]           Highlight important log messages\n"
967                "     --log-location[=0|1]        Include code location in log messages\n"
968                "     --default-standard-output=  Set default standard output for services\n"
969                "     --default-standard-error=   Set default standard error output for services\n",
970                program_invocation_short_name);
971
972         return 0;
973 }
974
975 static int version(void) {
976         puts(PACKAGE_STRING);
977         puts(SYSTEMD_FEATURES);
978
979         return 0;
980 }
981
982 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
983         FILE *f = NULL;
984         FDSet *fds = NULL;
985         int r;
986
987         assert(m);
988         assert(_f);
989         assert(_fds);
990
991         r = manager_open_serialization(m, &f);
992         if (r < 0) {
993                 log_error_errno(r, "Failed to create serialization file: %m");
994                 goto fail;
995         }
996
997         /* Make sure nothing is really destructed when we shut down */
998         m->n_reloading ++;
999         bus_manager_send_reloading(m, true);
1000
1001         fds = fdset_new();
1002         if (!fds) {
1003                 r = -ENOMEM;
1004                 log_error_errno(r, "Failed to allocate fd set: %m");
1005                 goto fail;
1006         }
1007
1008         r = manager_serialize(m, f, fds, switching_root);
1009         if (r < 0) {
1010                 log_error_errno(r, "Failed to serialize state: %m");
1011                 goto fail;
1012         }
1013
1014         if (fseeko(f, 0, SEEK_SET) < 0) {
1015                 log_error_errno(errno, "Failed to rewind serialization fd: %m");
1016                 goto fail;
1017         }
1018
1019         r = fd_cloexec(fileno(f), false);
1020         if (r < 0) {
1021                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1022                 goto fail;
1023         }
1024
1025         r = fdset_cloexec(fds, false);
1026         if (r < 0) {
1027                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1028                 goto fail;
1029         }
1030
1031         *_f = f;
1032         *_fds = fds;
1033
1034         return 0;
1035
1036 fail:
1037         fdset_free(fds);
1038
1039         if (f)
1040                 fclose(f);
1041
1042         return r;
1043 }
1044
1045 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1046         struct rlimit nl;
1047         int r;
1048
1049         assert(saved_rlimit);
1050
1051         /* Save the original RLIMIT_NOFILE so that we can reset it
1052          * later when transitioning from the initrd to the main
1053          * systemd or suchlike. */
1054         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1055                 return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
1056
1057         /* Make sure forked processes get the default kernel setting */
1058         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1059                 struct rlimit *rl;
1060
1061                 rl = newdup(struct rlimit, saved_rlimit, 1);
1062                 if (!rl)
1063                         return log_oom();
1064
1065                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1066         }
1067
1068         /* Bump up the resource limit for ourselves substantially */
1069         nl.rlim_cur = nl.rlim_max = 64*1024;
1070         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1071         if (r < 0)
1072                 return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
1073
1074         return 0;
1075 }
1076
1077 static void test_mtab(void) {
1078
1079         static const char ok[] =
1080                 "/proc/self/mounts\0"
1081                 "/proc/mounts\0"
1082                 "../proc/self/mounts\0"
1083                 "../proc/mounts\0";
1084
1085         _cleanup_free_ char *p = NULL;
1086         int r;
1087
1088         /* Check that /etc/mtab is a symlink to the right place or
1089          * non-existing. But certainly not a file, or a symlink to
1090          * some weird place... */
1091
1092         r = readlink_malloc("/etc/mtab", &p);
1093         if (r == -ENOENT)
1094                 return;
1095         if (r >= 0 && nulstr_contains(ok, p))
1096                 return;
1097
1098         log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
1099                     "This is not supported anymore. "
1100                     "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
1101 }
1102
/* Warns if /usr is an empty directory at this point, which is taken
 * as a sign that /usr lives on a separate, not yet mounted file
 * system. */
static void test_usr(void) {

        /* Only a positive result counts as "empty"; zero and
         * negative results stay silent. */
        if (dir_is_empty("/usr") > 0)
                log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                            "Some things will probably break (sometimes even silently) in mysterious ways. "
                            "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1114
1115 static int initialize_join_controllers(void) {
1116         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1117          * + "net_prio". We'd like to add "cpuset" to the mix, but
1118          * "cpuset" doesn't really work for groups with no initialized
1119          * attributes. */
1120
1121         arg_join_controllers = new(char**, 3);
1122         if (!arg_join_controllers)
1123                 return -ENOMEM;
1124
1125         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1126         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1127         arg_join_controllers[2] = NULL;
1128
1129         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1130                 free_join_controllers();
1131                 return -ENOMEM;
1132         }
1133
1134         return 0;
1135 }
1136
1137 static int enforce_syscall_archs(Set *archs) {
1138 #ifdef HAVE_SECCOMP
1139         scmp_filter_ctx *seccomp;
1140         Iterator i;
1141         void *id;
1142         int r;
1143
1144         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1145         if (!seccomp)
1146                 return log_oom();
1147
1148         SET_FOREACH(id, arg_syscall_archs, i) {
1149                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1150                 if (r == -EEXIST)
1151                         continue;
1152                 if (r < 0) {
1153                         log_error_errno(r, "Failed to add architecture to seccomp: %m");
1154                         goto finish;
1155                 }
1156         }
1157
1158         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1159         if (r < 0) {
1160                 log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
1161                 goto finish;
1162         }
1163
1164         r = seccomp_load(seccomp);
1165         if (r < 0)
1166                 log_error_errno(r, "Failed to add install architecture seccomp: %m");
1167
1168 finish:
1169         seccomp_release(seccomp);
1170         return r;
1171 #else
1172         return 0;
1173 #endif
1174 }
1175
1176 static int status_welcome(void) {
1177         _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1178         int r;
1179
1180         r = parse_env_file("/etc/os-release", NEWLINE,
1181                            "PRETTY_NAME", &pretty_name,
1182                            "ANSI_COLOR", &ansi_color,
1183                            NULL);
1184         if (r == -ENOENT) {
1185                 r = parse_env_file("/usr/lib/os-release", NEWLINE,
1186                                    "PRETTY_NAME", &pretty_name,
1187                                    "ANSI_COLOR", &ansi_color,
1188                                    NULL);
1189         }
1190
1191         if (r < 0 && r != -ENOENT)
1192                 log_warning_errno(r, "Failed to read os-release file: %m");
1193
1194         return status_printf(NULL, false, false,
1195                              "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1196                              isempty(ansi_color) ? "1" : ansi_color,
1197                              isempty(pretty_name) ? "Linux" : pretty_name);
1198 }
1199
/* Writes the value of the $container environment variable to
 * /run/systemd/container.
 *
 * Returns 0 without writing anything if the variable is unset or
 * empty, otherwise the result of write_string_file(). */
static int write_container_id(void) {
        const char *id = getenv("container");

        return isempty(id) ? 0 : write_string_file("/run/systemd/container", id);
}
1209
1210 int main(int argc, char *argv[]) {
1211         Manager *m = NULL;
1212         int r, retval = EXIT_FAILURE;
1213         usec_t before_startup, after_startup;
1214         char timespan[FORMAT_TIMESPAN_MAX];
1215         FDSet *fds = NULL;
1216         bool reexecute = false;
1217         const char *shutdown_verb = NULL;
1218         dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL;
1219         dual_timestamp userspace_timestamp = DUAL_TIMESTAMP_NULL;
1220         dual_timestamp kernel_timestamp = DUAL_TIMESTAMP_NULL;
1221         dual_timestamp security_start_timestamp = DUAL_TIMESTAMP_NULL;
1222         dual_timestamp security_finish_timestamp = DUAL_TIMESTAMP_NULL;
1223         static char systemd[] = "systemd";
1224         bool skip_setup = false;
1225         unsigned j;
1226         bool loaded_policy = false;
1227         bool arm_reboot_watchdog = false;
1228         bool queue_default_job = false;
1229         bool empty_etc = false;
1230         char *switch_root_dir = NULL, *switch_root_init = NULL;
1231         struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
1232         const char *error_message = NULL;
1233
1234 #ifdef HAVE_SYSV_COMPAT
1235         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1236                 /* This is compatibility support for SysV, where
1237                  * calling init as a user is identical to telinit. */
1238
1239                 errno = -ENOENT;
1240                 execv(SYSTEMCTL_BINARY_PATH, argv);
1241                 log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1242                 return 1;
1243         }
1244 #endif
1245
1246         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1247         dual_timestamp_get(&userspace_timestamp);
1248
1249         /* Determine if this is a reexecution or normal bootup. We do
1250          * the full command line parsing much later, so let's just
1251          * have a quick peek here. */
1252         if (strv_find(argv+1, "--deserialize"))
1253                 skip_setup = true;
1254
1255         /* If we have switched root, do all the special setup
1256          * things */
1257         if (strv_find(argv+1, "--switched-root"))
1258                 skip_setup = false;
1259
1260         /* If we get started via the /sbin/init symlink then we are
1261            called 'init'. After a subsequent reexecution we are then
1262            called 'systemd'. That is confusing, hence let's call us
1263            systemd right-away. */
1264         program_invocation_short_name = systemd;
1265         prctl(PR_SET_NAME, systemd);
1266
1267         saved_argv = argv;
1268         saved_argc = argc;
1269
1270         log_show_color(isatty(STDERR_FILENO) > 0);
1271         log_set_upgrade_syslog_to_journal(true);
1272
1273         /* Disable the umask logic */
1274         if (getpid() == 1)
1275                 umask(0);
1276
1277         if (getpid() == 1 && detect_container(NULL) <= 0) {
1278
1279                 /* Running outside of a container as PID 1 */
1280                 arg_running_as = SYSTEMD_SYSTEM;
1281                 make_null_stdio();
1282                 log_set_target(LOG_TARGET_KMSG);
1283                 log_open();
1284
1285                 if (in_initrd())
1286                         initrd_timestamp = userspace_timestamp;
1287
1288                 if (!skip_setup) {
1289                         mount_setup_early();
1290                         dual_timestamp_get(&security_start_timestamp);
1291                         if (mac_selinux_setup(&loaded_policy) < 0) {
1292                                 error_message = "Failed to load SELinux policy";
1293                                 goto finish;
1294                         } else if (ima_setup() < 0) {
1295                                 error_message = "Failed to load IMA policy";
1296                                 goto finish;
1297                         } else if (mac_smack_setup(&loaded_policy) < 0) {
1298                                 error_message = "Failed to load SMACK policy";
1299                                 goto finish;
1300                         }
1301                         dual_timestamp_get(&security_finish_timestamp);
1302                 }
1303
1304                 if (mac_selinux_init(NULL) < 0) {
1305                         error_message = "Failed to initialize SELinux policy";
1306                         goto finish;
1307                 }
1308
1309                 if (!skip_setup) {
1310                         if (clock_is_localtime() > 0) {
1311                                 int min;
1312
1313                                 /*
1314                                  * The very first call of settimeofday() also does a time warp in the kernel.
1315                                  *
1316                                  * In the rtc-in-local time mode, we set the kernel's timezone, and rely on
1317                                  * external tools to take care of maintaining the RTC and do all adjustments.
1318                                  * This matches the behavior of Windows, which leaves the RTC alone if the
1319                                  * registry tells that the RTC runs in UTC.
1320                                  */
1321                                 r = clock_set_timezone(&min);
1322                                 if (r < 0)
1323                                         log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1324                                 else
1325                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1326                         } else if (!in_initrd()) {
1327                                 /*
1328                                  * Do a dummy very first call to seal the kernel's time warp magic.
1329                                  *
1330                                  * Do not call this this from inside the initrd. The initrd might not
1331                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1332                                  * that way. In such case, we need to delay the time-warp or the sealing
1333                                  * until we reach the real system.
1334                                  *
1335                                  * Do no set the kernel's timezone. The concept of local time cannot
1336                                  * be supported reliably, the time will jump or be incorrect at every daylight
1337                                  * saving time change. All kernel local time concepts will be treated
1338                                  * as UTC that way.
1339                                  */
1340                                 clock_reset_timewarp();
1341                         }
1342                 }
1343
1344                 /* Set the default for later on, but don't actually
1345                  * open the logs like this for now. Note that if we
1346                  * are transitioning from the initrd there might still
1347                  * be journal fd open, and we shouldn't attempt
1348                  * opening that before we parsed /proc/cmdline which
1349                  * might redirect output elsewhere. */
1350                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1351
1352         } else if (getpid() == 1) {
1353                 /* Running inside a container, as PID 1 */
1354                 arg_running_as = SYSTEMD_SYSTEM;
1355                 log_set_target(LOG_TARGET_CONSOLE);
1356                 log_close_console(); /* force reopen of /dev/console */
1357                 log_open();
1358
1359                 /* For the later on, see above... */
1360                 log_set_target(LOG_TARGET_JOURNAL);
1361
1362                 /* clear the kernel timestamp,
1363                  * because we are in a container */
1364                 kernel_timestamp.monotonic = 0ULL;
1365                 kernel_timestamp.realtime = 0ULL;
1366
1367         } else {
1368                 /* Running as user instance */
1369                 arg_running_as = SYSTEMD_USER;
1370                 log_set_target(LOG_TARGET_AUTO);
1371                 log_open();
1372
1373                 /* clear the kernel timestamp,
1374                  * because we are not PID 1 */
1375                 kernel_timestamp.monotonic = 0ULL;
1376                 kernel_timestamp.realtime = 0ULL;
1377         }
1378
1379         /* Initialize default unit */
1380         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1381         if (r < 0) {
1382                 log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
1383                 error_message = "Failed to set default unit";
1384                 goto finish;
1385         }
1386
1387         r = initialize_join_controllers();
1388         if (r < 0) {
1389                 error_message = "Failed to initalize cgroup controllers";
1390                 goto finish;
1391         }
1392
1393         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1394          * /proc/$PID/fd is available. */
1395         if (getpid() == 1) {
1396
1397                 /* Load the kernel modules early, so that we kdbus.ko is loaded before kdbusfs shall be mounted */
1398                 if (!skip_setup)
1399                         kmod_setup();
1400
1401                 r = mount_setup(loaded_policy);
1402                 if (r < 0) {
1403                         error_message = "Failed to mount API filesystems";
1404                         goto finish;
1405                 }
1406         }
1407
1408         /* Reset all signal handlers. */
1409         assert_se(reset_all_signal_handlers() == 0);
1410
1411         ignore_signals(SIGNALS_IGNORE, -1);
1412
1413         if (parse_config_file() < 0) {
1414                 error_message = "Failed to parse config file";
1415                 goto finish;
1416         }
1417
1418         if (arg_running_as == SYSTEMD_SYSTEM) {
1419                 r = parse_proc_cmdline(parse_proc_cmdline_item);
1420                 if (r < 0)
1421                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1422         }
1423
1424         /* Note that this also parses bits from the kernel command
1425          * line, including "debug". */
1426         log_parse_environment();
1427
1428         if (parse_argv(argc, argv) < 0) {
1429                 error_message = "Failed to parse commandline arguments";
1430                 goto finish;
1431         }
1432
1433         if (arg_action == ACTION_TEST &&
1434             geteuid() == 0) {
1435                 log_error("Don't run test mode as root.");
1436                 goto finish;
1437         }
1438
1439         if (arg_running_as == SYSTEMD_USER &&
1440             arg_action == ACTION_RUN &&
1441             sd_booted() <= 0) {
1442                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1443                 goto finish;
1444         }
1445
1446         if (arg_running_as == SYSTEMD_SYSTEM &&
1447             arg_action == ACTION_RUN &&
1448             running_in_chroot() > 0) {
1449                 log_error("Cannot be run in a chroot() environment.");
1450                 goto finish;
1451         }
1452
1453         if (arg_action == ACTION_TEST)
1454                 skip_setup = true;
1455
1456         pager_open_if_enabled();
1457
1458         if (arg_action == ACTION_HELP) {
1459                 retval = help();
1460                 goto finish;
1461         } else if (arg_action == ACTION_VERSION) {
1462                 retval = version();
1463                 goto finish;
1464         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1465                 unit_dump_config_items(stdout);
1466                 retval = EXIT_SUCCESS;
1467                 goto finish;
1468         } else if (arg_action == ACTION_DONE) {
1469                 retval = EXIT_SUCCESS;
1470                 goto finish;
1471         }
1472
1473         if (arg_running_as == SYSTEMD_USER &&
1474             !getenv("XDG_RUNTIME_DIR")) {
1475                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1476                 goto finish;
1477         }
1478
1479         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1480
1481         /* Close logging fds, in order not to confuse fdset below */
1482         log_close();
1483
1484         /* Remember open file descriptors for later deserialization */
1485         r = fdset_new_fill(&fds);
1486         if (r < 0) {
1487                 log_emergency_errno(r, "Failed to allocate fd set: %m");
1488                 error_message = "Failed to allocate fd set";
1489                 goto finish;
1490         } else
1491                 fdset_cloexec(fds, true);
1492
1493         if (arg_serialization)
1494                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1495
1496         if (arg_running_as == SYSTEMD_SYSTEM)
1497                 /* Become a session leader if we aren't one yet. */
1498                 setsid();
1499
1500         /* Move out of the way, so that we won't block unmounts */
1501         assert_se(chdir("/")  == 0);
1502
1503         /* Reset the console, but only if this is really init and we
1504          * are freshly booted */
1505         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN) {
1506
1507                 /* If we are init, we connect stdin/stdout/stderr to
1508                  * /dev/null and make sure we don't have a controlling
1509                  * tty. */
1510                 release_terminal();
1511
1512                 if (getpid() == 1 && !skip_setup)
1513                         console_setup();
1514         }
1515
1516         /* Open the logging devices, if possible and necessary */
1517         log_open();
1518
1519         if (arg_show_status == _SHOW_STATUS_UNSET)
1520                 arg_show_status = SHOW_STATUS_YES;
1521
1522         /* Make sure we leave a core dump without panicking the
1523          * kernel. */
1524         if (getpid() == 1) {
1525                 install_crash_handler();
1526
1527                 r = mount_cgroup_controllers(arg_join_controllers);
1528                 if (r < 0)
1529                         goto finish;
1530         }
1531
1532         if (arg_running_as == SYSTEMD_SYSTEM) {
1533                 const char *virtualization = NULL;
1534
1535                 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1536                          arg_action == ACTION_TEST ? "test " : "" );
1537
1538                 detect_virtualization(&virtualization);
1539                 if (virtualization)
1540                         log_info("Detected virtualization '%s'.", virtualization);
1541
1542                 write_container_id();
1543
1544                 log_info("Detected architecture '%s'.", architecture_to_string(uname_architecture()));
1545
1546                 if (in_initrd())
1547                         log_info("Running in initial RAM disk.");
1548
1549                 /* Let's check whether /etc is already populated. We
1550                  * don't actually really check for that, but use
1551                  * /etc/machine-id as flag file. This allows container
1552                  * managers and installers to provision a couple of
1553                  * files already. If the container manager wants to
1554                  * provision the machine ID itself it should pass
1555                  * $container_uuid to PID 1. */
1556
1557                 empty_etc = access("/etc/machine-id", F_OK) < 0;
1558                 if (empty_etc)
1559                         log_info("Running with unpopulated /etc.");
1560         } else {
1561                 _cleanup_free_ char *t;
1562
1563                 t = uid_to_name(getuid());
1564                 log_debug(PACKAGE_STRING " running in %suser mode for user "UID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1565                           arg_action == ACTION_TEST ? " test" : "", getuid(), t);
1566         }
1567
1568         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1569                 if (arg_show_status > 0 || plymouth_running())
1570                         status_welcome();
1571
1572                 hostname_setup();
1573                 machine_id_setup(NULL);
1574                 loopback_setup();
1575
1576                 test_mtab();
1577                 test_usr();
1578         }
1579
1580         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1581                 watchdog_set_timeout(&arg_runtime_watchdog);
1582
1583         if (arg_timer_slack_nsec != NSEC_INFINITY)
1584                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1585                         log_error_errno(errno, "Failed to adjust timer slack: %m");
1586
1587         if (arg_capability_bounding_set_drop) {
1588                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1589                 if (r < 0) {
1590                         log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1591                         error_message = "Failed to drop capability bounding set of usermode helpers";
1592                         goto finish;
1593                 }
1594                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1595                 if (r < 0) {
1596                         log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1597                         error_message = "Failed to drop capability bounding set";
1598                         goto finish;
1599                 }
1600         }
1601
1602         if (arg_syscall_archs) {
1603                 r = enforce_syscall_archs(arg_syscall_archs);
1604                 if (r < 0) {
1605                         error_message = "Failed to set syscall architectures";
1606                         goto finish;
1607                 }
1608         }
1609
1610         if (arg_running_as == SYSTEMD_USER) {
1611                 /* Become reaper of our children */
1612                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1613                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
1614                         if (errno == EINVAL)
1615                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1616                 }
1617         }
1618
1619         if (arg_running_as == SYSTEMD_SYSTEM) {
1620                 bump_rlimit_nofile(&saved_rlimit_nofile);
1621
1622                 if (empty_etc) {
1623                         r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_FULL, false, NULL, 0);
1624                         if (r < 0)
1625                                 log_warning_errno(r, "Failed to populate /etc with preset unit settings, ignoring: %m");
1626                         else
1627                                 log_info("Populated /etc with preset unit settings.");
1628                 }
1629         }
1630
1631         r = manager_new(arg_running_as, arg_action == ACTION_TEST, &m);
1632         if (r < 0) {
1633                 log_emergency_errno(r, "Failed to allocate manager object: %m");
1634                 error_message = "Failed to allocate manager object";
1635                 goto finish;
1636         }
1637
1638         m->confirm_spawn = arg_confirm_spawn;
1639         m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
1640         m->default_std_output = arg_default_std_output;
1641         m->default_std_error = arg_default_std_error;
1642         m->default_restart_usec = arg_default_restart_usec;
1643         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1644         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1645         m->default_start_limit_interval = arg_default_start_limit_interval;
1646         m->default_start_limit_burst = arg_default_start_limit_burst;
1647         m->default_cpu_accounting = arg_default_cpu_accounting;
1648         m->default_blockio_accounting = arg_default_blockio_accounting;
1649         m->default_memory_accounting = arg_default_memory_accounting;
1650         m->runtime_watchdog = arg_runtime_watchdog;
1651         m->shutdown_watchdog = arg_shutdown_watchdog;
1652
1653         m->userspace_timestamp = userspace_timestamp;
1654         m->kernel_timestamp = kernel_timestamp;
1655         m->initrd_timestamp = initrd_timestamp;
1656         m->security_start_timestamp = security_start_timestamp;
1657         m->security_finish_timestamp = security_finish_timestamp;
1658
1659         manager_set_default_rlimits(m, arg_default_rlimit);
1660         manager_environment_add(m, NULL, arg_default_environment);
1661         manager_set_show_status(m, arg_show_status);
1662         manager_set_first_boot(m, empty_etc);
1663
1664         /* Remember whether we should queue the default job */
1665         queue_default_job = !arg_serialization || arg_switched_root;
1666
1667         before_startup = now(CLOCK_MONOTONIC);
1668
1669         r = manager_startup(m, arg_serialization, fds);
1670         if (r < 0)
1671                 log_error_errno(r, "Failed to fully start up daemon: %m");
1672
1673         /* This will close all file descriptors that were opened, but
1674          * not claimed by any unit. */
1675         fdset_free(fds);
1676         fds = NULL;
1677
1678         if (arg_serialization) {
1679                 fclose(arg_serialization);
1680                 arg_serialization = NULL;
1681         }
1682
1683         if (queue_default_job) {
1684                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1685                 Unit *target = NULL;
1686                 Job *default_unit_job;
1687
1688                 log_debug("Activating default unit: %s", arg_default_unit);
1689
1690                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1691                 if (r < 0)
1692                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1693                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1694                         log_error_errno(target->load_error, "Failed to load default target: %m");
1695                 else if (target->load_state == UNIT_MASKED)
1696                         log_error("Default target masked.");
1697
1698                 if (!target || target->load_state != UNIT_LOADED) {
1699                         log_info("Trying to load rescue target...");
1700
1701                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1702                         if (r < 0) {
1703                                 log_emergency("Failed to load rescue target: %s", bus_error_message(&error, r));
1704                                 error_message = "Failed to load rescue target";
1705                                 goto finish;
1706                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1707                                 log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1708                                 error_message = "Failed to load rescue target";
1709                                 goto finish;
1710                         } else if (target->load_state == UNIT_MASKED) {
1711                                 log_emergency("Rescue target masked.");
1712                                 error_message = "Rescue target masked";
1713                                 goto finish;
1714                         }
1715                 }
1716
1717                 assert(target->load_state == UNIT_LOADED);
1718
1719                 if (arg_action == ACTION_TEST) {
1720                         printf("-> By units:\n");
1721                         manager_dump_units(m, stdout, "\t");
1722                 }
1723
1724                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1725                 if (r == -EPERM) {
1726                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1727
1728                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1729                         if (r < 0) {
1730                                 log_emergency("Failed to start default target: %s", bus_error_message(&error, r));
1731                                 error_message = "Failed to start default target";
1732                                 goto finish;
1733                         }
1734                 } else if (r < 0) {
1735                         log_emergency("Failed to isolate default target: %s", bus_error_message(&error, r));
1736                         error_message = "Failed to isolate default target";
1737                         goto finish;
1738                 }
1739
1740                 m->default_unit_job_id = default_unit_job->id;
1741
1742                 after_startup = now(CLOCK_MONOTONIC);
1743                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1744                          "Loaded units and determined initial transaction in %s.",
1745                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
1746
1747                 if (arg_action == ACTION_TEST) {
1748                         printf("-> By jobs:\n");
1749                         manager_dump_jobs(m, stdout, "\t");
1750                         retval = EXIT_SUCCESS;
1751                         goto finish;
1752                 }
1753         }
1754
1755         for (;;) {
1756                 r = manager_loop(m);
1757                 if (r < 0) {
1758                         log_emergency_errno(r, "Failed to run main loop: %m");
1759                         error_message = "Failed to run main loop";
1760                         goto finish;
1761                 }
1762
1763                 switch (m->exit_code) {
1764
1765                 case MANAGER_EXIT:
1766                         retval = EXIT_SUCCESS;
1767                         log_debug("Exit.");
1768                         goto finish;
1769
1770                 case MANAGER_RELOAD:
1771                         log_info("Reloading.");
1772                         r = manager_reload(m);
1773                         if (r < 0)
1774                                 log_error_errno(r, "Failed to reload: %m");
1775                         break;
1776
1777                 case MANAGER_REEXECUTE:
1778
1779                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0) {
1780                                 error_message = "Failed to prepare for reexection";
1781                                 goto finish;
1782                         }
1783
1784                         reexecute = true;
1785                         log_notice("Reexecuting.");
1786                         goto finish;
1787
1788                 case MANAGER_SWITCH_ROOT:
1789                         /* Steal the switch root parameters */
1790                         switch_root_dir = m->switch_root;
1791                         switch_root_init = m->switch_root_init;
1792                         m->switch_root = m->switch_root_init = NULL;
1793
1794                         if (!switch_root_init)
1795                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0) {
1796                                         error_message = "Failed to prepare for reexection";
1797                                         goto finish;
1798                                 }
1799
1800                         reexecute = true;
1801                         log_notice("Switching root.");
1802                         goto finish;
1803
1804                 case MANAGER_REBOOT:
1805                 case MANAGER_POWEROFF:
1806                 case MANAGER_HALT:
1807                 case MANAGER_KEXEC: {
1808                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1809                                 [MANAGER_REBOOT] = "reboot",
1810                                 [MANAGER_POWEROFF] = "poweroff",
1811                                 [MANAGER_HALT] = "halt",
1812                                 [MANAGER_KEXEC] = "kexec"
1813                         };
1814
1815                         assert_se(shutdown_verb = table[m->exit_code]);
1816                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1817
1818                         log_notice("Shutting down.");
1819                         goto finish;
1820                 }
1821
1822                 default:
1823                         assert_not_reached("Unknown exit code.");
1824                 }
1825         }
1826
1827 finish:
1828         pager_close();
1829
1830         if (m)
1831                 arg_shutdown_watchdog = m->shutdown_watchdog;
1832         m = manager_free(m);
1833
1834         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1835                 free(arg_default_rlimit[j]);
1836                 arg_default_rlimit[j] = NULL;
1837         }
1838
1839         free(arg_default_unit);
1840         arg_default_unit = NULL;
1841
1842         free_join_controllers();
1843
1844         strv_free(arg_default_environment);
1845         arg_default_environment = NULL;
1846
1847         set_free(arg_syscall_archs);
1848         arg_syscall_archs = NULL;
1849
1850         mac_selinux_finish();
1851
1852         if (reexecute) {
1853                 const char **args;
1854                 unsigned i, args_size;
1855
1856                 /* Close and disarm the watchdog, so that the new
1857                  * instance can reinitialize it, but doesn't get
1858                  * rebooted while we do that */
1859                 watchdog_close(true);
1860
1861                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1862                  * that the new systemd can pass the kernel default to
1863                  * its child processes */
1864                 if (saved_rlimit_nofile.rlim_cur > 0)
1865                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1866
1867                 if (switch_root_dir) {
1868                         /* Kill all remaining processes from the
1869                          * initrd, but don't wait for them, so that we
1870                          * can handle the SIGCHLD for them after
1871                          * deserializing. */
1872                         broadcast_signal(SIGTERM, false, true);
1873
1874                         /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1875                         r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1876                         if (r < 0)
1877                                 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1878                 }
1879
1880                 args_size = MAX(6, argc+1);
1881                 args = newa(const char*, args_size);
1882
1883                 if (!switch_root_init) {
1884                         char sfd[DECIMAL_STR_MAX(int) + 1];
1885
1886                         /* First try to spawn ourselves with the right
1887                          * path, and with full serialization. We do
1888                          * this only if the user didn't specify an
1889                          * explicit init to spawn. */
1890
1891                         assert(arg_serialization);
1892                         assert(fds);
1893
1894                         xsprintf(sfd, "%i", fileno(arg_serialization));
1895
1896                         i = 0;
1897                         args[i++] = SYSTEMD_BINARY_PATH;
1898                         if (switch_root_dir)
1899                                 args[i++] = "--switched-root";
1900                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1901                         args[i++] = "--deserialize";
1902                         args[i++] = sfd;
1903                         args[i++] = NULL;
1904
1905                         /* do not pass along the environment we inherit from the kernel or initrd */
1906                         if (switch_root_dir)
1907                                 clearenv();
1908
1909                         assert(i <= args_size);
1910                         execv(args[0], (char* const*) args);
1911                 }
1912
1913                 /* Try the fallback, if there is any, without any
1914                  * serialization. We pass the original argv[] and
1915                  * envp[]. (Well, modulo the ordering changes due to
1916                  * getopt() in argv[], and some cleanups in envp[],
1917                  * but let's hope that doesn't matter.) */
1918
1919                 if (arg_serialization) {
1920                         fclose(arg_serialization);
1921                         arg_serialization = NULL;
1922                 }
1923
1924                 if (fds) {
1925                         fdset_free(fds);
1926                         fds = NULL;
1927                 }
1928
1929                 /* Reopen the console */
1930                 make_console_stdio();
1931
1932                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1933                         args[i++] = argv[j];
1934                 args[i++] = NULL;
1935                 assert(i <= args_size);
1936
1937                 /* Reenable any blocked signals, especially important
1938                  * if we switch from initial ramdisk to init=... */
1939                 reset_all_signal_handlers();
1940                 reset_signal_mask();
1941
1942                 if (switch_root_init) {
1943                         args[0] = switch_root_init;
1944                         execv(args[0], (char* const*) args);
1945                         log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1946                 }
1947
1948                 args[0] = "/sbin/init";
1949                 execv(args[0], (char* const*) args);
1950
1951                 if (errno == ENOENT) {
1952                         log_warning("No /sbin/init, trying fallback");
1953
1954                         args[0] = "/bin/sh";
1955                         args[1] = NULL;
1956                         execv(args[0], (char* const*) args);
1957                         log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1958                 } else
1959                         log_warning_errno(errno, "Failed to execute /sbin/init, giving up: %m");
1960         }
1961
1962         if (arg_serialization) {
1963                 fclose(arg_serialization);
1964                 arg_serialization = NULL;
1965         }
1966
1967         if (fds) {
1968                 fdset_free(fds);
1969                 fds = NULL;
1970         }
1971
1972 #ifdef HAVE_VALGRIND_VALGRIND_H
1973         /* If we are PID 1 and running under valgrind, then let's exit
1974          * here explicitly. valgrind will only generate nice output on
1975          * exit(), not on exec(), hence let's do the former not the
1976          * latter here. */
1977         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1978                 return 0;
1979 #endif
1980
1981         if (shutdown_verb) {
1982                 char log_level[DECIMAL_STR_MAX(int) + 1];
1983                 const char* command_line[9] = {
1984                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1985                         shutdown_verb,
1986                         "--log-level", log_level,
1987                         "--log-target",
1988                 };
1989                 unsigned pos = 5;
1990                 _cleanup_strv_free_ char **env_block = NULL;
1991
1992                 assert(command_line[pos] == NULL);
1993                 env_block = strv_copy(environ);
1994
1995                 xsprintf(log_level, "%d", log_get_max_level());
1996
1997                 switch (log_get_target()) {
1998                 case LOG_TARGET_KMSG:
1999                 case LOG_TARGET_JOURNAL_OR_KMSG:
2000                 case LOG_TARGET_SYSLOG_OR_KMSG:
2001                         command_line[pos++] = "kmsg";
2002                         break;
2003
2004                 case LOG_TARGET_CONSOLE:
2005                 default:
2006                         command_line[pos++] = "console";
2007                         break;
2008                 };
2009
2010                 if (log_get_show_color())
2011                         command_line[pos++] = "--log-color";
2012
2013                 if (log_get_show_location())
2014                         command_line[pos++] = "--log-location";
2015
2016                 assert(pos < ELEMENTSOF(command_line));
2017
2018                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
2019                         char *e;
2020
2021                         /* If we reboot let's set the shutdown
2022                          * watchdog and tell the shutdown binary to
2023                          * repeatedly ping it */
2024                         r = watchdog_set_timeout(&arg_shutdown_watchdog);
2025                         watchdog_close(r < 0);
2026
2027                         /* Tell the binary how often to ping, ignore failure */
2028                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
2029                                 strv_push(&env_block, e);
2030                 } else
2031                         watchdog_close(true);
2032
2033                 /* Avoid the creation of new processes forked by the
2034                  * kernel; at this point, we will not listen to the
2035                  * signals anyway */
2036                 if (detect_container(NULL) <= 0)
2037                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
2038
2039                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
2040                 log_error_errno(errno, "Failed to execute shutdown binary, %s: %m",
2041                           getpid() == 1 ? "freezing" : "quitting");
2042         }
2043
2044         if (getpid() == 1) {
2045                 if (error_message)
2046                         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2047                                               ANSI_HIGHLIGHT_RED_ON "!!!!!!" ANSI_HIGHLIGHT_OFF,
2048                                               "%s, freezing.", error_message);
2049                 freeze();
2050         }
2051
2052         return retval;
2053 }