chiark / gitweb /
core: use raw_clone instead of fork in signal handler
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38 #ifdef HAVE_SECCOMP
39 #include <seccomp.h>
40 #endif
41
42 #include "sd-daemon.h"
43 #include "sd-messages.h"
44 #include "sd-bus.h"
45 #include "manager.h"
46 #include "log.h"
47 #include "load-fragment.h"
48 #include "fdset.h"
49 #include "special.h"
50 #include "conf-parser.h"
51 #include "missing.h"
52 #include "label.h"
53 #include "pager.h"
54 #include "build.h"
55 #include "strv.h"
56 #include "def.h"
57 #include "virt.h"
58 #include "architecture.h"
59 #include "watchdog.h"
60 #include "path-util.h"
61 #include "switch-root.h"
62 #include "capability.h"
63 #include "killall.h"
64 #include "env-util.h"
65 #include "clock-util.h"
66 #include "fileio.h"
67 #include "dbus-manager.h"
68 #include "bus-error.h"
69 #include "bus-util.h"
70
71 #include "mount-setup.h"
72 #include "loopback-setup.h"
73 #include "hostname-setup.h"
74 #include "machine-id-setup.h"
75 #include "selinux-setup.h"
76 #include "ima-setup.h"
77 #include "smack-setup.h"
78 #include "kmod-setup.h"
79
80 static enum {
81         ACTION_RUN,
82         ACTION_HELP,
83         ACTION_VERSION,
84         ACTION_TEST,
85         ACTION_DUMP_CONFIGURATION_ITEMS,
86         ACTION_DONE
87 } arg_action = ACTION_RUN;
88 static char *arg_default_unit = NULL;
89 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
90 static bool arg_dump_core = true;
91 static bool arg_crash_shell = false;
92 static int arg_crash_chvt = -1;
93 static bool arg_confirm_spawn = false;
94 static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
95 static bool arg_switched_root = false;
96 static int arg_no_pager = -1;
97 static char ***arg_join_controllers = NULL;
98 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
99 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
100 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
101 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
102 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
103 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
104 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
105 static usec_t arg_runtime_watchdog = 0;
106 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
107 static char **arg_default_environment = NULL;
108 static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
109 static uint64_t arg_capability_bounding_set_drop = 0;
110 static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
111 static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
112 static Set* arg_syscall_archs = NULL;
113 static FILE* arg_serialization = NULL;
114 static bool arg_default_cpu_accounting = false;
115 static bool arg_default_blockio_accounting = false;
116 static bool arg_default_memory_accounting = false;
117
/* Signal handler that deliberately does nothing; crash() installs it for SIGCHLD. */
static void nop_handler(int sig) {
        (void) sig;
}
119
120 static void pager_open_if_enabled(void) {
121
122         if (arg_no_pager <= 0)
123                 return;
124
125         pager_open(false);
126 }
127
128 noreturn static void crash(int sig) {
129
130         if (getpid() != 1)
131                 /* Pass this on immediately, if this is not PID 1 */
132                 raise(sig);
133         else if (!arg_dump_core)
134                 log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
135         else {
136                 struct sigaction sa = {
137                         .sa_handler = nop_handler,
138                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
139                 };
140                 pid_t pid;
141
142                 /* We want to wait for the core process, hence let's enable SIGCHLD */
143                 sigaction(SIGCHLD, &sa, NULL);
144
145                 pid = raw_clone(SIGCHLD, NULL);
146                 if (pid < 0)
147                         log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
148
149                 else if (pid == 0) {
150                         struct rlimit rl = {};
151
152                         /* Enable default signal handler for core dump */
153                         zero(sa);
154                         sa.sa_handler = SIG_DFL;
155                         sigaction(sig, &sa, NULL);
156
157                         /* Don't limit the core dump size */
158                         rl.rlim_cur = RLIM_INFINITY;
159                         rl.rlim_max = RLIM_INFINITY;
160                         setrlimit(RLIMIT_CORE, &rl);
161
162                         /* Just to be sure... */
163                         chdir("/");
164
165                         /* Raise the signal again */
166                         pid = raw_getpid();
167                         kill(pid, sig); /* raise() would kill the parent */
168
169                         assert_not_reached("We shouldn't be here...");
170                         _exit(1);
171                 } else {
172                         siginfo_t status;
173                         int r;
174
175                         /* Order things nicely. */
176                         r = wait_for_terminate(pid, &status);
177                         if (r < 0)
178                                 log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
179                         else if (status.si_code != CLD_DUMPED)
180                                 log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
181                                               signal_to_string(sig),
182                                               pid, sigchld_code_to_string(status.si_code),
183                                               status.si_status,
184                                               strna(status.si_code == CLD_EXITED
185                                                     ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
186                                                     : signal_to_string(status.si_status)));
187                         else
188                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
189                 }
190         }
191
192         if (arg_crash_chvt)
193                 chvt(arg_crash_chvt);
194
195         if (arg_crash_shell) {
196                 struct sigaction sa = {
197                         .sa_handler = SIG_IGN,
198                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
199                 };
200                 pid_t pid;
201
202                 log_info("Executing crash shell in 10s...");
203                 sleep(10);
204
205                 /* Let the kernel reap children for us */
206                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
207
208                 pid = raw_clone(SIGCHLD, NULL);
209                 if (pid < 0)
210                         log_emergency_errno(errno, "Failed to fork off crash shell: %m");
211                 else if (pid == 0) {
212                         make_console_stdio();
213                         execle("/bin/sh", "/bin/sh", NULL, environ);
214
215                         log_emergency_errno(errno, "execle() failed: %m");
216                         _exit(1);
217                 } else
218                         log_info("Successfully spawned crash shell as PID "PID_FMT".", pid);
219         }
220
221         log_emergency("Freezing execution.");
222         freeze();
223 }
224
225 static void install_crash_handler(void) {
226         struct sigaction sa = {
227                 .sa_handler = crash,
228                 .sa_flags = SA_NODEFER,
229         };
230
231         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
232 }
233
234 static int console_setup(void) {
235         _cleanup_close_ int tty_fd = -1;
236         int r;
237
238         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
239         if (tty_fd < 0)
240                 return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
241
242         /* We don't want to force text mode.  plymouth may be showing
243          * pictures already from initrd. */
244         r = reset_terminal_fd(tty_fd, false);
245         if (r < 0)
246                 return log_error_errno(r, "Failed to reset /dev/console: %m");
247
248         return 0;
249 }
250
251 static int set_default_unit(const char *u) {
252         char *c;
253
254         assert(u);
255
256         c = strdup(u);
257         if (!c)
258                 return -ENOMEM;
259
260         free(arg_default_unit);
261         arg_default_unit = c;
262
263         return 0;
264 }
265
266 static int parse_proc_cmdline_item(const char *key, const char *value) {
267
268         static const char * const rlmap[] = {
269                 "emergency", SPECIAL_EMERGENCY_TARGET,
270                 "-b",        SPECIAL_EMERGENCY_TARGET,
271                 "rescue",    SPECIAL_RESCUE_TARGET,
272                 "single",    SPECIAL_RESCUE_TARGET,
273                 "-s",        SPECIAL_RESCUE_TARGET,
274                 "s",         SPECIAL_RESCUE_TARGET,
275                 "S",         SPECIAL_RESCUE_TARGET,
276                 "1",         SPECIAL_RESCUE_TARGET,
277                 "2",         SPECIAL_RUNLEVEL2_TARGET,
278                 "3",         SPECIAL_RUNLEVEL3_TARGET,
279                 "4",         SPECIAL_RUNLEVEL4_TARGET,
280                 "5",         SPECIAL_RUNLEVEL5_TARGET,
281         };
282         int r;
283
284         assert(key);
285
286         if (streq(key, "systemd.unit") && value) {
287
288                 if (!in_initrd())
289                         return set_default_unit(value);
290
291         } else if (streq(key, "rd.systemd.unit") && value) {
292
293                 if (in_initrd())
294                         return set_default_unit(value);
295
296         } else if (streq(key, "systemd.dump_core") && value) {
297
298                 r = parse_boolean(value);
299                 if (r < 0)
300                         log_warning("Failed to parse dump core switch %s. Ignoring.", value);
301                 else
302                         arg_dump_core = r;
303
304         } else if (streq(key, "systemd.crash_shell") && value) {
305
306                 r = parse_boolean(value);
307                 if (r < 0)
308                         log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
309                 else
310                         arg_crash_shell = r;
311
312         } else if (streq(key, "systemd.crash_chvt") && value) {
313
314                 if (safe_atoi(value, &r) < 0)
315                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
316                 else
317                         arg_crash_chvt = r;
318
319         } else if (streq(key, "systemd.confirm_spawn") && value) {
320
321                 r = parse_boolean(value);
322                 if (r < 0)
323                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", value);
324                 else
325                         arg_confirm_spawn = r;
326
327         } else if (streq(key, "systemd.show_status") && value) {
328
329                 r = parse_show_status(value, &arg_show_status);
330                 if (r < 0)
331                         log_warning("Failed to parse show status switch %s. Ignoring.", value);
332
333         } else if (streq(key, "systemd.default_standard_output") && value) {
334
335                 r = exec_output_from_string(value);
336                 if (r < 0)
337                         log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
338                 else
339                         arg_default_std_output = r;
340
341         } else if (streq(key, "systemd.default_standard_error") && value) {
342
343                 r = exec_output_from_string(value);
344                 if (r < 0)
345                         log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
346                 else
347                         arg_default_std_error = r;
348
349         } else if (streq(key, "systemd.setenv") && value) {
350
351                 if (env_assignment_is_valid(value)) {
352                         char **env;
353
354                         env = strv_env_set(arg_default_environment, value);
355                         if (env)
356                                 arg_default_environment = env;
357                         else
358                                 log_warning_errno(ENOMEM, "Setting environment variable '%s' failed, ignoring: %m", value);
359                 } else
360                         log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
361
362         } else if (streq(key, "quiet") && !value) {
363
364                 log_set_max_level(LOG_NOTICE);
365
366                 if (arg_show_status == _SHOW_STATUS_UNSET)
367                         arg_show_status = SHOW_STATUS_AUTO;
368
369         } else if (streq(key, "debug") && !value) {
370
371                 /* Note that log_parse_environment() handles 'debug'
372                  * too, and sets the log level to LOG_DEBUG. */
373
374                 if (detect_container(NULL) > 0)
375                         log_set_target(LOG_TARGET_CONSOLE);
376
377         } else if (!in_initrd() && !value) {
378                 unsigned i;
379
380                 /* SysV compatibility */
381                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
382                         if (streq(key, rlmap[i]))
383                                 return set_default_unit(rlmap[i+1]);
384         }
385
386         return 0;
387 }
388
/* Generates a config-parser callback called `name` that passes rvalue
 * to func(). If func() returns a negative value the problem is reported
 * through log_syntax() using `descr` to name the setting; the callback
 * itself always returns 0. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

/* Callbacks for the logging related settings of the "Manager" section. */
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
420
421 static int config_parse_cpu_affinity2(
422                 const char *unit,
423                 const char *filename,
424                 unsigned line,
425                 const char *section,
426                 unsigned section_line,
427                 const char *lvalue,
428                 int ltype,
429                 const char *rvalue,
430                 void *data,
431                 void *userdata) {
432
433         const char *word, *state;
434         size_t l;
435         cpu_set_t *c = NULL;
436         unsigned ncpus = 0;
437
438         assert(filename);
439         assert(lvalue);
440         assert(rvalue);
441
442         FOREACH_WORD_QUOTED(word, l, rvalue, state) {
443                 char *t;
444                 int r;
445                 unsigned cpu;
446
447                 if (!(t = strndup(word, l)))
448                         return log_oom();
449
450                 r = safe_atou(t, &cpu);
451                 free(t);
452
453                 if (!c)
454                         if (!(c = cpu_set_malloc(&ncpus)))
455                                 return log_oom();
456
457                 if (r < 0 || cpu >= ncpus) {
458                         log_syntax(unit, LOG_ERR, filename, line, -r,
459                                    "Failed to parse CPU affinity '%s'", rvalue);
460                         CPU_FREE(c);
461                         return -EBADMSG;
462                 }
463
464                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
465         }
466         if (!isempty(state))
467                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
468                            "Trailing garbage, ignoring.");
469
470         if (c) {
471                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
472                         log_unit_warning(unit, "Failed to set CPU affinity: %m");
473
474                 CPU_FREE(c);
475         }
476
477         return 0;
478 }
479
480 static int config_parse_show_status(
481                 const char* unit,
482                 const char *filename,
483                 unsigned line,
484                 const char *section,
485                 unsigned section_line,
486                 const char *lvalue,
487                 int ltype,
488                 const char *rvalue,
489                 void *data,
490                 void *userdata) {
491
492         int k;
493         ShowStatus *b = data;
494
495         assert(filename);
496         assert(lvalue);
497         assert(rvalue);
498         assert(data);
499
500         k = parse_show_status(rvalue, b);
501         if (k < 0) {
502                 log_syntax(unit, LOG_ERR, filename, line, -k,
503                            "Failed to parse show status setting, ignoring: %s", rvalue);
504                 return 0;
505         }
506
507         return 0;
508 }
509
/* Releases a NULL-terminated array of string vectors: strv_free() is
 * called on every element, then the array itself is freed. A NULL
 * argument is accepted and ignored. */
static void strv_free_free(char ***l) {
        if (l) {
                char ***entry = l;

                while (*entry) {
                        strv_free(*entry);
                        entry++;
                }

                free(l);
        }
}
521
522 static void free_join_controllers(void) {
523         strv_free_free(arg_join_controllers);
524         arg_join_controllers = NULL;
525 }
526
527 static int config_parse_join_controllers(const char *unit,
528                                          const char *filename,
529                                          unsigned line,
530                                          const char *section,
531                                          unsigned section_line,
532                                          const char *lvalue,
533                                          int ltype,
534                                          const char *rvalue,
535                                          void *data,
536                                          void *userdata) {
537
538         unsigned n = 0;
539         const char *word, *state;
540         size_t length;
541
542         assert(filename);
543         assert(lvalue);
544         assert(rvalue);
545
546         free_join_controllers();
547
548         FOREACH_WORD_QUOTED(word, length, rvalue, state) {
549                 char *s, **l;
550
551                 s = strndup(word, length);
552                 if (!s)
553                         return log_oom();
554
555                 l = strv_split(s, ",");
556                 free(s);
557
558                 strv_uniq(l);
559
560                 if (strv_length(l) <= 1) {
561                         strv_free(l);
562                         continue;
563                 }
564
565                 if (!arg_join_controllers) {
566                         arg_join_controllers = new(char**, 2);
567                         if (!arg_join_controllers) {
568                                 strv_free(l);
569                                 return log_oom();
570                         }
571
572                         arg_join_controllers[0] = l;
573                         arg_join_controllers[1] = NULL;
574
575                         n = 1;
576                 } else {
577                         char ***a;
578                         char ***t;
579
580                         t = new0(char**, n+2);
581                         if (!t) {
582                                 strv_free(l);
583                                 return log_oom();
584                         }
585
586                         n = 0;
587
588                         for (a = arg_join_controllers; *a; a++) {
589
590                                 if (strv_overlap(*a, l)) {
591                                         if (strv_extend_strv(&l, *a) < 0) {
592                                                 strv_free(l);
593                                                 strv_free_free(t);
594                                                 return log_oom();
595                                         }
596
597                                 } else {
598                                         char **c;
599
600                                         c = strv_copy(*a);
601                                         if (!c) {
602                                                 strv_free(l);
603                                                 strv_free_free(t);
604                                                 return log_oom();
605                                         }
606
607                                         t[n++] = c;
608                                 }
609                         }
610
611                         t[n++] = strv_uniq(l);
612
613                         strv_free_free(arg_join_controllers);
614                         arg_join_controllers = t;
615                 }
616         }
617         if (!isempty(state))
618                 log_syntax(unit, LOG_ERR, filename, line, EINVAL,
619                            "Trailing garbage, ignoring.");
620
621         return 0;
622 }
623
624 static int parse_config_file(void) {
625
626         const ConfigTableItem items[] = {
627                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
628                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
629                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
630                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
631                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
632                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
633                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
634                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
635                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
636                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
637                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
638                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
639                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
640 #ifdef HAVE_SECCOMP
641                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
642 #endif
643                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
644                 { "Manager", "DefaultTimerAccuracySec",   config_parse_sec,              0, &arg_default_timer_accuracy_usec       },
645                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
646                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
647                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
648                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
649                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
650                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
651                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
652                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
653                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
654                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
655                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
656                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
657                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
658                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
659                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
660                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
661                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
662                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
663                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
664                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
665                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
666                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
667                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
668                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
669                 { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
670                 { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
671                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
672                 {}
673         };
674
675         const char *fn, *conf_dirs_nulstr;
676
677         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
678         conf_dirs_nulstr = arg_running_as == SYSTEMD_SYSTEM ? CONF_DIRS_NULSTR("systemd/system.conf") : CONF_DIRS_NULSTR("systemd/user.conf");
679         config_parse_many(fn, conf_dirs_nulstr, "Manager\0",
680                           config_item_table_lookup, items, false, NULL);
681
682         return 0;
683 }
684
685 static int parse_argv(int argc, char *argv[]) {
686
687         enum {
688                 ARG_LOG_LEVEL = 0x100,
689                 ARG_LOG_TARGET,
690                 ARG_LOG_COLOR,
691                 ARG_LOG_LOCATION,
692                 ARG_UNIT,
693                 ARG_SYSTEM,
694                 ARG_USER,
695                 ARG_TEST,
696                 ARG_NO_PAGER,
697                 ARG_VERSION,
698                 ARG_DUMP_CONFIGURATION_ITEMS,
699                 ARG_DUMP_CORE,
700                 ARG_CRASH_SHELL,
701                 ARG_CONFIRM_SPAWN,
702                 ARG_SHOW_STATUS,
703                 ARG_DESERIALIZE,
704                 ARG_SWITCHED_ROOT,
705                 ARG_DEFAULT_STD_OUTPUT,
706                 ARG_DEFAULT_STD_ERROR
707         };
708
709         static const struct option options[] = {
710                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
711                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
712                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
713                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
714                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
715                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
716                 { "user",                     no_argument,       NULL, ARG_USER                     },
717                 { "test",                     no_argument,       NULL, ARG_TEST                     },
718                 { "no-pager",                 no_argument,       NULL, ARG_NO_PAGER                 },
719                 { "help",                     no_argument,       NULL, 'h'                          },
720                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
721                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
722                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
723                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
724                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
725                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
726                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
727                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
728                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
729                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
730                 {}
731         };
732
733         int c, r;
734
735         assert(argc >= 1);
736         assert(argv);
737
738         if (getpid() == 1)
739                 opterr = 0;
740
741         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
742
743                 switch (c) {
744
745                 case ARG_LOG_LEVEL:
746                         r = log_set_max_level_from_string(optarg);
747                         if (r < 0) {
748                                 log_error("Failed to parse log level %s.", optarg);
749                                 return r;
750                         }
751
752                         break;
753
754                 case ARG_LOG_TARGET:
755                         r = log_set_target_from_string(optarg);
756                         if (r < 0) {
757                                 log_error("Failed to parse log target %s.", optarg);
758                                 return r;
759                         }
760
761                         break;
762
763                 case ARG_LOG_COLOR:
764
765                         if (optarg) {
766                                 r = log_show_color_from_string(optarg);
767                                 if (r < 0) {
768                                         log_error("Failed to parse log color setting %s.", optarg);
769                                         return r;
770                                 }
771                         } else
772                                 log_show_color(true);
773
774                         break;
775
776                 case ARG_LOG_LOCATION:
777                         if (optarg) {
778                                 r = log_show_location_from_string(optarg);
779                                 if (r < 0) {
780                                         log_error("Failed to parse log location setting %s.", optarg);
781                                         return r;
782                                 }
783                         } else
784                                 log_show_location(true);
785
786                         break;
787
788                 case ARG_DEFAULT_STD_OUTPUT:
789                         r = exec_output_from_string(optarg);
790                         if (r < 0) {
791                                 log_error("Failed to parse default standard output setting %s.", optarg);
792                                 return r;
793                         } else
794                                 arg_default_std_output = r;
795                         break;
796
797                 case ARG_DEFAULT_STD_ERROR:
798                         r = exec_output_from_string(optarg);
799                         if (r < 0) {
800                                 log_error("Failed to parse default standard error output setting %s.", optarg);
801                                 return r;
802                         } else
803                                 arg_default_std_error = r;
804                         break;
805
806                 case ARG_UNIT:
807
808                         r = set_default_unit(optarg);
809                         if (r < 0)
810                                 return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
811
812                         break;
813
814                 case ARG_SYSTEM:
815                         arg_running_as = SYSTEMD_SYSTEM;
816                         break;
817
818                 case ARG_USER:
819                         arg_running_as = SYSTEMD_USER;
820                         break;
821
822                 case ARG_TEST:
823                         arg_action = ACTION_TEST;
824                         if (arg_no_pager < 0)
825                                 arg_no_pager = true;
826                         break;
827
828                 case ARG_NO_PAGER:
829                         arg_no_pager = true;
830                         break;
831
832                 case ARG_VERSION:
833                         arg_action = ACTION_VERSION;
834                         break;
835
836                 case ARG_DUMP_CONFIGURATION_ITEMS:
837                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
838                         break;
839
840                 case ARG_DUMP_CORE:
841                         r = optarg ? parse_boolean(optarg) : 1;
842                         if (r < 0) {
843                                 log_error("Failed to parse dump core boolean %s.", optarg);
844                                 return r;
845                         }
846                         arg_dump_core = r;
847                         break;
848
849                 case ARG_CRASH_SHELL:
850                         r = optarg ? parse_boolean(optarg) : 1;
851                         if (r < 0) {
852                                 log_error("Failed to parse crash shell boolean %s.", optarg);
853                                 return r;
854                         }
855                         arg_crash_shell = r;
856                         break;
857
858                 case ARG_CONFIRM_SPAWN:
859                         r = optarg ? parse_boolean(optarg) : 1;
860                         if (r < 0) {
861                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
862                                 return r;
863                         }
864                         arg_confirm_spawn = r;
865                         break;
866
867                 case ARG_SHOW_STATUS:
868                         if (optarg) {
869                                 r = parse_show_status(optarg, &arg_show_status);
870                                 if (r < 0) {
871                                         log_error("Failed to parse show status boolean %s.", optarg);
872                                         return r;
873                                 }
874                         } else
875                                 arg_show_status = SHOW_STATUS_YES;
876                         break;
877
878                 case ARG_DESERIALIZE: {
879                         int fd;
880                         FILE *f;
881
882                         r = safe_atoi(optarg, &fd);
883                         if (r < 0 || fd < 0) {
884                                 log_error("Failed to parse deserialize option %s.", optarg);
885                                 return r < 0 ? r : -EINVAL;
886                         }
887
888                         fd_cloexec(fd, true);
889
890                         f = fdopen(fd, "r");
891                         if (!f)
892                                 return log_error_errno(errno, "Failed to open serialization fd: %m");
893
894                         if (arg_serialization)
895                                 fclose(arg_serialization);
896
897                         arg_serialization = f;
898
899                         break;
900                 }
901
902                 case ARG_SWITCHED_ROOT:
903                         arg_switched_root = true;
904                         break;
905
906                 case 'h':
907                         arg_action = ACTION_HELP;
908                         if (arg_no_pager < 0)
909                                 arg_no_pager = true;
910                         break;
911
912                 case 'D':
913                         log_set_max_level(LOG_DEBUG);
914                         break;
915
916                 case 'b':
917                 case 's':
918                 case 'z':
919                         /* Just to eat away the sysvinit kernel
920                          * cmdline args without getopt() error
921                          * messages that we'll parse in
922                          * parse_proc_cmdline_word() or ignore. */
923
924                 case '?':
925                         if (getpid() != 1)
926                                 return -EINVAL;
927                         else
928                                 return 0;
929
930                 default:
931                         assert_not_reached("Unhandled option code.");
932                 }
933
934         if (optind < argc && getpid() != 1) {
935                 /* Hmm, when we aren't run as init system
936                  * let's complain about excess arguments */
937
938                 log_error("Excess arguments.");
939                 return -EINVAL;
940         }
941
942         return 0;
943 }
944
945 static int help(void) {
946
947         printf("%s [OPTIONS...]\n\n"
948                "Starts up and maintains the system or user services.\n\n"
949                "  -h --help                      Show this help\n"
950                "     --test                      Determine startup sequence, dump it and exit\n"
951                "     --no-pager                  Do not pipe output into a pager\n"
952                "     --dump-configuration-items  Dump understood unit configuration items\n"
953                "     --unit=UNIT                 Set default unit\n"
954                "     --system                    Run a system instance, even if PID != 1\n"
955                "     --user                      Run a user instance\n"
956                "     --dump-core[=0|1]           Dump core on crash\n"
957                "     --crash-shell[=0|1]         Run shell on crash\n"
958                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
959                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
960                "     --log-target=TARGET         Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
961                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
962                "     --log-color[=0|1]           Highlight important log messages\n"
963                "     --log-location[=0|1]        Include code location in log messages\n"
964                "     --default-standard-output=  Set default standard output for services\n"
965                "     --default-standard-error=   Set default standard error output for services\n",
966                program_invocation_short_name);
967
968         return 0;
969 }
970
971 static int version(void) {
972         puts(PACKAGE_STRING);
973         puts(SYSTEMD_FEATURES);
974
975         return 0;
976 }
977
978 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
979         FILE *f = NULL;
980         FDSet *fds = NULL;
981         int r;
982
983         assert(m);
984         assert(_f);
985         assert(_fds);
986
987         r = manager_open_serialization(m, &f);
988         if (r < 0) {
989                 log_error_errno(r, "Failed to create serialization file: %m");
990                 goto fail;
991         }
992
993         /* Make sure nothing is really destructed when we shut down */
994         m->n_reloading ++;
995         bus_manager_send_reloading(m, true);
996
997         fds = fdset_new();
998         if (!fds) {
999                 r = -ENOMEM;
1000                 log_error_errno(r, "Failed to allocate fd set: %m");
1001                 goto fail;
1002         }
1003
1004         r = manager_serialize(m, f, fds, switching_root);
1005         if (r < 0) {
1006                 log_error_errno(r, "Failed to serialize state: %m");
1007                 goto fail;
1008         }
1009
1010         if (fseeko(f, 0, SEEK_SET) < 0) {
1011                 log_error_errno(errno, "Failed to rewind serialization fd: %m");
1012                 goto fail;
1013         }
1014
1015         r = fd_cloexec(fileno(f), false);
1016         if (r < 0) {
1017                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
1018                 goto fail;
1019         }
1020
1021         r = fdset_cloexec(fds, false);
1022         if (r < 0) {
1023                 log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
1024                 goto fail;
1025         }
1026
1027         *_f = f;
1028         *_fds = fds;
1029
1030         return 0;
1031
1032 fail:
1033         fdset_free(fds);
1034
1035         if (f)
1036                 fclose(f);
1037
1038         return r;
1039 }
1040
1041 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1042         struct rlimit nl;
1043         int r;
1044
1045         assert(saved_rlimit);
1046
1047         /* Save the original RLIMIT_NOFILE so that we can reset it
1048          * later when transitioning from the initrd to the main
1049          * systemd or suchlike. */
1050         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
1051                 return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
1052
1053         /* Make sure forked processes get the default kernel setting */
1054         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1055                 struct rlimit *rl;
1056
1057                 rl = newdup(struct rlimit, saved_rlimit, 1);
1058                 if (!rl)
1059                         return log_oom();
1060
1061                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1062         }
1063
1064         /* Bump up the resource limit for ourselves substantially */
1065         nl.rlim_cur = nl.rlim_max = 64*1024;
1066         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1067         if (r < 0)
1068                 return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
1069
1070         return 0;
1071 }
1072
1073 static void test_mtab(void) {
1074
1075         static const char ok[] =
1076                 "/proc/self/mounts\0"
1077                 "/proc/mounts\0"
1078                 "../proc/self/mounts\0"
1079                 "../proc/mounts\0";
1080
1081         _cleanup_free_ char *p = NULL;
1082         int r;
1083
1084         /* Check that /etc/mtab is a symlink to the right place or
1085          * non-existing. But certainly not a file, or a symlink to
1086          * some weird place... */
1087
1088         r = readlink_malloc("/etc/mtab", &p);
1089         if (r == -ENOENT)
1090                 return;
1091         if (r >= 0 && nulstr_contains(ok, p))
1092                 return;
1093
1094         log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
1095                     "This is not supported anymore. "
1096                     "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
1097 }
1098
/* Logs a warning if /usr looks like a separate file system that has not been mounted yet.
 *
 * The check is dir_is_empty("/usr"): only a positive result triggers the warning, while a
 * zero or negative (error) result is silently accepted. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1110
1111 static int initialize_join_controllers(void) {
1112         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1113          * + "net_prio". We'd like to add "cpuset" to the mix, but
1114          * "cpuset" does't really work for groups with no initialized
1115          * attributes. */
1116
1117         arg_join_controllers = new(char**, 3);
1118         if (!arg_join_controllers)
1119                 return -ENOMEM;
1120
1121         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1122         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1123         arg_join_controllers[2] = NULL;
1124
1125         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1126                 free_join_controllers();
1127                 return -ENOMEM;
1128         }
1129
1130         return 0;
1131 }
1132
1133 static int enforce_syscall_archs(Set *archs) {
1134 #ifdef HAVE_SECCOMP
1135         scmp_filter_ctx *seccomp;
1136         Iterator i;
1137         void *id;
1138         int r;
1139
1140         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1141         if (!seccomp)
1142                 return log_oom();
1143
1144         SET_FOREACH(id, arg_syscall_archs, i) {
1145                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1146                 if (r == -EEXIST)
1147                         continue;
1148                 if (r < 0) {
1149                         log_error_errno(r, "Failed to add architecture to seccomp: %m");
1150                         goto finish;
1151                 }
1152         }
1153
1154         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1155         if (r < 0) {
1156                 log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
1157                 goto finish;
1158         }
1159
1160         r = seccomp_load(seccomp);
1161         if (r < 0)
1162                 log_error_errno(r, "Failed to add install architecture seccomp: %m");
1163
1164 finish:
1165         seccomp_release(seccomp);
1166         return r;
1167 #else
1168         return 0;
1169 #endif
1170 }
1171
1172 static int status_welcome(void) {
1173         _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
1174         int r;
1175
1176         r = parse_env_file("/etc/os-release", NEWLINE,
1177                            "PRETTY_NAME", &pretty_name,
1178                            "ANSI_COLOR", &ansi_color,
1179                            NULL);
1180         if (r == -ENOENT) {
1181                 r = parse_env_file("/usr/lib/os-release", NEWLINE,
1182                                    "PRETTY_NAME", &pretty_name,
1183                                    "ANSI_COLOR", &ansi_color,
1184                                    NULL);
1185         }
1186
1187         if (r < 0 && r != -ENOENT)
1188                 log_warning_errno(r, "Failed to read os-release file: %m");
1189
1190         return status_printf(NULL, false, false,
1191                              "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
1192                              isempty(ansi_color) ? "1" : ansi_color,
1193                              isempty(pretty_name) ? "Linux" : pretty_name);
1194 }
1195
/* Writes the value of the $container environment variable to /run/systemd/container.
 *
 * Returns 0 without doing anything if the variable is unset or empty, otherwise the result of
 * write_string_file(). */
static int write_container_id(void) {
        const char *id = getenv("container");

        if (!isempty(id))
                return write_string_file("/run/systemd/container", id);

        return 0;
}
1205
1206 int main(int argc, char *argv[]) {
1207         Manager *m = NULL;
1208         int r, retval = EXIT_FAILURE;
1209         usec_t before_startup, after_startup;
1210         char timespan[FORMAT_TIMESPAN_MAX];
1211         FDSet *fds = NULL;
1212         bool reexecute = false;
1213         const char *shutdown_verb = NULL;
1214         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1215         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1216         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1217         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1218         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1219         static char systemd[] = "systemd";
1220         bool skip_setup = false;
1221         unsigned j;
1222         bool loaded_policy = false;
1223         bool arm_reboot_watchdog = false;
1224         bool queue_default_job = false;
1225         bool empty_etc = false;
1226         char *switch_root_dir = NULL, *switch_root_init = NULL;
1227         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1228         const char *error_message = NULL;
1229
1230 #ifdef HAVE_SYSV_COMPAT
1231         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1232                 /* This is compatibility support for SysV, where
1233                  * calling init as a user is identical to telinit. */
1234
1235                 errno = -ENOENT;
1236                 execv(SYSTEMCTL_BINARY_PATH, argv);
1237                 log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1238                 return 1;
1239         }
1240 #endif
1241
1242         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1243         dual_timestamp_get(&userspace_timestamp);
1244
1245         /* Determine if this is a reexecution or normal bootup. We do
1246          * the full command line parsing much later, so let's just
1247          * have a quick peek here. */
1248         if (strv_find(argv+1, "--deserialize"))
1249                 skip_setup = true;
1250
1251         /* If we have switched root, do all the special setup
1252          * things */
1253         if (strv_find(argv+1, "--switched-root"))
1254                 skip_setup = false;
1255
1256         /* If we get started via the /sbin/init symlink then we are
1257            called 'init'. After a subsequent reexecution we are then
1258            called 'systemd'. That is confusing, hence let's call us
1259            systemd right-away. */
1260         program_invocation_short_name = systemd;
1261         prctl(PR_SET_NAME, systemd);
1262
1263         saved_argv = argv;
1264         saved_argc = argc;
1265
1266         log_show_color(isatty(STDERR_FILENO) > 0);
1267         log_set_upgrade_syslog_to_journal(true);
1268
1269         /* Disable the umask logic */
1270         if (getpid() == 1)
1271                 umask(0);
1272
1273         if (getpid() == 1 && detect_container(NULL) <= 0) {
1274
1275                 /* Running outside of a container as PID 1 */
1276                 arg_running_as = SYSTEMD_SYSTEM;
1277                 make_null_stdio();
1278                 log_set_target(LOG_TARGET_KMSG);
1279                 log_open();
1280
1281                 if (in_initrd())
1282                         initrd_timestamp = userspace_timestamp;
1283
1284                 if (!skip_setup) {
1285                         mount_setup_early();
1286                         dual_timestamp_get(&security_start_timestamp);
1287                         if (mac_selinux_setup(&loaded_policy) < 0) {
1288                                 error_message = "Failed to load SELinux policy";
1289                                 goto finish;
1290                         } else if (ima_setup() < 0) {
1291                                 error_message = "Failed to load IMA policy";
1292                                 goto finish;
1293                         } else if (mac_smack_setup(&loaded_policy) < 0) {
1294                                 error_message = "Failed to load SMACK policy";
1295                                 goto finish;
1296                         }
1297                         dual_timestamp_get(&security_finish_timestamp);
1298                 }
1299
1300                 if (mac_selinux_init(NULL) < 0) {
1301                         error_message = "Failed to initialize SELinux policy";
1302                         goto finish;
1303                 }
1304
1305                 if (!skip_setup) {
1306                         if (clock_is_localtime() > 0) {
1307                                 int min;
1308
1309                                 /*
1310                                  * The very first call of settimeofday() also does a time warp in the kernel.
1311                                  *
1312                                  * In the rtc-in-local time mode, we set the kernel's timezone, and rely on
1313                                  * external tools to take care of maintaining the RTC and do all adjustments.
1314                                  * This matches the behavior of Windows, which leaves the RTC alone if the
1315                                  * registry tells that the RTC runs in UTC.
1316                                  */
1317                                 r = clock_set_timezone(&min);
1318                                 if (r < 0)
1319                                         log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
1320                                 else
1321                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1322                         } else if (!in_initrd()) {
1323                                 /*
1324                                  * Do a dummy very first call to seal the kernel's time warp magic.
1325                                  *
1326                                  * Do not call this from inside the initrd. The initrd might not
1327                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1328                                  * that way. In such case, we need to delay the time-warp or the sealing
1329                                  * until we reach the real system.
1330                                  *
1331                                  * Do not set the kernel's timezone. The concept of local time cannot
1332                                  * be supported reliably, the time will jump or be incorrect at every daylight
1333                                  * saving time change. All kernel local time concepts will be treated
1334                                  * as UTC that way.
1335                                  */
1336                                 clock_reset_timewarp();
1337                         }
1338                 }
1339
1340                 /* Set the default for later on, but don't actually
1341                  * open the logs like this for now. Note that if we
1342                  * are transitioning from the initrd there might still
1343                  * be journal fd open, and we shouldn't attempt
1344                  * opening that before we parsed /proc/cmdline which
1345                  * might redirect output elsewhere. */
1346                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1347
1348         } else if (getpid() == 1) {
1349                 /* Running inside a container, as PID 1 */
1350                 arg_running_as = SYSTEMD_SYSTEM;
1351                 log_set_target(LOG_TARGET_CONSOLE);
1352                 log_close_console(); /* force reopen of /dev/console */
1353                 log_open();
1354
1355                 /* For the later on, see above... */
1356                 log_set_target(LOG_TARGET_JOURNAL);
1357
1358                 /* clear the kernel timestamp,
1359                  * because we are in a container */
1360                 kernel_timestamp.monotonic = 0ULL;
1361                 kernel_timestamp.realtime = 0ULL;
1362
1363         } else {
1364                 /* Running as user instance */
1365                 arg_running_as = SYSTEMD_USER;
1366                 log_set_target(LOG_TARGET_AUTO);
1367                 log_open();
1368
1369                 /* clear the kernel timestamp,
1370                  * because we are not PID 1 */
1371                 kernel_timestamp.monotonic = 0ULL;
1372                 kernel_timestamp.realtime = 0ULL;
1373         }
1374
1375         /* Initialize default unit */
1376         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1377         if (r < 0) {
1378                 log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
1379                 error_message = "Failed to set default unit";
1380                 goto finish;
1381         }
1382
1383         r = initialize_join_controllers();
1384         if (r < 0) {
1385                 error_message = "Failed to initalize cgroup controllers";
1386                 goto finish;
1387         }
1388
1389         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1390          * /proc/$PID/fd is available. */
1391         if (getpid() == 1) {
1392
1393                 /* Load the kernel modules early, so that kdbus.ko is loaded before kdbusfs is mounted */
1394                 if (!skip_setup)
1395                         kmod_setup();
1396
1397                 r = mount_setup(loaded_policy);
1398                 if (r < 0) {
1399                         error_message = "Failed to mount API filesystems";
1400                         goto finish;
1401                 }
1402         }
1403
1404         /* Reset all signal handlers. */
1405         assert_se(reset_all_signal_handlers() == 0);
1406
1407         ignore_signals(SIGNALS_IGNORE, -1);
1408
1409         if (parse_config_file() < 0) {
1410                 error_message = "Failed to parse config file";
1411                 goto finish;
1412         }
1413
1414         if (arg_running_as == SYSTEMD_SYSTEM) {
1415                 r = parse_proc_cmdline(parse_proc_cmdline_item);
1416                 if (r < 0)
1417                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1418         }
1419
1420         /* Note that this also parses bits from the kernel command
1421          * line, including "debug". */
1422         log_parse_environment();
1423
1424         if (parse_argv(argc, argv) < 0) {
1425                 error_message = "Failed to parse commandline arguments";
1426                 goto finish;
1427         }
1428
1429         if (arg_action == ACTION_TEST &&
1430             geteuid() == 0) {
1431                 log_error("Don't run test mode as root.");
1432                 goto finish;
1433         }
1434
1435         if (arg_running_as == SYSTEMD_USER &&
1436             arg_action == ACTION_RUN &&
1437             sd_booted() <= 0) {
1438                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1439                 goto finish;
1440         }
1441
1442         if (arg_running_as == SYSTEMD_SYSTEM &&
1443             arg_action == ACTION_RUN &&
1444             running_in_chroot() > 0) {
1445                 log_error("Cannot be run in a chroot() environment.");
1446                 goto finish;
1447         }
1448
1449         if (arg_action == ACTION_TEST)
1450                 skip_setup = true;
1451
1452         pager_open_if_enabled();
1453
1454         if (arg_action == ACTION_HELP) {
1455                 retval = help();
1456                 goto finish;
1457         } else if (arg_action == ACTION_VERSION) {
1458                 retval = version();
1459                 goto finish;
1460         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1461                 unit_dump_config_items(stdout);
1462                 retval = EXIT_SUCCESS;
1463                 goto finish;
1464         } else if (arg_action == ACTION_DONE) {
1465                 retval = EXIT_SUCCESS;
1466                 goto finish;
1467         }
1468
1469         if (arg_running_as == SYSTEMD_USER &&
1470             !getenv("XDG_RUNTIME_DIR")) {
1471                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1472                 goto finish;
1473         }
1474
1475         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1476
1477         /* Close logging fds, in order not to confuse fdset below */
1478         log_close();
1479
1480         /* Remember open file descriptors for later deserialization */
1481         r = fdset_new_fill(&fds);
1482         if (r < 0) {
1483                 log_emergency_errno(r, "Failed to allocate fd set: %m");
1484                 error_message = "Failed to allocate fd set";
1485                 goto finish;
1486         } else
1487                 fdset_cloexec(fds, true);
1488
1489         if (arg_serialization)
1490                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1491
1492         if (arg_running_as == SYSTEMD_SYSTEM)
1493                 /* Become a session leader if we aren't one yet. */
1494                 setsid();
1495
1496         /* Move out of the way, so that we won't block unmounts */
1497         assert_se(chdir("/")  == 0);
1498
1499         /* Reset the console, but only if this is really init and we
1500          * are freshly booted */
1501         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN) {
1502
1503                 /* If we are init, we connect stdin/stdout/stderr to
1504                  * /dev/null and make sure we don't have a controlling
1505                  * tty. */
1506                 release_terminal();
1507
1508                 if (getpid() == 1 && !skip_setup)
1509                         console_setup();
1510         }
1511
1512         /* Open the logging devices, if possible and necessary */
1513         log_open();
1514
1515         if (arg_show_status == _SHOW_STATUS_UNSET)
1516                 arg_show_status = SHOW_STATUS_YES;
1517
1518         /* Make sure we leave a core dump without panicking the
1519          * kernel. */
1520         if (getpid() == 1) {
1521                 install_crash_handler();
1522
1523                 r = mount_cgroup_controllers(arg_join_controllers);
1524                 if (r < 0)
1525                         goto finish;
1526         }
1527
1528         if (arg_running_as == SYSTEMD_SYSTEM) {
1529                 const char *virtualization = NULL;
1530
1531                 log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
1532                          arg_action == ACTION_TEST ? "test " : "" );
1533
1534                 detect_virtualization(&virtualization);
1535                 if (virtualization)
1536                         log_info("Detected virtualization '%s'.", virtualization);
1537
1538                 write_container_id();
1539
1540                 log_info("Detected architecture '%s'.", architecture_to_string(uname_architecture()));
1541
1542                 if (in_initrd())
1543                         log_info("Running in initial RAM disk.");
1544
1545                 /* Let's check whether /etc is already populated. We
1546                  * don't actually really check for that, but use
1547                  * /etc/machine-id as flag file. This allows container
1548                  * managers and installers to provision a couple of
1549                  * files already. If the container manager wants to
1550                  * provision the machine ID itself it should pass
1551                  * $container_uuid to PID 1. */
1552
1553                 empty_etc = access("/etc/machine-id", F_OK) < 0;
1554                 if (empty_etc)
1555                         log_info("Running with unpopulated /etc.");
1556         } else {
1557                 _cleanup_free_ char *t;
1558
1559                 t = uid_to_name(getuid());
1560                 log_debug(PACKAGE_STRING " running in %suser mode for user "UID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1561                           arg_action == ACTION_TEST ? " test" : "", getuid(), t);
1562         }
1563
1564         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1565                 if (arg_show_status > 0 || plymouth_running())
1566                         status_welcome();
1567
1568                 hostname_setup();
1569                 machine_id_setup(NULL);
1570                 loopback_setup();
1571
1572                 test_mtab();
1573                 test_usr();
1574         }
1575
1576         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1577                 watchdog_set_timeout(&arg_runtime_watchdog);
1578
1579         if (arg_timer_slack_nsec != NSEC_INFINITY)
1580                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1581                         log_error_errno(errno, "Failed to adjust timer slack: %m");
1582
1583         if (arg_capability_bounding_set_drop) {
1584                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1585                 if (r < 0) {
1586                         log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
1587                         error_message = "Failed to drop capability bounding set of usermode helpers";
1588                         goto finish;
1589                 }
1590                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1591                 if (r < 0) {
1592                         log_emergency_errno(r, "Failed to drop capability bounding set: %m");
1593                         error_message = "Failed to drop capability bounding set";
1594                         goto finish;
1595                 }
1596         }
1597
1598         if (arg_syscall_archs) {
1599                 r = enforce_syscall_archs(arg_syscall_archs);
1600                 if (r < 0) {
1601                         error_message = "Failed to set syscall architectures";
1602                         goto finish;
1603                 }
1604         }
1605
1606         if (arg_running_as == SYSTEMD_USER) {
1607                 /* Become reaper of our children */
1608                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1609                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
1610                         if (errno == EINVAL)
1611                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1612                 }
1613         }
1614
1615         if (arg_running_as == SYSTEMD_SYSTEM) {
1616                 bump_rlimit_nofile(&saved_rlimit_nofile);
1617
1618                 if (empty_etc) {
1619                         r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_FULL, false, NULL, 0);
1620                         if (r < 0)
1621                                 log_warning_errno(r, "Failed to populate /etc with preset unit settings, ignoring: %m");
1622                         else
1623                                 log_info("Populated /etc with preset unit settings.");
1624                 }
1625         }
1626
1627         r = manager_new(arg_running_as, arg_action == ACTION_TEST, &m);
1628         if (r < 0) {
1629                 log_emergency_errno(r, "Failed to allocate manager object: %m");
1630                 error_message = "Failed to allocate manager object";
1631                 goto finish;
1632         }
1633
1634         m->confirm_spawn = arg_confirm_spawn;
1635         m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
1636         m->default_std_output = arg_default_std_output;
1637         m->default_std_error = arg_default_std_error;
1638         m->default_restart_usec = arg_default_restart_usec;
1639         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1640         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1641         m->default_start_limit_interval = arg_default_start_limit_interval;
1642         m->default_start_limit_burst = arg_default_start_limit_burst;
1643         m->default_cpu_accounting = arg_default_cpu_accounting;
1644         m->default_blockio_accounting = arg_default_blockio_accounting;
1645         m->default_memory_accounting = arg_default_memory_accounting;
1646         m->runtime_watchdog = arg_runtime_watchdog;
1647         m->shutdown_watchdog = arg_shutdown_watchdog;
1648
1649         m->userspace_timestamp = userspace_timestamp;
1650         m->kernel_timestamp = kernel_timestamp;
1651         m->initrd_timestamp = initrd_timestamp;
1652         m->security_start_timestamp = security_start_timestamp;
1653         m->security_finish_timestamp = security_finish_timestamp;
1654
1655         manager_set_default_rlimits(m, arg_default_rlimit);
1656         manager_environment_add(m, NULL, arg_default_environment);
1657         manager_set_show_status(m, arg_show_status);
1658         manager_set_first_boot(m, empty_etc);
1659
1660         /* Remember whether we should queue the default job */
1661         queue_default_job = !arg_serialization || arg_switched_root;
1662
1663         before_startup = now(CLOCK_MONOTONIC);
1664
1665         r = manager_startup(m, arg_serialization, fds);
1666         if (r < 0)
1667                 log_error_errno(r, "Failed to fully start up daemon: %m");
1668
1669         /* This will close all file descriptors that were opened, but
1670          * not claimed by any unit. */
1671         fdset_free(fds);
1672         fds = NULL;
1673
1674         if (arg_serialization) {
1675                 fclose(arg_serialization);
1676                 arg_serialization = NULL;
1677         }
1678
1679         if (queue_default_job) {
1680                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1681                 Unit *target = NULL;
1682                 Job *default_unit_job;
1683
1684                 log_debug("Activating default unit: %s", arg_default_unit);
1685
1686                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1687                 if (r < 0)
1688                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1689                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1690                         log_error_errno(target->load_error, "Failed to load default target: %m");
1691                 else if (target->load_state == UNIT_MASKED)
1692                         log_error("Default target masked.");
1693
1694                 if (!target || target->load_state != UNIT_LOADED) {
1695                         log_info("Trying to load rescue target...");
1696
1697                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1698                         if (r < 0) {
1699                                 log_emergency("Failed to load rescue target: %s", bus_error_message(&error, r));
1700                                 error_message = "Failed to load rescue target";
1701                                 goto finish;
1702                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1703                                 log_emergency_errno(target->load_error, "Failed to load rescue target: %m");
1704                                 error_message = "Failed to load rescue target";
1705                                 goto finish;
1706                         } else if (target->load_state == UNIT_MASKED) {
1707                                 log_emergency("Rescue target masked.");
1708                                 error_message = "Rescue target masked";
1709                                 goto finish;
1710                         }
1711                 }
1712
1713                 assert(target->load_state == UNIT_LOADED);
1714
1715                 if (arg_action == ACTION_TEST) {
1716                         printf("-> By units:\n");
1717                         manager_dump_units(m, stdout, "\t");
1718                 }
1719
1720                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1721                 if (r == -EPERM) {
1722                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1723
1724                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1725                         if (r < 0) {
1726                                 log_emergency("Failed to start default target: %s", bus_error_message(&error, r));
1727                                 error_message = "Failed to start default target";
1728                                 goto finish;
1729                         }
1730                 } else if (r < 0) {
1731                         log_emergency("Failed to isolate default target: %s", bus_error_message(&error, r));
1732                         error_message = "Failed to isolate default target";
1733                         goto finish;
1734                 }
1735
1736                 m->default_unit_job_id = default_unit_job->id;
1737
1738                 after_startup = now(CLOCK_MONOTONIC);
1739                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1740                          "Loaded units and determined initial transaction in %s.",
1741                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
1742
1743                 if (arg_action == ACTION_TEST) {
1744                         printf("-> By jobs:\n");
1745                         manager_dump_jobs(m, stdout, "\t");
1746                         retval = EXIT_SUCCESS;
1747                         goto finish;
1748                 }
1749         }
1750
1751         for (;;) {
1752                 r = manager_loop(m);
1753                 if (r < 0) {
1754                         log_emergency_errno(r, "Failed to run main loop: %m");
1755                         error_message = "Failed to run main loop";
1756                         goto finish;
1757                 }
1758
1759                 switch (m->exit_code) {
1760
1761                 case MANAGER_EXIT:
1762                         retval = EXIT_SUCCESS;
1763                         log_debug("Exit.");
1764                         goto finish;
1765
1766                 case MANAGER_RELOAD:
1767                         log_info("Reloading.");
1768                         r = manager_reload(m);
1769                         if (r < 0)
1770                                 log_error_errno(r, "Failed to reload: %m");
1771                         break;
1772
1773                 case MANAGER_REEXECUTE:
1774
1775                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0) {
1776                                 error_message = "Failed to prepare for reexection";
1777                                 goto finish;
1778                         }
1779
1780                         reexecute = true;
1781                         log_notice("Reexecuting.");
1782                         goto finish;
1783
1784                 case MANAGER_SWITCH_ROOT:
1785                         /* Steal the switch root parameters */
1786                         switch_root_dir = m->switch_root;
1787                         switch_root_init = m->switch_root_init;
1788                         m->switch_root = m->switch_root_init = NULL;
1789
1790                         if (!switch_root_init)
1791                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0) {
1792                                         error_message = "Failed to prepare for reexection";
1793                                         goto finish;
1794                                 }
1795
1796                         reexecute = true;
1797                         log_notice("Switching root.");
1798                         goto finish;
1799
1800                 case MANAGER_REBOOT:
1801                 case MANAGER_POWEROFF:
1802                 case MANAGER_HALT:
1803                 case MANAGER_KEXEC: {
1804                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1805                                 [MANAGER_REBOOT] = "reboot",
1806                                 [MANAGER_POWEROFF] = "poweroff",
1807                                 [MANAGER_HALT] = "halt",
1808                                 [MANAGER_KEXEC] = "kexec"
1809                         };
1810
1811                         assert_se(shutdown_verb = table[m->exit_code]);
1812                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1813
1814                         log_notice("Shutting down.");
1815                         goto finish;
1816                 }
1817
1818                 default:
1819                         assert_not_reached("Unknown exit code.");
1820                 }
1821         }
1822
1823 finish:
1824         pager_close();
1825
1826         m = manager_free(m);
1827
1828         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1829                 free(arg_default_rlimit[j]);
1830                 arg_default_rlimit[j] = NULL;
1831         }
1832
1833         free(arg_default_unit);
1834         arg_default_unit = NULL;
1835
1836         free_join_controllers();
1837
1838         strv_free(arg_default_environment);
1839         arg_default_environment = NULL;
1840
1841         set_free(arg_syscall_archs);
1842         arg_syscall_archs = NULL;
1843
1844         mac_selinux_finish();
1845
1846         if (reexecute) {
1847                 const char **args;
1848                 unsigned i, args_size;
1849
1850                 /* Close and disarm the watchdog, so that the new
1851                  * instance can reinitialize it, but doesn't get
1852                  * rebooted while we do that */
1853                 watchdog_close(true);
1854
1855                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1856                  * that the new systemd can pass the kernel default to
1857                  * its child processes */
1858                 if (saved_rlimit_nofile.rlim_cur > 0)
1859                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1860
1861                 if (switch_root_dir) {
1862                         /* Kill all remaining processes from the
1863                          * initrd, but don't wait for them, so that we
1864                          * can handle the SIGCHLD for them after
1865                          * deserializing. */
1866                         broadcast_signal(SIGTERM, false, true);
1867
1868                         /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
1869                         r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
1870                         if (r < 0)
1871                                 log_error_errno(r, "Failed to switch root, trying to continue: %m");
1872                 }
1873
1874                 args_size = MAX(6, argc+1);
1875                 args = newa(const char*, args_size);
1876
1877                 if (!switch_root_init) {
1878                         char sfd[16];
1879
1880                         /* First try to spawn ourselves with the right
1881                          * path, and with full serialization. We do
1882                          * this only if the user didn't specify an
1883                          * explicit init to spawn. */
1884
1885                         assert(arg_serialization);
1886                         assert(fds);
1887
1888                         snprintf(sfd, sizeof(sfd), "%i", fileno(arg_serialization));
1889                         char_array_0(sfd);
1890
1891                         i = 0;
1892                         args[i++] = SYSTEMD_BINARY_PATH;
1893                         if (switch_root_dir)
1894                                 args[i++] = "--switched-root";
1895                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1896                         args[i++] = "--deserialize";
1897                         args[i++] = sfd;
1898                         args[i++] = NULL;
1899
1900                         /* do not pass along the environment we inherit from the kernel or initrd */
1901                         if (switch_root_dir)
1902                                 clearenv();
1903
1904                         assert(i <= args_size);
1905                         execv(args[0], (char* const*) args);
1906                 }
1907
1908                 /* Try the fallback, if there is any, without any
1909                  * serialization. We pass the original argv[] and
1910                  * envp[]. (Well, modulo the ordering changes due to
1911                  * getopt() in argv[], and some cleanups in envp[],
1912                  * but let's hope that doesn't matter.) */
1913
1914                 if (arg_serialization) {
1915                         fclose(arg_serialization);
1916                         arg_serialization = NULL;
1917                 }
1918
1919                 if (fds) {
1920                         fdset_free(fds);
1921                         fds = NULL;
1922                 }
1923
1924                 /* Reopen the console */
1925                 make_console_stdio();
1926
1927                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1928                         args[i++] = argv[j];
1929                 args[i++] = NULL;
1930                 assert(i <= args_size);
1931
1932                 /* Reenable any blocked signals, especially important
1933                  * if we switch from initial ramdisk to init=... */
1934                 reset_all_signal_handlers();
1935                 reset_signal_mask();
1936
1937                 if (switch_root_init) {
1938                         args[0] = switch_root_init;
1939                         execv(args[0], (char* const*) args);
1940                         log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
1941                 }
1942
1943                 args[0] = "/sbin/init";
1944                 execv(args[0], (char* const*) args);
1945
1946                 if (errno == ENOENT) {
1947                         log_warning("No /sbin/init, trying fallback");
1948
1949                         args[0] = "/bin/sh";
1950                         args[1] = NULL;
1951                         execv(args[0], (char* const*) args);
1952                         log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
1953                 } else
1954                         log_warning_errno(errno, "Failed to execute /sbin/init, giving up: %m");
1955         }
1956
1957         if (arg_serialization) {
1958                 fclose(arg_serialization);
1959                 arg_serialization = NULL;
1960         }
1961
1962         if (fds) {
1963                 fdset_free(fds);
1964                 fds = NULL;
1965         }
1966
1967 #ifdef HAVE_VALGRIND_VALGRIND_H
1968         /* If we are PID 1 and running under valgrind, then let's exit
1969          * here explicitly. valgrind will only generate nice output on
1970          * exit(), not on exec(), hence let's do the former not the
1971          * latter here. */
1972         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1973                 return 0;
1974 #endif
1975
1976         if (shutdown_verb) {
1977                 char log_level[DECIMAL_STR_MAX(int) + 1];
1978                 const char* command_line[9] = {
1979                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1980                         shutdown_verb,
1981                         "--log-level", log_level,
1982                         "--log-target",
1983                 };
1984                 unsigned pos = 5;
1985                 _cleanup_strv_free_ char **env_block = NULL;
1986
1987                 assert(command_line[pos] == NULL);
1988                 env_block = strv_copy(environ);
1989
1990                 snprintf(log_level, sizeof(log_level), "%d", log_get_max_level());
1991
1992                 switch (log_get_target()) {
1993                 case LOG_TARGET_KMSG:
1994                 case LOG_TARGET_JOURNAL_OR_KMSG:
1995                 case LOG_TARGET_SYSLOG_OR_KMSG:
1996                         command_line[pos++] = "kmsg";
1997                         break;
1998
1999                 case LOG_TARGET_CONSOLE:
2000                 default:
2001                         command_line[pos++] = "console";
2002                         break;
2003                 };
2004
2005                 if (log_get_show_color())
2006                         command_line[pos++] = "--log-color";
2007
2008                 if (log_get_show_location())
2009                         command_line[pos++] = "--log-location";
2010
2011                 assert(pos < ELEMENTSOF(command_line));
2012
2013                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
2014                         char *e;
2015
2016                         /* If we reboot let's set the shutdown
2017                          * watchdog and tell the shutdown binary to
2018                          * repeatedly ping it */
2019                         watchdog_set_timeout(&arg_shutdown_watchdog);
2020                         watchdog_close(false);
2021
2022                         /* Tell the binary how often to ping, ignore failure */
2023                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
2024                                 strv_push(&env_block, e);
2025                 } else
2026                         watchdog_close(true);
2027
2028                 /* Avoid the creation of new processes forked by the
2029                  * kernel; at this point, we will not listen to the
2030                  * signals anyway */
2031                 if (detect_container(NULL) <= 0)
2032                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
2033
2034                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
2035                 log_error_errno(errno, "Failed to execute shutdown binary, %s: %m",
2036                           getpid() == 1 ? "freezing" : "quitting");
2037         }
2038
2039         if (getpid() == 1) {
2040                 if (error_message)
2041                         manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
2042                                               ANSI_HIGHLIGHT_RED_ON "!!!!!!" ANSI_HIGHLIGHT_OFF,
2043                                               "%s, freezing.", error_message);
2044                 freeze();
2045         }
2046
2047         return retval;
2048 }