chiark / gitweb /
Extract looping over /proc/cmdline into a shared function
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85 static char *arg_default_unit = NULL;
86 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
87 static bool arg_dump_core = true;
88 static bool arg_crash_shell = false;
89 static int arg_crash_chvt = -1;
90 static bool arg_confirm_spawn = false;
91 static ShowStatus arg_show_status = SHOW_STATUS_UNSET;
92 static bool arg_switched_root = false;
93 static char ***arg_join_controllers = NULL;
94 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
95 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
96 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
97 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
98 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
99 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
100 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
101 static usec_t arg_runtime_watchdog = 0;
102 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
103 static char **arg_default_environment = NULL;
104 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
105 static uint64_t arg_capability_bounding_set_drop = 0;
106 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
107 static Set* arg_syscall_archs = NULL;
108 static FILE* arg_serialization = NULL;
109
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD before forking the core-dump child. */
static void nop_handler(int sig) {
}
111
112 noreturn static void crash(int sig) {
113
114         if (getpid() != 1)
115                 /* Pass this on immediately, if this is not PID 1 */
116                 raise(sig);
117         else if (!arg_dump_core)
118                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
119         else {
120                 struct sigaction sa = {
121                         .sa_handler = nop_handler,
122                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
123                 };
124                 pid_t pid;
125
126                 /* We want to wait for the core process, hence let's enable SIGCHLD */
127                 sigaction(SIGCHLD, &sa, NULL);
128
129                 pid = fork();
130                 if (pid < 0)
131                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
132
133                 else if (pid == 0) {
134                         struct rlimit rl = {};
135
136                         /* Enable default signal handler for core dump */
137                         zero(sa);
138                         sa.sa_handler = SIG_DFL;
139                         sigaction(sig, &sa, NULL);
140
141                         /* Don't limit the core dump size */
142                         rl.rlim_cur = RLIM_INFINITY;
143                         rl.rlim_max = RLIM_INFINITY;
144                         setrlimit(RLIMIT_CORE, &rl);
145
146                         /* Just to be sure... */
147                         chdir("/");
148
149                         /* Raise the signal again */
150                         raise(sig);
151
152                         assert_not_reached("We shouldn't be here...");
153                         _exit(1);
154
155                 } else {
156                         siginfo_t status;
157                         int r;
158
159                         /* Order things nicely. */
160                         r = wait_for_terminate(pid, &status);
161                         if (r < 0)
162                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
163                         else if (status.si_code != CLD_DUMPED)
164                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
165                         else
166                                 log_error("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
167                 }
168         }
169
170         if (arg_crash_chvt)
171                 chvt(arg_crash_chvt);
172
173         if (arg_crash_shell) {
174                 struct sigaction sa = {
175                         .sa_handler = SIG_IGN,
176                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
177                 };
178                 pid_t pid;
179
180                 log_info("Executing crash shell in 10s...");
181                 sleep(10);
182
183                 /* Let the kernel reap children for us */
184                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
185
186                 pid = fork();
187                 if (pid < 0)
188                         log_error("Failed to fork off crash shell: %m");
189                 else if (pid == 0) {
190                         make_console_stdio();
191                         execl("/bin/sh", "/bin/sh", NULL);
192
193                         log_error("execl() failed: %m");
194                         _exit(1);
195                 }
196
197                 log_info("Successfully spawned crash shell as pid "PID_FMT".", pid);
198         }
199
200         log_info("Freezing execution.");
201         freeze();
202 }
203
204 static void install_crash_handler(void) {
205         struct sigaction sa = {
206                 .sa_handler = crash,
207                 .sa_flags = SA_NODEFER,
208         };
209
210         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
211 }
212
213 static int console_setup(bool do_reset) {
214         int tty_fd, r;
215
216         /* If we are init, we connect stdin/stdout/stderr to /dev/null
217          * and make sure we don't have a controlling tty. */
218
219         release_terminal();
220
221         if (!do_reset)
222                 return 0;
223
224         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
225         if (tty_fd < 0) {
226                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
227                 return -tty_fd;
228         }
229
230         /* We don't want to force text mode.
231          * plymouth may be showing pictures already from initrd. */
232         r = reset_terminal_fd(tty_fd, false);
233         if (r < 0)
234                 log_error("Failed to reset /dev/console: %s", strerror(-r));
235
236         close_nointr_nofail(tty_fd);
237         return r;
238 }
239
240 static int set_default_unit(const char *u) {
241         char *c;
242
243         assert(u);
244
245         c = strdup(u);
246         if (!c)
247                 return -ENOMEM;
248
249         free(arg_default_unit);
250         arg_default_unit = c;
251
252         return 0;
253 }
254
255 static int parse_proc_cmdline_word(const char *word) {
256
257         static const char * const rlmap[] = {
258                 "emergency", SPECIAL_EMERGENCY_TARGET,
259                 "-b",        SPECIAL_EMERGENCY_TARGET,
260                 "single",    SPECIAL_RESCUE_TARGET,
261                 "-s",        SPECIAL_RESCUE_TARGET,
262                 "s",         SPECIAL_RESCUE_TARGET,
263                 "S",         SPECIAL_RESCUE_TARGET,
264                 "1",         SPECIAL_RESCUE_TARGET,
265                 "2",         SPECIAL_RUNLEVEL2_TARGET,
266                 "3",         SPECIAL_RUNLEVEL3_TARGET,
267                 "4",         SPECIAL_RUNLEVEL4_TARGET,
268                 "5",         SPECIAL_RUNLEVEL5_TARGET,
269         };
270
271         assert(word);
272
273         if (startswith(word, "systemd.unit=")) {
274
275                 if (!in_initrd())
276                         return set_default_unit(word + 13);
277
278         } else if (startswith(word, "rd.systemd.unit=")) {
279
280                 if (in_initrd())
281                         return set_default_unit(word + 16);
282
283         } else if (startswith(word, "systemd.log_target=")) {
284
285                 if (log_set_target_from_string(word + 19) < 0)
286                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
287
288         } else if (startswith(word, "systemd.log_level=")) {
289
290                 if (log_set_max_level_from_string(word + 18) < 0)
291                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
292
293         } else if (startswith(word, "systemd.log_color=")) {
294
295                 if (log_show_color_from_string(word + 18) < 0)
296                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
297
298         } else if (startswith(word, "systemd.log_location=")) {
299
300                 if (log_show_location_from_string(word + 21) < 0)
301                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
302
303         } else if (startswith(word, "systemd.dump_core=")) {
304                 int r;
305
306                 if ((r = parse_boolean(word + 18)) < 0)
307                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
308                 else
309                         arg_dump_core = r;
310
311         } else if (startswith(word, "systemd.crash_shell=")) {
312                 int r;
313
314                 if ((r = parse_boolean(word + 20)) < 0)
315                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
316                 else
317                         arg_crash_shell = r;
318
319         } else if (startswith(word, "systemd.confirm_spawn=")) {
320                 int r;
321
322                 if ((r = parse_boolean(word + 22)) < 0)
323                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
324                 else
325                         arg_confirm_spawn = r;
326
327         } else if (startswith(word, "systemd.crash_chvt=")) {
328                 int k;
329
330                 if (safe_atoi(word + 19, &k) < 0)
331                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
332                 else
333                         arg_crash_chvt = k;
334
335         } else if (startswith(word, "systemd.show_status=")) {
336                 int r;
337
338                 r = parse_show_status(word + 20, &arg_show_status);
339                 if (r < 0)
340                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
341         } else if (startswith(word, "systemd.default_standard_output=")) {
342                 int r;
343
344                 if ((r = exec_output_from_string(word + 32)) < 0)
345                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
346                 else
347                         arg_default_std_output = r;
348         } else if (startswith(word, "systemd.default_standard_error=")) {
349                 int r;
350
351                 if ((r = exec_output_from_string(word + 31)) < 0)
352                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
353                 else
354                         arg_default_std_error = r;
355         } else if (startswith(word, "systemd.setenv=")) {
356                 _cleanup_free_ char *cenv = NULL;
357
358                 cenv = strdup(word + 15);
359                 if (!cenv)
360                         return -ENOMEM;
361
362                 if (env_assignment_is_valid(cenv)) {
363                         char **env;
364
365                         env = strv_env_set(arg_default_environment, cenv);
366                         if (env)
367                                 arg_default_environment = env;
368                         else
369                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
370                 } else
371                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
372
373         } else if (startswith(word, "systemd.") ||
374                    (in_initrd() && startswith(word, "rd.systemd."))) {
375
376                 const char *c;
377
378                 /* Ignore systemd.journald.xyz and friends */
379                 c = word;
380                 if (startswith(c, "rd."))
381                         c += 3;
382                 if (startswith(c, "systemd."))
383                         c += 8;
384                 if (c[strcspn(c, ".=")] != '.')  {
385
386                         log_warning("Unknown kernel switch %s. Ignoring.", word);
387
388                         log_info("Supported kernel switches:\n"
389                                  "systemd.unit=UNIT                        Default unit to start\n"
390                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
391                                  "systemd.dump_core=0|1                    Dump core on crash\n"
392                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
393                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
394                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
395                                  "systemd.show_status=0|1|auto             Show status updates on the console during bootup\n"
396                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
397                                  "                                         Log target\n"
398                                  "systemd.log_level=LEVEL                  Log level\n"
399                                  "systemd.log_color=0|1                    Highlight important log messages\n"
400                                  "systemd.log_location=0|1                 Include code location in log messages\n"
401                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
402                                  "                                         Set default log output for services\n"
403                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
404                                  "                                         Set default log error output for services\n"
405                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
406                 }
407
408         } else if (streq(word, "quiet")) {
409                 if (arg_show_status == SHOW_STATUS_UNSET)
410                         arg_show_status = SHOW_STATUS_AUTO;
411         } else if (streq(word, "debug")) {
412                 /* Log to kmsg, the journal socket will fill up before the
413                  * journal is started and tools running during that time
414                  * will block with every log message for for 60 seconds,
415                  * before they give up. */
416                 log_set_max_level(LOG_DEBUG);
417                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG);
418         } else if (!in_initrd()) {
419                 unsigned i;
420
421                 /* SysV compatibility */
422                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
423                         if (streq(word, rlmap[i]))
424                                 return set_default_unit(rlmap[i+1]);
425         }
426
427         return 0;
428 }
429
/* Generates a configuration parser callback called 'name' that hands the
 * right-hand side of the assignment to 'func', a function taking the value
 * as a string and returning a negative errno-style value on failure.
 *
 * A failure is reported through log_syntax() using 'descr' to describe the
 * setting; the generated callback itself always returns 0. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
461
462 static int config_parse_cpu_affinity2(const char *unit,
463                                       const char *filename,
464                                       unsigned line,
465                                       const char *section,
466                                       unsigned section_line,
467                                       const char *lvalue,
468                                       int ltype,
469                                       const char *rvalue,
470                                       void *data,
471                                       void *userdata) {
472
473         char *w;
474         size_t l;
475         char *state;
476         cpu_set_t *c = NULL;
477         unsigned ncpus = 0;
478
479         assert(filename);
480         assert(lvalue);
481         assert(rvalue);
482
483         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
484                 char *t;
485                 int r;
486                 unsigned cpu;
487
488                 if (!(t = strndup(w, l)))
489                         return log_oom();
490
491                 r = safe_atou(t, &cpu);
492                 free(t);
493
494                 if (!c)
495                         if (!(c = cpu_set_malloc(&ncpus)))
496                                 return log_oom();
497
498                 if (r < 0 || cpu >= ncpus) {
499                         log_syntax(unit, LOG_ERR, filename, line, -r,
500                                    "Failed to parse CPU affinity '%s'", rvalue);
501                         CPU_FREE(c);
502                         return -EBADMSG;
503                 }
504
505                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
506         }
507
508         if (c) {
509                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
510                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
511
512                 CPU_FREE(c);
513         }
514
515         return 0;
516 }
517
/* Frees a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        size_t k;

        if (l == NULL)
                return;

        for (k = 0; l[k] != NULL; k++)
                strv_free(l[k]);

        free(l);
}
529
530 static void free_join_controllers(void) {
531         strv_free_free(arg_join_controllers);
532         arg_join_controllers = NULL;
533 }
534
535 static int config_parse_join_controllers(const char *unit,
536                                          const char *filename,
537                                          unsigned line,
538                                          const char *section,
539                                          unsigned section_line,
540                                          const char *lvalue,
541                                          int ltype,
542                                          const char *rvalue,
543                                          void *data,
544                                          void *userdata) {
545
546         unsigned n = 0;
547         char *state, *w;
548         size_t length;
549
550         assert(filename);
551         assert(lvalue);
552         assert(rvalue);
553
554         free_join_controllers();
555
556         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
557                 char *s, **l;
558
559                 s = strndup(w, length);
560                 if (!s)
561                         return log_oom();
562
563                 l = strv_split(s, ",");
564                 free(s);
565
566                 strv_uniq(l);
567
568                 if (strv_length(l) <= 1) {
569                         strv_free(l);
570                         continue;
571                 }
572
573                 if (!arg_join_controllers) {
574                         arg_join_controllers = new(char**, 2);
575                         if (!arg_join_controllers) {
576                                 strv_free(l);
577                                 return log_oom();
578                         }
579
580                         arg_join_controllers[0] = l;
581                         arg_join_controllers[1] = NULL;
582
583                         n = 1;
584                 } else {
585                         char ***a;
586                         char ***t;
587
588                         t = new0(char**, n+2);
589                         if (!t) {
590                                 strv_free(l);
591                                 return log_oom();
592                         }
593
594                         n = 0;
595
596                         for (a = arg_join_controllers; *a; a++) {
597
598                                 if (strv_overlap(*a, l)) {
599                                         if (strv_extend_strv(&l, *a) < 0) {
600                                                 strv_free(l);
601                                                 strv_free_free(t);
602                                                 return log_oom();
603                                         }
604
605                                 } else {
606                                         char **c;
607
608                                         c = strv_copy(*a);
609                                         if (!c) {
610                                                 strv_free(l);
611                                                 strv_free_free(t);
612                                                 return log_oom();
613                                         }
614
615                                         t[n++] = c;
616                                 }
617                         }
618
619                         t[n++] = strv_uniq(l);
620
621                         strv_free_free(arg_join_controllers);
622                         arg_join_controllers = t;
623                 }
624         }
625
626         return 0;
627 }
628
629 static int parse_config_file(void) {
630
631         const ConfigTableItem items[] = {
632                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
633                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
634                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
635                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
636                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
637                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
638                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
639                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
640                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
641                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
642                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
643                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
644                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
645 #ifdef HAVE_SECCOMP
646                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
647 #endif
648                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
649                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
650                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
651                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
652                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
653                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
654                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
655                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
656                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
657                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
658                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
659                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
660                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
661                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
662                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
663                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
664                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
665                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
666                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
667                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
668                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
669                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
670                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
671                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
672                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
673                 {}
674         };
675
676         _cleanup_fclose_ FILE *f;
677         const char *fn;
678         int r;
679
680         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
681         f = fopen(fn, "re");
682         if (!f) {
683                 if (errno == ENOENT)
684                         return 0;
685
686                 log_warning("Failed to open configuration file '%s': %m", fn);
687                 return 0;
688         }
689
690         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
691         if (r < 0)
692                 log_warning("Failed to parse configuration file: %s", strerror(-r));
693
694         return 0;
695 }
696
697 static int parse_argv(int argc, char *argv[]) {
698
699         enum {
700                 ARG_LOG_LEVEL = 0x100,
701                 ARG_LOG_TARGET,
702                 ARG_LOG_COLOR,
703                 ARG_LOG_LOCATION,
704                 ARG_UNIT,
705                 ARG_SYSTEM,
706                 ARG_USER,
707                 ARG_TEST,
708                 ARG_VERSION,
709                 ARG_DUMP_CONFIGURATION_ITEMS,
710                 ARG_DUMP_CORE,
711                 ARG_CRASH_SHELL,
712                 ARG_CONFIRM_SPAWN,
713                 ARG_SHOW_STATUS,
714                 ARG_DESERIALIZE,
715                 ARG_SWITCHED_ROOT,
716                 ARG_DEFAULT_STD_OUTPUT,
717                 ARG_DEFAULT_STD_ERROR
718         };
719
720         static const struct option options[] = {
721                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
722                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
723                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
724                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
725                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
726                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
727                 { "user",                     no_argument,       NULL, ARG_USER                     },
728                 { "test",                     no_argument,       NULL, ARG_TEST                     },
729                 { "help",                     no_argument,       NULL, 'h'                          },
730                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
731                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
732                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
733                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
734                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
735                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
736                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
737                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
738                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
739                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
740                 { NULL,                       0,                 NULL, 0                            }
741         };
742
743         int c, r;
744
745         assert(argc >= 1);
746         assert(argv);
747
748         if (getpid() == 1)
749                 opterr = 0;
750
751         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
752
753                 switch (c) {
754
755                 case ARG_LOG_LEVEL:
756                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
757                                 log_error("Failed to parse log level %s.", optarg);
758                                 return r;
759                         }
760
761                         break;
762
763                 case ARG_LOG_TARGET:
764
765                         if ((r = log_set_target_from_string(optarg)) < 0) {
766                                 log_error("Failed to parse log target %s.", optarg);
767                                 return r;
768                         }
769
770                         break;
771
772                 case ARG_LOG_COLOR:
773
774                         if (optarg) {
775                                 if ((r = log_show_color_from_string(optarg)) < 0) {
776                                         log_error("Failed to parse log color setting %s.", optarg);
777                                         return r;
778                                 }
779                         } else
780                                 log_show_color(true);
781
782                         break;
783
784                 case ARG_LOG_LOCATION:
785
786                         if (optarg) {
787                                 if ((r = log_show_location_from_string(optarg)) < 0) {
788                                         log_error("Failed to parse log location setting %s.", optarg);
789                                         return r;
790                                 }
791                         } else
792                                 log_show_location(true);
793
794                         break;
795
796                 case ARG_DEFAULT_STD_OUTPUT:
797
798                         if ((r = exec_output_from_string(optarg)) < 0) {
799                                 log_error("Failed to parse default standard output setting %s.", optarg);
800                                 return r;
801                         } else
802                                 arg_default_std_output = r;
803                         break;
804
805                 case ARG_DEFAULT_STD_ERROR:
806
807                         if ((r = exec_output_from_string(optarg)) < 0) {
808                                 log_error("Failed to parse default standard error output setting %s.", optarg);
809                                 return r;
810                         } else
811                                 arg_default_std_error = r;
812                         break;
813
814                 case ARG_UNIT:
815
816                         if ((r = set_default_unit(optarg)) < 0) {
817                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
818                                 return r;
819                         }
820
821                         break;
822
823                 case ARG_SYSTEM:
824                         arg_running_as = SYSTEMD_SYSTEM;
825                         break;
826
827                 case ARG_USER:
828                         arg_running_as = SYSTEMD_USER;
829                         break;
830
831                 case ARG_TEST:
832                         arg_action = ACTION_TEST;
833                         break;
834
835                 case ARG_VERSION:
836                         arg_action = ACTION_VERSION;
837                         break;
838
839                 case ARG_DUMP_CONFIGURATION_ITEMS:
840                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
841                         break;
842
843                 case ARG_DUMP_CORE:
844                         r = optarg ? parse_boolean(optarg) : 1;
845                         if (r < 0) {
846                                 log_error("Failed to parse dump core boolean %s.", optarg);
847                                 return r;
848                         }
849                         arg_dump_core = r;
850                         break;
851
852                 case ARG_CRASH_SHELL:
853                         r = optarg ? parse_boolean(optarg) : 1;
854                         if (r < 0) {
855                                 log_error("Failed to parse crash shell boolean %s.", optarg);
856                                 return r;
857                         }
858                         arg_crash_shell = r;
859                         break;
860
861                 case ARG_CONFIRM_SPAWN:
862                         r = optarg ? parse_boolean(optarg) : 1;
863                         if (r < 0) {
864                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
865                                 return r;
866                         }
867                         arg_confirm_spawn = r;
868                         break;
869
870                 case ARG_SHOW_STATUS:
871                         if (optarg) {
872                                 r = parse_show_status(optarg, &arg_show_status);
873                                 if (r < 0) {
874                                         log_error("Failed to parse show status boolean %s.", optarg);
875                                         return r;
876                                 }
877                         } else
878                                 arg_show_status = SHOW_STATUS_YES;
879                         break;
880
881                 case ARG_DESERIALIZE: {
882                         int fd;
883                         FILE *f;
884
885                         r = safe_atoi(optarg, &fd);
886                         if (r < 0 || fd < 0) {
887                                 log_error("Failed to parse deserialize option %s.", optarg);
888                                 return r < 0 ? r : -EINVAL;
889                         }
890
891                         fd_cloexec(fd, true);
892
893                         f = fdopen(fd, "r");
894                         if (!f) {
895                                 log_error("Failed to open serialization fd: %m");
896                                 return -errno;
897                         }
898
899                         if (arg_serialization)
900                                 fclose(arg_serialization);
901
902                         arg_serialization = f;
903
904                         break;
905                 }
906
907                 case ARG_SWITCHED_ROOT:
908                         arg_switched_root = true;
909                         break;
910
911                 case 'h':
912                         arg_action = ACTION_HELP;
913                         break;
914
915                 case 'D':
916                         log_set_max_level(LOG_DEBUG);
917                         break;
918
919                 case 'b':
920                 case 's':
921                 case 'z':
922                         /* Just to eat away the sysvinit kernel
923                          * cmdline args without getopt() error
924                          * messages that we'll parse in
925                          * parse_proc_cmdline_word() or ignore. */
926
927                 case '?':
928                 default:
929                         if (getpid() != 1) {
930                                 log_error("Unknown option code %c", c);
931                                 return -EINVAL;
932                         }
933
934                         break;
935                 }
936
937         if (optind < argc && getpid() != 1) {
938                 /* Hmm, when we aren't run as init system
939                  * let's complain about excess arguments */
940
941                 log_error("Excess arguments.");
942                 return -EINVAL;
943         }
944
945         if (detect_container(NULL) > 0) {
946                 char **a;
947
948                 /* All /proc/cmdline arguments the kernel didn't
949                  * understand it passed to us. We're not really
950                  * interested in that usually since /proc/cmdline is
951                  * more interesting and complete. With one exception:
952                  * if we are run in a container /proc/cmdline is not
953                  * relevant for the container, hence we rely on argv[]
954                  * instead. */
955
956                 for (a = argv; a < argv + argc; a++) {
957                         r = parse_proc_cmdline_word(*a);
958                         if (r < 0) {
959                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
960                                 return r;
961                         }
962                 }
963         }
964
965         return 0;
966 }
967
968 static int help(void) {
969
970         printf("%s [OPTIONS...]\n\n"
971                "Starts up and maintains the system or user services.\n\n"
972                "  -h --help                      Show this help\n"
973                "     --test                      Determine startup sequence, dump it and exit\n"
974                "     --dump-configuration-items  Dump understood unit configuration items\n"
975                "     --unit=UNIT                 Set default unit\n"
976                "     --system                    Run a system instance, even if PID != 1\n"
977                "     --user                      Run a user instance\n"
978                "     --dump-core[=0|1]           Dump core on crash\n"
979                "     --crash-shell[=0|1]         Run shell on crash\n"
980                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
981                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
982                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
983                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
984                "     --log-color[=0|1]           Highlight important log messages\n"
985                "     --log-location[=0|1]        Include code location in log messages\n"
986                "     --default-standard-output=  Set default standard output for services\n"
987                "     --default-standard-error=   Set default standard error output for services\n",
988                program_invocation_short_name);
989
990         return 0;
991 }
992
993 static int version(void) {
994         puts(PACKAGE_STRING);
995         puts(SYSTEMD_FEATURES);
996
997         return 0;
998 }
999
1000 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1001         FILE *f = NULL;
1002         FDSet *fds = NULL;
1003         int r;
1004
1005         assert(m);
1006         assert(_f);
1007         assert(_fds);
1008
1009         r = manager_open_serialization(m, &f);
1010         if (r < 0) {
1011                 log_error("Failed to create serialization file: %s", strerror(-r));
1012                 goto fail;
1013         }
1014
1015         /* Make sure nothing is really destructed when we shut down */
1016         m->n_reloading ++;
1017         bus_manager_send_reloading(m, true);
1018
1019         fds = fdset_new();
1020         if (!fds) {
1021                 r = -ENOMEM;
1022                 log_error("Failed to allocate fd set: %s", strerror(-r));
1023                 goto fail;
1024         }
1025
1026         r = manager_serialize(m, f, fds, switching_root);
1027         if (r < 0) {
1028                 log_error("Failed to serialize state: %s", strerror(-r));
1029                 goto fail;
1030         }
1031
1032         if (fseeko(f, 0, SEEK_SET) < 0) {
1033                 log_error("Failed to rewind serialization fd: %m");
1034                 goto fail;
1035         }
1036
1037         r = fd_cloexec(fileno(f), false);
1038         if (r < 0) {
1039                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1040                 goto fail;
1041         }
1042
1043         r = fdset_cloexec(fds, false);
1044         if (r < 0) {
1045                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1046                 goto fail;
1047         }
1048
1049         *_f = f;
1050         *_fds = fds;
1051
1052         return 0;
1053
1054 fail:
1055         fdset_free(fds);
1056
1057         if (f)
1058                 fclose(f);
1059
1060         return r;
1061 }
1062
1063 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1064         struct rlimit nl;
1065         int r;
1066
1067         assert(saved_rlimit);
1068
1069         /* Save the original RLIMIT_NOFILE so that we can reset it
1070          * later when transitioning from the initrd to the main
1071          * systemd or suchlike. */
1072         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1073                 log_error("Reading RLIMIT_NOFILE failed: %m");
1074                 return -errno;
1075         }
1076
1077         /* Make sure forked processes get the default kernel setting */
1078         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1079                 struct rlimit *rl;
1080
1081                 rl = newdup(struct rlimit, saved_rlimit, 1);
1082                 if (!rl)
1083                         return log_oom();
1084
1085                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1086         }
1087
1088         /* Bump up the resource limit for ourselves substantially */
1089         nl.rlim_cur = nl.rlim_max = 64*1024;
1090         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1091         if (r < 0) {
1092                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1093                 return r;
1094         }
1095
1096         return 0;
1097 }
1098
/* Logs a warning unless /etc/mtab is a symlink whose target is
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        bool good = false;
        char *target;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                if (streq(target, "/proc/self/mounts"))
                        good = true;
                else if (streq(target, "/proc/mounts"))
                        good = true;

                free(target);
        }

        if (good)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1118
/* Logs a warning if /usr looks like a separate file system that has
 * not been mounted yet. The check used is whether dir_is_empty()
 * reports /usr as empty; a non-empty /usr, or a failure to inspect
 * it, passes silently. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1130
/* If /proc/cgroups does not exist, logs a warning about the missing
 * control group support and delays start-up by ten seconds. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1145
1146 static int initialize_join_controllers(void) {
1147         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1148          * + "net_prio". We'd like to add "cpuset" to the mix, but
1149          * "cpuset" does't really work for groups with no initialized
1150          * attributes. */
1151
1152         arg_join_controllers = new(char**, 3);
1153         if (!arg_join_controllers)
1154                 return -ENOMEM;
1155
1156         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1157         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1158         arg_join_controllers[2] = NULL;
1159
1160         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1161                 free_join_controllers();
1162                 return -ENOMEM;
1163         }
1164
1165         return 0;
1166 }
1167
1168 static int enforce_syscall_archs(Set *archs) {
1169 #ifdef HAVE_SECCOMP
1170         scmp_filter_ctx *seccomp;
1171         Iterator i;
1172         void *id;
1173         int r;
1174
1175         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1176         if (!seccomp)
1177                 return log_oom();
1178
1179         SET_FOREACH(id, arg_syscall_archs, i) {
1180                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1181                 if (r == -EEXIST)
1182                         continue;
1183                 if (r < 0) {
1184                         log_error("Failed to add architecture to seccomp: %s", strerror(-r));
1185                         goto finish;
1186                 }
1187         }
1188
1189         r = seccomp_load(seccomp);
1190         if (r < 0)
1191                 log_error("Failed to add install architecture seccomp: %s", strerror(-r));
1192
1193 finish:
1194         seccomp_release(seccomp);
1195         return r;
1196 #else
1197         return 0;
1198 #endif
1199 }
1200
1201 int main(int argc, char *argv[]) {
1202         Manager *m = NULL;
1203         int r, retval = EXIT_FAILURE;
1204         usec_t before_startup, after_startup;
1205         char timespan[FORMAT_TIMESPAN_MAX];
1206         FDSet *fds = NULL;
1207         bool reexecute = false;
1208         const char *shutdown_verb = NULL;
1209         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1210         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1211         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1212         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1213         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1214         static char systemd[] = "systemd";
1215         bool skip_setup = false;
1216         unsigned j;
1217         bool loaded_policy = false;
1218         bool arm_reboot_watchdog = false;
1219         bool queue_default_job = false;
1220         char *switch_root_dir = NULL, *switch_root_init = NULL;
1221         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1222
1223 #ifdef HAVE_SYSV_COMPAT
1224         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1225                 /* This is compatibility support for SysV, where
1226                  * calling init as a user is identical to telinit. */
1227
1228                 errno = -ENOENT;
1229                 execv(SYSTEMCTL_BINARY_PATH, argv);
1230                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1231                 return 1;
1232         }
1233 #endif
1234
1235         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1236         dual_timestamp_get(&userspace_timestamp);
1237
1238         /* Determine if this is a reexecution or normal bootup. We do
1239          * the full command line parsing much later, so let's just
1240          * have a quick peek here. */
1241         if (strv_find(argv+1, "--deserialize"))
1242                 skip_setup = true;
1243
1244         /* If we have switched root, do all the special setup
1245          * things */
1246         if (strv_find(argv+1, "--switched-root"))
1247                 skip_setup = false;
1248
1249         /* If we get started via the /sbin/init symlink then we are
1250            called 'init'. After a subsequent reexecution we are then
1251            called 'systemd'. That is confusing, hence let's call us
1252            systemd right-away. */
1253         program_invocation_short_name = systemd;
1254         prctl(PR_SET_NAME, systemd);
1255
1256         saved_argv = argv;
1257         saved_argc = argc;
1258
1259         log_show_color(isatty(STDERR_FILENO) > 0);
1260
1261         /* Disable the umask logic */
1262         if (getpid() == 1)
1263                 umask(0);
1264
1265         if (getpid() == 1 && detect_container(NULL) <= 0) {
1266
1267                 /* Running outside of a container as PID 1 */
1268                 arg_running_as = SYSTEMD_SYSTEM;
1269                 make_null_stdio();
1270                 log_set_target(LOG_TARGET_KMSG);
1271                 log_open();
1272
1273                 if (in_initrd())
1274                         initrd_timestamp = userspace_timestamp;
1275
1276                 if (!skip_setup) {
1277                         mount_setup_early();
1278                         dual_timestamp_get(&security_start_timestamp);
1279                         if (selinux_setup(&loaded_policy) < 0)
1280                                 goto finish;
1281                         if (ima_setup() < 0)
1282                                 goto finish;
1283                         if (smack_setup() < 0)
1284                                 goto finish;
1285                         dual_timestamp_get(&security_finish_timestamp);
1286                 }
1287
1288                 if (label_init(NULL) < 0)
1289                         goto finish;
1290
1291                 if (!skip_setup) {
1292                         if (hwclock_is_localtime() > 0) {
1293                                 int min;
1294
1295                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1296                                 r = hwclock_set_timezone(&min);
1297                                 if (r < 0)
1298                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1299                                 else
1300                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1301                         } else if (!in_initrd()) {
1302                                 /*
1303                                  * Do dummy first-time call to seal the kernel's time warp magic
1304                                  *
1305                                  * Do not call this this from inside the initrd. The initrd might not
1306                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1307                                  * that way. In such case, we need to delay the time-warp or the sealing
1308                                  * until we reach the real system.
1309                                  */
1310                                 hwclock_reset_timezone();
1311
1312                                 /* Tell the kernel our timezone */
1313                                 r = hwclock_set_timezone(NULL);
1314                                 if (r < 0)
1315                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1316                         }
1317                 }
1318
1319                 /* Set the default for later on, but don't actually
1320                  * open the logs like this for now. Note that if we
1321                  * are transitioning from the initrd there might still
1322                  * be journal fd open, and we shouldn't attempt
1323                  * opening that before we parsed /proc/cmdline which
1324                  * might redirect output elsewhere. */
1325                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1326
1327         } else if (getpid() == 1) {
1328                 /* Running inside a container, as PID 1 */
1329                 arg_running_as = SYSTEMD_SYSTEM;
1330                 log_set_target(LOG_TARGET_CONSOLE);
1331                 log_close_console(); /* force reopen of /dev/console */
1332                 log_open();
1333
1334                 /* For the later on, see above... */
1335                 log_set_target(LOG_TARGET_JOURNAL);
1336
1337                 /* clear the kernel timestamp,
1338                  * because we are in a container */
1339                 kernel_timestamp.monotonic = 0ULL;
1340                 kernel_timestamp.realtime = 0ULL;
1341
1342         } else {
1343                 /* Running as user instance */
1344                 arg_running_as = SYSTEMD_USER;
1345                 log_set_target(LOG_TARGET_AUTO);
1346                 log_open();
1347
1348                 /* clear the kernel timestamp,
1349                  * because we are not PID 1 */
1350                 kernel_timestamp.monotonic = 0ULL;
1351                 kernel_timestamp.realtime = 0ULL;
1352         }
1353
1354         /* Initialize default unit */
1355         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1356         if (r < 0) {
1357                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1358                 goto finish;
1359         }
1360
1361         r = initialize_join_controllers();
1362         if (r < 0)
1363                 goto finish;
1364
1365         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1366          * /proc/$PID/fd is available. */
1367         if (getpid() == 1) {
1368                 r = mount_setup(loaded_policy);
1369                 if (r < 0)
1370                         goto finish;
1371         }
1372
1373         /* Reset all signal handlers. */
1374         assert_se(reset_all_signal_handlers() == 0);
1375
1376         ignore_signals(SIGNALS_IGNORE, -1);
1377
1378         if (parse_config_file() < 0)
1379                 goto finish;
1380
1381         if (arg_running_as == SYSTEMD_SYSTEM)
1382                 if (parse_proc_cmdline(parse_proc_cmdline_word) < 0)
1383                         goto finish;
1384
1385         log_parse_environment();
1386
1387         if (parse_argv(argc, argv) < 0)
1388                 goto finish;
1389
1390         if (arg_action == ACTION_TEST &&
1391             geteuid() == 0) {
1392                 log_error("Don't run test mode as root.");
1393                 goto finish;
1394         }
1395
1396         if (arg_running_as == SYSTEMD_USER &&
1397             arg_action == ACTION_RUN &&
1398             sd_booted() <= 0) {
1399                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1400                 goto finish;
1401         }
1402
1403         if (arg_running_as == SYSTEMD_SYSTEM &&
1404             arg_action == ACTION_RUN &&
1405             running_in_chroot() > 0) {
1406                 log_error("Cannot be run in a chroot() environment.");
1407                 goto finish;
1408         }
1409
1410         if (arg_action == ACTION_HELP) {
1411                 retval = help();
1412                 goto finish;
1413         } else if (arg_action == ACTION_VERSION) {
1414                 retval = version();
1415                 goto finish;
1416         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1417                 unit_dump_config_items(stdout);
1418                 retval = EXIT_SUCCESS;
1419                 goto finish;
1420         } else if (arg_action == ACTION_DONE) {
1421                 retval = EXIT_SUCCESS;
1422                 goto finish;
1423         }
1424
1425         if (arg_running_as == SYSTEMD_USER &&
1426             !getenv("XDG_RUNTIME_DIR")) {
1427                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1428                 goto finish;
1429         }
1430
1431         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1432
1433         /* Close logging fds, in order not to confuse fdset below */
1434         log_close();
1435
1436         /* Remember open file descriptors for later deserialization */
1437         r = fdset_new_fill(&fds);
1438         if (r < 0) {
1439                 log_error("Failed to allocate fd set: %s", strerror(-r));
1440                 goto finish;
1441         } else
1442                 fdset_cloexec(fds, true);
1443
1444         if (arg_serialization)
1445                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1446
1447         if (arg_running_as == SYSTEMD_SYSTEM)
1448                 /* Become a session leader if we aren't one yet. */
1449                 setsid();
1450
1451         /* Move out of the way, so that we won't block unmounts */
1452         assert_se(chdir("/")  == 0);
1453
1454         /* Reset the console, but only if this is really init and we
1455          * are freshly booted */
1456         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1457                 console_setup(getpid() == 1 && !skip_setup);
1458
1459         /* Open the logging devices, if possible and necessary */
1460         log_open();
1461
1462         /* Make sure we leave a core dump without panicking the
1463          * kernel. */
1464         if (getpid() == 1) {
1465                 install_crash_handler();
1466
1467                 r = mount_cgroup_controllers(arg_join_controllers);
1468                 if (r < 0)
1469                         goto finish;
1470         }
1471
1472         if (arg_running_as == SYSTEMD_SYSTEM) {
1473                 const char *virtualization = NULL;
1474
1475                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1476
1477                 detect_virtualization(&virtualization);
1478                 if (virtualization)
1479                         log_info("Detected virtualization '%s'.", virtualization);
1480
1481                 if (in_initrd())
1482                         log_info("Running in initial RAM disk.");
1483
1484         } else {
1485                 _cleanup_free_ char *t = uid_to_name(getuid());
1486                 log_debug(PACKAGE_STRING " running in user mode for user "PID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1487                           getuid(), t);
1488         }
1489
1490         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1491                 if (arg_show_status > 0 || plymouth_running())
1492                         status_welcome();
1493
1494 #ifdef HAVE_KMOD
1495                 if (detect_container(NULL) <= 0)
1496                         kmod_setup();
1497 #endif
1498                 hostname_setup();
1499                 machine_id_setup();
1500                 loopback_setup();
1501
1502                 test_mtab();
1503                 test_usr();
1504                 test_cgroups();
1505         }
1506
1507         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1508                 watchdog_set_timeout(&arg_runtime_watchdog);
1509
1510         if (arg_timer_slack_nsec != (nsec_t) -1)
1511                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1512                         log_error("Failed to adjust timer slack: %m");
1513
1514         if (arg_capability_bounding_set_drop) {
1515                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1516                 if (r < 0) {
1517                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1518                         goto finish;
1519                 }
1520                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1521                 if (r < 0) {
1522                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1523                         goto finish;
1524                 }
1525         }
1526
1527         if (arg_syscall_archs) {
1528                 r = enforce_syscall_archs(arg_syscall_archs);
1529                 if (r < 0)
1530                         goto finish;
1531         }
1532
1533         if (arg_running_as == SYSTEMD_USER) {
1534                 /* Become reaper of our children */
1535                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1536                         log_warning("Failed to make us a subreaper: %m");
1537                         if (errno == EINVAL)
1538                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1539                 }
1540         }
1541
1542         if (arg_running_as == SYSTEMD_SYSTEM)
1543                 bump_rlimit_nofile(&saved_rlimit_nofile);
1544
1545         r = manager_new(arg_running_as, &m);
1546         if (r < 0) {
1547                 log_error("Failed to allocate manager object: %s", strerror(-r));
1548                 goto finish;
1549         }
1550
1551         m->confirm_spawn = arg_confirm_spawn;
1552         m->default_std_output = arg_default_std_output;
1553         m->default_std_error = arg_default_std_error;
1554         m->default_restart_usec = arg_default_restart_usec;
1555         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1556         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1557         m->default_start_limit_interval = arg_default_start_limit_interval;
1558         m->default_start_limit_burst = arg_default_start_limit_burst;
1559         m->runtime_watchdog = arg_runtime_watchdog;
1560         m->shutdown_watchdog = arg_shutdown_watchdog;
1561         m->userspace_timestamp = userspace_timestamp;
1562         m->kernel_timestamp = kernel_timestamp;
1563         m->initrd_timestamp = initrd_timestamp;
1564         m->security_start_timestamp = security_start_timestamp;
1565         m->security_finish_timestamp = security_finish_timestamp;
1566
1567         manager_set_default_rlimits(m, arg_default_rlimit);
1568
1569         if (arg_default_environment)
1570                 manager_environment_add(m, NULL, arg_default_environment);
1571
1572         if (arg_show_status == SHOW_STATUS_UNSET)
1573                 arg_show_status = SHOW_STATUS_YES;
1574         manager_set_show_status(m, arg_show_status);
1575
1576         /* Remember whether we should queue the default job */
1577         queue_default_job = !arg_serialization || arg_switched_root;
1578
1579         before_startup = now(CLOCK_MONOTONIC);
1580
1581         r = manager_startup(m, arg_serialization, fds);
1582         if (r < 0)
1583                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1584
1585         /* This will close all file descriptors that were opened, but
1586          * not claimed by any unit. */
1587         fdset_free(fds);
1588         fds = NULL;
1589
1590         if (arg_serialization) {
1591                 fclose(arg_serialization);
1592                 arg_serialization = NULL;
1593         }
1594
1595         if (queue_default_job) {
1596                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1597                 Unit *target = NULL;
1598                 Job *default_unit_job;
1599
1600                 log_debug("Activating default unit: %s", arg_default_unit);
1601
1602                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1603                 if (r < 0)
1604                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1605                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1606                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1607                 else if (target->load_state == UNIT_MASKED)
1608                         log_error("Default target masked.");
1609
1610                 if (!target || target->load_state != UNIT_LOADED) {
1611                         log_info("Trying to load rescue target...");
1612
1613                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1614                         if (r < 0) {
1615                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1616                                 goto finish;
1617                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1618                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1619                                 goto finish;
1620                         } else if (target->load_state == UNIT_MASKED) {
1621                                 log_error("Rescue target masked.");
1622                                 goto finish;
1623                         }
1624                 }
1625
1626                 assert(target->load_state == UNIT_LOADED);
1627
1628                 if (arg_action == ACTION_TEST) {
1629                         printf("-> By units:\n");
1630                         manager_dump_units(m, stdout, "\t");
1631                 }
1632
1633                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1634                 if (r == -EPERM) {
1635                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1636
1637                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1638                         if (r < 0) {
1639                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1640                                 goto finish;
1641                         }
1642                 } else if (r < 0) {
1643                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1644                         goto finish;
1645                 }
1646
1647                 m->default_unit_job_id = default_unit_job->id;
1648
1649                 after_startup = now(CLOCK_MONOTONIC);
1650                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1651                          "Loaded units and determined initial transaction in %s.",
1652                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1653
1654                 if (arg_action == ACTION_TEST) {
1655                         printf("-> By jobs:\n");
1656                         manager_dump_jobs(m, stdout, "\t");
1657                         retval = EXIT_SUCCESS;
1658                         goto finish;
1659                 }
1660         }
1661
1662         for (;;) {
1663                 r = manager_loop(m);
1664                 if (r < 0) {
1665                         log_error("Failed to run mainloop: %s", strerror(-r));
1666                         goto finish;
1667                 }
1668
1669                 switch (m->exit_code) {
1670
1671                 case MANAGER_EXIT:
1672                         retval = EXIT_SUCCESS;
1673                         log_debug("Exit.");
1674                         goto finish;
1675
1676                 case MANAGER_RELOAD:
1677                         log_info("Reloading.");
1678                         r = manager_reload(m);
1679                         if (r < 0)
1680                                 log_error("Failed to reload: %s", strerror(-r));
1681                         break;
1682
1683                 case MANAGER_REEXECUTE:
1684
1685                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0)
1686                                 goto finish;
1687
1688                         reexecute = true;
1689                         log_notice("Reexecuting.");
1690                         goto finish;
1691
1692                 case MANAGER_SWITCH_ROOT:
1693                         /* Steal the switch root parameters */
1694                         switch_root_dir = m->switch_root;
1695                         switch_root_init = m->switch_root_init;
1696                         m->switch_root = m->switch_root_init = NULL;
1697
1698                         if (!switch_root_init)
1699                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0)
1700                                         goto finish;
1701
1702                         reexecute = true;
1703                         log_notice("Switching root.");
1704                         goto finish;
1705
1706                 case MANAGER_REBOOT:
1707                 case MANAGER_POWEROFF:
1708                 case MANAGER_HALT:
1709                 case MANAGER_KEXEC: {
1710                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1711                                 [MANAGER_REBOOT] = "reboot",
1712                                 [MANAGER_POWEROFF] = "poweroff",
1713                                 [MANAGER_HALT] = "halt",
1714                                 [MANAGER_KEXEC] = "kexec"
1715                         };
1716
1717                         assert_se(shutdown_verb = table[m->exit_code]);
1718                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1719
1720                         log_notice("Shutting down.");
1721                         goto finish;
1722                 }
1723
1724                 default:
1725                         assert_not_reached("Unknown exit code.");
1726                 }
1727         }
1728
1729 finish:
1730         if (m) {
1731                 manager_free(m);
1732                 m = NULL;
1733         }
1734
1735         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1736                 free(arg_default_rlimit[j]);
1737                 arg_default_rlimit[j] = NULL;
1738         }
1739
1740         free(arg_default_unit);
1741         arg_default_unit = NULL;
1742
1743         free_join_controllers();
1744
1745         strv_free(arg_default_environment);
1746         arg_default_environment = NULL;
1747
1748         set_free(arg_syscall_archs);
1749         arg_syscall_archs = NULL;
1750
1751         label_finish();
1752
1753         if (reexecute) {
1754                 const char **args;
1755                 unsigned i, args_size;
1756
1757                 /* Close and disarm the watchdog, so that the new
1758                  * instance can reinitialize it, but doesn't get
1759                  * rebooted while we do that */
1760                 watchdog_close(true);
1761
1762                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1763                  * that the new systemd can pass the kernel default to
1764                  * its child processes */
1765                 if (saved_rlimit_nofile.rlim_cur > 0)
1766                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1767
1768                 if (switch_root_dir) {
1769                         /* Kill all remaining processes from the
1770                          * initrd, but don't wait for them, so that we
1771                          * can handle the SIGCHLD for them after
1772                          * deserializing. */
1773                         broadcast_signal(SIGTERM, false, true);
1774
1775                         /* And switch root */
1776                         r = switch_root(switch_root_dir);
1777                         if (r < 0)
1778                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1779                 }
1780
1781                 args_size = MAX(6, argc+1);
1782                 args = newa(const char*, args_size);
1783
1784                 if (!switch_root_init) {
1785                         char sfd[16];
1786
1787                         /* First try to spawn ourselves with the right
1788                          * path, and with full serialization. We do
1789                          * this only if the user didn't specify an
1790                          * explicit init to spawn. */
1791
1792                         assert(arg_serialization);
1793                         assert(fds);
1794
1795                         snprintf(sfd, sizeof(sfd), "%i", fileno(arg_serialization));
1796                         char_array_0(sfd);
1797
1798                         i = 0;
1799                         args[i++] = SYSTEMD_BINARY_PATH;
1800                         if (switch_root_dir)
1801                                 args[i++] = "--switched-root";
1802                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1803                         args[i++] = "--deserialize";
1804                         args[i++] = sfd;
1805                         args[i++] = NULL;
1806
1807                         /* do not pass along the environment we inherit from the kernel or initrd */
1808                         if (switch_root_dir)
1809                                 clearenv();
1810
1811                         assert(i <= args_size);
1812                         execv(args[0], (char* const*) args);
1813                 }
1814
1815                 /* Try the fallback, if there is any, without any
1816                  * serialization. We pass the original argv[] and
1817                  * envp[]. (Well, modulo the ordering changes due to
1818                  * getopt() in argv[], and some cleanups in envp[],
1819                  * but let's hope that doesn't matter.) */
1820
1821                 if (arg_serialization) {
1822                         fclose(arg_serialization);
1823                         arg_serialization = NULL;
1824                 }
1825
1826                 if (fds) {
1827                         fdset_free(fds);
1828                         fds = NULL;
1829                 }
1830
1831                 /* Reopen the console */
1832                 make_console_stdio();
1833
1834                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1835                         args[i++] = argv[j];
1836                 args[i++] = NULL;
1837                 assert(i <= args_size);
1838
1839                 if (switch_root_init) {
1840                         args[0] = switch_root_init;
1841                         execv(args[0], (char* const*) args);
1842                         log_warning("Failed to execute configured init, trying fallback: %m");
1843                 }
1844
1845                 args[0] = "/sbin/init";
1846                 execv(args[0], (char* const*) args);
1847
1848                 if (errno == ENOENT) {
1849                         log_warning("No /sbin/init, trying fallback");
1850
1851                         args[0] = "/bin/sh";
1852                         args[1] = NULL;
1853                         execv(args[0], (char* const*) args);
1854                         log_error("Failed to execute /bin/sh, giving up: %m");
1855                 } else
1856                         log_warning("Failed to execute /sbin/init, giving up: %m");
1857         }
1858
1859         if (arg_serialization) {
1860                 fclose(arg_serialization);
1861                 arg_serialization = NULL;
1862         }
1863
1864         if (fds) {
1865                 fdset_free(fds);
1866                 fds = NULL;
1867         }
1868
1869 #ifdef HAVE_VALGRIND_VALGRIND_H
1870         /* If we are PID 1 and running under valgrind, then let's exit
1871          * here explicitly. valgrind will only generate nice output on
1872          * exit(), not on exec(), hence let's do the former not the
1873          * latter here. */
1874         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1875                 return 0;
1876 #endif
1877
1878         if (shutdown_verb) {
1879                 const char * command_line[] = {
1880                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1881                         shutdown_verb,
1882                         NULL
1883                 };
1884                 _cleanup_strv_free_ char **env_block = NULL;
1885                 env_block = strv_copy(environ);
1886
1887                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1888                         char *e;
1889
1890                         /* If we reboot let's set the shutdown
1891                          * watchdog and tell the shutdown binary to
1892                          * repeatedly ping it */
1893                         watchdog_set_timeout(&arg_shutdown_watchdog);
1894                         watchdog_close(false);
1895
1896                         /* Tell the binary how often to ping, ignore failure */
1897                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1898                                 strv_push(&env_block, e);
1899                 } else
1900                         watchdog_close(true);
1901
1902                 /* Avoid the creation of new processes forked by the
1903                  * kernel; at this point, we will not listen to the
1904                  * signals anyway */
1905                 if (detect_container(NULL) <= 0)
1906                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1907
1908                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1909                 log_error("Failed to execute shutdown binary, freezing: %m");
1910         }
1911
1912         if (getpid() == 1)
1913                 freeze();
1914
1915         return retval;
1916 }