chiark / gitweb /
21c0d274c615117f3bf9ba6f3f7cb4d43c3881fc
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dbus/dbus.h>
23
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <sys/wait.h>
33 #include <fcntl.h>
34 #include <sys/prctl.h>
35 #include <sys/mount.h>
36
37 #include "manager.h"
38 #include "log.h"
39 #include "load-fragment.h"
40 #include "fdset.h"
41 #include "special.h"
42 #include "conf-parser.h"
43 #include "bus-errors.h"
44 #include "missing.h"
45 #include "label.h"
46 #include "build.h"
47 #include "strv.h"
48 #include "def.h"
49 #include "virt.h"
50 #include "watchdog.h"
51 #include "path-util.h"
52 #include "switch-root.h"
53 #include "capability.h"
54 #include "killall.h"
55 #include "env-util.h"
56 #include "hwclock.h"
57 #include "sd-daemon.h"
58
59 #include "mount-setup.h"
60 #include "loopback-setup.h"
61 #ifdef HAVE_KMOD
62 #include "kmod-setup.h"
63 #endif
64 #include "hostname-setup.h"
65 #include "machine-id-setup.h"
66 #include "locale-setup.h"
67 #include "selinux-setup.h"
68 #include "ima-setup.h"
69 #include "fileio.h"
70 #include "smack-setup.h"
71
72 static enum {
73         ACTION_RUN,
74         ACTION_HELP,
75         ACTION_VERSION,
76         ACTION_TEST,
77         ACTION_DUMP_CONFIGURATION_ITEMS,
78         ACTION_DONE
79 } arg_action = ACTION_RUN;
80
81 static char *arg_default_unit = NULL;
82 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
83
84 static bool arg_dump_core = true;
85 static bool arg_crash_shell = false;
86 static int arg_crash_chvt = -1;
87 static bool arg_confirm_spawn = false;
88 static bool arg_show_status = true;
89 static bool arg_switched_root = false;
90 static char **arg_default_controllers = NULL;
91 static char ***arg_join_controllers = NULL;
92 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
93 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
94 static usec_t arg_runtime_watchdog = 0;
95 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
96 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
97 static uint64_t arg_capability_bounding_set_drop = 0;
98 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
99
100 static FILE* serialization = NULL;
101
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD so that the signal is delivered rather than left at its default
 * disposition. */
static void nop_handler(int sig) {
        (void) sig;
}
104
105 _noreturn_ static void crash(int sig) {
106
107         if (!arg_dump_core)
108                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
109         else {
110                 struct sigaction sa;
111                 pid_t pid;
112
113                 /* We want to wait for the core process, hence let's enable SIGCHLD */
114                 zero(sa);
115                 sa.sa_handler = nop_handler;
116                 sa.sa_flags = SA_NOCLDSTOP|SA_RESTART;
117                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
118
119                 if ((pid = fork()) < 0)
120                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
121
122                 else if (pid == 0) {
123                         struct rlimit rl;
124
125                         /* Enable default signal handler for core dump */
126                         zero(sa);
127                         sa.sa_handler = SIG_DFL;
128                         assert_se(sigaction(sig, &sa, NULL) == 0);
129
130                         /* Don't limit the core dump size */
131                         zero(rl);
132                         rl.rlim_cur = RLIM_INFINITY;
133                         rl.rlim_max = RLIM_INFINITY;
134                         setrlimit(RLIMIT_CORE, &rl);
135
136                         /* Just to be sure... */
137                         assert_se(chdir("/") == 0);
138
139                         /* Raise the signal again */
140                         raise(sig);
141
142                         assert_not_reached("We shouldn't be here...");
143                         _exit(1);
144
145                 } else {
146                         siginfo_t status;
147                         int r;
148
149                         /* Order things nicely. */
150                         if ((r = wait_for_terminate(pid, &status)) < 0)
151                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
152                         else if (status.si_code != CLD_DUMPED)
153                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
154                         else
155                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
156                 }
157         }
158
159         if (arg_crash_chvt)
160                 chvt(arg_crash_chvt);
161
162         if (arg_crash_shell) {
163                 struct sigaction sa;
164                 pid_t pid;
165
166                 log_info("Executing crash shell in 10s...");
167                 sleep(10);
168
169                 /* Let the kernel reap children for us */
170                 zero(sa);
171                 sa.sa_handler = SIG_IGN;
172                 sa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART;
173                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
174
175                 pid = fork();
176                 if (pid < 0)
177                         log_error("Failed to fork off crash shell: %m");
178                 else if (pid == 0) {
179                         make_console_stdio();
180                         execl("/bin/sh", "/bin/sh", NULL);
181
182                         log_error("execl() failed: %m");
183                         _exit(1);
184                 }
185
186                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
187         }
188
189         log_info("Freezing execution.");
190         freeze();
191 }
192
193 static void install_crash_handler(void) {
194         struct sigaction sa;
195
196         zero(sa);
197
198         sa.sa_handler = crash;
199         sa.sa_flags = SA_NODEFER;
200
201         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
202 }
203
204 static int console_setup(bool do_reset) {
205         int tty_fd, r;
206
207         /* If we are init, we connect stdin/stdout/stderr to /dev/null
208          * and make sure we don't have a controlling tty. */
209
210         release_terminal();
211
212         if (!do_reset)
213                 return 0;
214
215         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
216         if (tty_fd < 0) {
217                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
218                 return -tty_fd;
219         }
220
221         /* We don't want to force text mode.
222          * plymouth may be showing pictures already from initrd. */
223         r = reset_terminal_fd(tty_fd, false);
224         if (r < 0)
225                 log_error("Failed to reset /dev/console: %s", strerror(-r));
226
227         close_nointr_nofail(tty_fd);
228         return r;
229 }
230
231 static int set_default_unit(const char *u) {
232         char *c;
233
234         assert(u);
235
236         c = strdup(u);
237         if (!c)
238                 return -ENOMEM;
239
240         free(arg_default_unit);
241         arg_default_unit = c;
242
243         return 0;
244 }
245
246 static int parse_proc_cmdline_word(const char *word) {
247
248         static const char * const rlmap[] = {
249                 "emergency", SPECIAL_EMERGENCY_TARGET,
250                 "-b",        SPECIAL_EMERGENCY_TARGET,
251                 "single",    SPECIAL_RESCUE_TARGET,
252                 "-s",        SPECIAL_RESCUE_TARGET,
253                 "s",         SPECIAL_RESCUE_TARGET,
254                 "S",         SPECIAL_RESCUE_TARGET,
255                 "1",         SPECIAL_RESCUE_TARGET,
256                 "2",         SPECIAL_RUNLEVEL2_TARGET,
257                 "3",         SPECIAL_RUNLEVEL3_TARGET,
258                 "4",         SPECIAL_RUNLEVEL4_TARGET,
259                 "5",         SPECIAL_RUNLEVEL5_TARGET,
260         };
261
262         assert(word);
263
264         if (startswith(word, "systemd.unit=")) {
265
266                 if (!in_initrd())
267                         return set_default_unit(word + 13);
268
269         } else if (startswith(word, "rd.systemd.unit=")) {
270
271                 if (in_initrd())
272                         return set_default_unit(word + 16);
273
274         } else if (startswith(word, "systemd.log_target=")) {
275
276                 if (log_set_target_from_string(word + 19) < 0)
277                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
278
279         } else if (startswith(word, "systemd.log_level=")) {
280
281                 if (log_set_max_level_from_string(word + 18) < 0)
282                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
283
284         } else if (startswith(word, "systemd.log_color=")) {
285
286                 if (log_show_color_from_string(word + 18) < 0)
287                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
288
289         } else if (startswith(word, "systemd.log_location=")) {
290
291                 if (log_show_location_from_string(word + 21) < 0)
292                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
293
294         } else if (startswith(word, "systemd.dump_core=")) {
295                 int r;
296
297                 if ((r = parse_boolean(word + 18)) < 0)
298                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
299                 else
300                         arg_dump_core = r;
301
302         } else if (startswith(word, "systemd.crash_shell=")) {
303                 int r;
304
305                 if ((r = parse_boolean(word + 20)) < 0)
306                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
307                 else
308                         arg_crash_shell = r;
309
310         } else if (startswith(word, "systemd.confirm_spawn=")) {
311                 int r;
312
313                 if ((r = parse_boolean(word + 22)) < 0)
314                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
315                 else
316                         arg_confirm_spawn = r;
317
318         } else if (startswith(word, "systemd.crash_chvt=")) {
319                 int k;
320
321                 if (safe_atoi(word + 19, &k) < 0)
322                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
323                 else
324                         arg_crash_chvt = k;
325
326         } else if (startswith(word, "systemd.show_status=")) {
327                 int r;
328
329                 if ((r = parse_boolean(word + 20)) < 0)
330                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
331                 else
332                         arg_show_status = r;
333         } else if (startswith(word, "systemd.default_standard_output=")) {
334                 int r;
335
336                 if ((r = exec_output_from_string(word + 32)) < 0)
337                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
338                 else
339                         arg_default_std_output = r;
340         } else if (startswith(word, "systemd.default_standard_error=")) {
341                 int r;
342
343                 if ((r = exec_output_from_string(word + 31)) < 0)
344                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
345                 else
346                         arg_default_std_error = r;
347         } else if (startswith(word, "systemd.setenv=")) {
348                 _cleanup_free_ char *cenv = NULL;
349                 char *eq;
350                 int r;
351
352                 cenv = strdup(word + 15);
353                 if (!cenv)
354                         return -ENOMEM;
355
356                 eq = strchr(cenv, '=');
357                 if (!eq) {
358                         if (!env_name_is_valid(cenv))
359                                 log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
360                         else  {
361                                 r = unsetenv(cenv);
362                                 if (r < 0)
363                                         log_warning("Unsetting environment variable '%s' failed, ignoring: %m", cenv);
364                         }
365                 } else {
366                         if (!env_assignment_is_valid(cenv))
367                                 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", cenv);
368                         else {
369                                 *eq = 0;
370                                 r = setenv(cenv, eq + 1, 1);
371                                 if (r < 0)
372                                         log_warning("Setting environment variable '%s=%s' failed, ignoring: %m", cenv, eq + 1);
373                         }
374                 }
375
376         } else if (startswith(word, "systemd.") ||
377                    (in_initrd() && startswith(word, "rd.systemd."))) {
378
379                 const char *c;
380
381                 /* Ignore systemd.journald.xyz and friends */
382                 c = word;
383                 if (startswith(c, "rd."))
384                         c += 3;
385                 if (startswith(c, "systemd."))
386                         c += 8;
387                 if (c[strcspn(c, ".=")] != '.')  {
388
389                         log_warning("Unknown kernel switch %s. Ignoring.", word);
390
391                         log_info("Supported kernel switches:\n"
392                                  "systemd.unit=UNIT                        Default unit to start\n"
393                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
394                                  "systemd.dump_core=0|1                    Dump core on crash\n"
395                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
396                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
397                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
398                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
399                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
400                                  "                                         Log target\n"
401                                  "systemd.log_level=LEVEL                  Log level\n"
402                                  "systemd.log_color=0|1                    Highlight important log messages\n"
403                                  "systemd.log_location=0|1                 Include code location in log messages\n"
404                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
405                                  "                                         Set default log output for services\n"
406                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
407                                  "                                         Set default log error output for services\n"
408                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
409                 }
410
411         } else if (streq(word, "quiet"))
412                 arg_show_status = false;
413         else if (!in_initrd()) {
414                 unsigned i;
415
416                 /* SysV compatibility */
417                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
418                         if (streq(word, rlmap[i]))
419                                 return set_default_unit(rlmap[i+1]);
420         }
421
422         return 0;
423 }
424
/* Config parser callback: applies rvalue as the maximum log level.
 * The outcome of the log call is not checked; always returns 0. */
static int config_parse_level2(
                const char *filename,
                unsigned line,
                const char *section,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Result deliberately discarded. */
        (void) log_set_max_level_from_string(rvalue);

        return 0;
}
442
/* Config parser callback: applies rvalue as the log target.
 * The outcome of the log call is not checked; always returns 0. */
static int config_parse_target(
                const char *filename,
                unsigned line,
                const char *section,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Result deliberately discarded. */
        (void) log_set_target_from_string(rvalue);

        return 0;
}
460
/* Config parser callback: applies rvalue as the log color setting.
 * The outcome of the log call is not checked; always returns 0. */
static int config_parse_color(
                const char *filename,
                unsigned line,
                const char *section,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Result deliberately discarded. */
        (void) log_show_color_from_string(rvalue);

        return 0;
}
478
/* Config parser callback: applies rvalue as the log location setting.
 * The outcome of the log call is not checked; always returns 0. */
static int config_parse_location(
                const char *filename,
                unsigned line,
                const char *section,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Result deliberately discarded. */
        (void) log_show_location_from_string(rvalue);

        return 0;
}
496
497 static int config_parse_cpu_affinity2(
498                 const char *filename,
499                 unsigned line,
500                 const char *section,
501                 const char *lvalue,
502                 int ltype,
503                 const char *rvalue,
504                 void *data,
505                 void *userdata) {
506
507         char *w;
508         size_t l;
509         char *state;
510         cpu_set_t *c = NULL;
511         unsigned ncpus = 0;
512
513         assert(filename);
514         assert(lvalue);
515         assert(rvalue);
516
517         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
518                 char *t;
519                 int r;
520                 unsigned cpu;
521
522                 if (!(t = strndup(w, l)))
523                         return log_oom();
524
525                 r = safe_atou(t, &cpu);
526                 free(t);
527
528                 if (!c)
529                         if (!(c = cpu_set_malloc(&ncpus)))
530                                 return log_oom();
531
532                 if (r < 0 || cpu >= ncpus) {
533                         log_error("[%s:%u] Failed to parse CPU affinity: %s", filename, line, rvalue);
534                         CPU_FREE(c);
535                         return -EBADMSG;
536                 }
537
538                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
539         }
540
541         if (c) {
542                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
543                         log_warning("Failed to set CPU affinity: %m");
544
545                 CPU_FREE(c);
546         }
547
548         return 0;
549 }
550
/* Frees a NULL-terminated array of string vectors: every element is handed
 * to strv_free(), then the array itself is released. NULL is accepted. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (l == NULL)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
562
563 static void free_join_controllers(void) {
564         if (!arg_join_controllers)
565                 return;
566
567         strv_free_free(arg_join_controllers);
568         arg_join_controllers = NULL;
569 }
570
571 static int config_parse_join_controllers(
572                 const char *filename,
573                 unsigned line,
574                 const char *section,
575                 const char *lvalue,
576                 int ltype,
577                 const char *rvalue,
578                 void *data,
579                 void *userdata) {
580
581         unsigned n = 0;
582         char *state, *w;
583         size_t length;
584
585         assert(filename);
586         assert(lvalue);
587         assert(rvalue);
588
589         free_join_controllers();
590
591         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
592                 char *s, **l;
593
594                 s = strndup(w, length);
595                 if (!s)
596                         return log_oom();
597
598                 l = strv_split(s, ",");
599                 free(s);
600
601                 strv_uniq(l);
602
603                 if (strv_length(l) <= 1) {
604                         strv_free(l);
605                         continue;
606                 }
607
608                 if (!arg_join_controllers) {
609                         arg_join_controllers = new(char**, 2);
610                         if (!arg_join_controllers) {
611                                 strv_free(l);
612                                 return log_oom();
613                         }
614
615                         arg_join_controllers[0] = l;
616                         arg_join_controllers[1] = NULL;
617
618                         n = 1;
619                 } else {
620                         char ***a;
621                         char ***t;
622
623                         t = new0(char**, n+2);
624                         if (!t) {
625                                 strv_free(l);
626                                 return log_oom();
627                         }
628
629                         n = 0;
630
631                         for (a = arg_join_controllers; *a; a++) {
632
633                                 if (strv_overlap(*a, l)) {
634                                         char **c;
635
636                                         c = strv_merge(*a, l);
637                                         if (!c) {
638                                                 strv_free(l);
639                                                 strv_free_free(t);
640                                                 return log_oom();
641                                         }
642
643                                         strv_free(l);
644                                         l = c;
645                                 } else {
646                                         char **c;
647
648                                         c = strv_copy(*a);
649                                         if (!c) {
650                                                 strv_free(l);
651                                                 strv_free_free(t);
652                                                 return log_oom();
653                                         }
654
655                                         t[n++] = c;
656                                 }
657                         }
658
659                         t[n++] = strv_uniq(l);
660
661                         strv_free_free(arg_join_controllers);
662                         arg_join_controllers = t;
663                 }
664         }
665
666         return 0;
667 }
668
669 static int parse_config_file(void) {
670
671         const ConfigTableItem items[] = {
672                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
673                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
674                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
675                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
676                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
677                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
678                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
679                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
680                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
681                 { "Manager", "DefaultControllers",    config_parse_strv,         0, &arg_default_controllers },
682                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
683                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
684                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
685                 { "Manager", "RuntimeWatchdogSec",    config_parse_usec,         0, &arg_runtime_watchdog    },
686                 { "Manager", "ShutdownWatchdogSec",   config_parse_usec,         0, &arg_shutdown_watchdog   },
687                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
688                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
689                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
690                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
691                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
692                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
693                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
694                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
695                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
696                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
697                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
698                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
699                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
700                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
701                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
702                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
703                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
704                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
705                 { NULL, NULL, NULL, 0, NULL }
706         };
707
708         FILE *f;
709         const char *fn;
710         int r;
711
712         fn = arg_running_as == SYSTEMD_SYSTEM ? SYSTEM_CONFIG_FILE : USER_CONFIG_FILE;
713         f = fopen(fn, "re");
714         if (!f) {
715                 if (errno == ENOENT)
716                         return 0;
717
718                 log_warning("Failed to open configuration file '%s': %m", fn);
719                 return 0;
720         }
721
722         r = config_parse(fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, NULL);
723         if (r < 0)
724                 log_warning("Failed to parse configuration file: %s", strerror(-r));
725
726         fclose(f);
727
728         return 0;
729 }
730
731 static int parse_proc_cmdline(void) {
732         char *line, *w, *state;
733         int r;
734         size_t l;
735
736         /* Don't read /proc/cmdline if we are in a container, since
737          * that is only relevant for the host system */
738         if (detect_container(NULL) > 0)
739                 return 0;
740
741         if ((r = read_one_line_file("/proc/cmdline", &line)) < 0) {
742                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
743                 return 0;
744         }
745
746         FOREACH_WORD_QUOTED(w, l, line, state) {
747                 char *word;
748
749                 if (!(word = strndup(w, l))) {
750                         r = -ENOMEM;
751                         goto finish;
752                 }
753
754                 r = parse_proc_cmdline_word(word);
755                 if (r < 0) {
756                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
757                         free(word);
758                         goto finish;
759                 }
760
761                 free(word);
762         }
763
764         r = 0;
765
766 finish:
767         free(line);
768         return r;
769 }
770
771 static int parse_argv(int argc, char *argv[]) {
772
773         enum {
774                 ARG_LOG_LEVEL = 0x100,
775                 ARG_LOG_TARGET,
776                 ARG_LOG_COLOR,
777                 ARG_LOG_LOCATION,
778                 ARG_UNIT,
779                 ARG_SYSTEM,
780                 ARG_USER,
781                 ARG_TEST,
782                 ARG_VERSION,
783                 ARG_DUMP_CONFIGURATION_ITEMS,
784                 ARG_DUMP_CORE,
785                 ARG_CRASH_SHELL,
786                 ARG_CONFIRM_SPAWN,
787                 ARG_SHOW_STATUS,
788                 ARG_DESERIALIZE,
789                 ARG_SWITCHED_ROOT,
790                 ARG_INTROSPECT,
791                 ARG_DEFAULT_STD_OUTPUT,
792                 ARG_DEFAULT_STD_ERROR
793         };
794
795         static const struct option options[] = {
796                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
797                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
798                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
799                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
800                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
801                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
802                 { "user",                     no_argument,       NULL, ARG_USER                     },
803                 { "test",                     no_argument,       NULL, ARG_TEST                     },
804                 { "help",                     no_argument,       NULL, 'h'                          },
805                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
806                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
807                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
808                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
809                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
810                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
811                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
812                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
813                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
814                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
815                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
816                 { NULL,                       0,                 NULL, 0                            }
817         };
818
819         int c, r;
820
821         assert(argc >= 1);
822         assert(argv);
823
824         if (getpid() == 1)
825                 opterr = 0;
826
827         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
828
829                 switch (c) {
830
831                 case ARG_LOG_LEVEL:
832                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
833                                 log_error("Failed to parse log level %s.", optarg);
834                                 return r;
835                         }
836
837                         break;
838
839                 case ARG_LOG_TARGET:
840
841                         if ((r = log_set_target_from_string(optarg)) < 0) {
842                                 log_error("Failed to parse log target %s.", optarg);
843                                 return r;
844                         }
845
846                         break;
847
848                 case ARG_LOG_COLOR:
849
850                         if (optarg) {
851                                 if ((r = log_show_color_from_string(optarg)) < 0) {
852                                         log_error("Failed to parse log color setting %s.", optarg);
853                                         return r;
854                                 }
855                         } else
856                                 log_show_color(true);
857
858                         break;
859
860                 case ARG_LOG_LOCATION:
861
862                         if (optarg) {
863                                 if ((r = log_show_location_from_string(optarg)) < 0) {
864                                         log_error("Failed to parse log location setting %s.", optarg);
865                                         return r;
866                                 }
867                         } else
868                                 log_show_location(true);
869
870                         break;
871
872                 case ARG_DEFAULT_STD_OUTPUT:
873
874                         if ((r = exec_output_from_string(optarg)) < 0) {
875                                 log_error("Failed to parse default standard output setting %s.", optarg);
876                                 return r;
877                         } else
878                                 arg_default_std_output = r;
879                         break;
880
881                 case ARG_DEFAULT_STD_ERROR:
882
883                         if ((r = exec_output_from_string(optarg)) < 0) {
884                                 log_error("Failed to parse default standard error output setting %s.", optarg);
885                                 return r;
886                         } else
887                                 arg_default_std_error = r;
888                         break;
889
890                 case ARG_UNIT:
891
892                         if ((r = set_default_unit(optarg)) < 0) {
893                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
894                                 return r;
895                         }
896
897                         break;
898
899                 case ARG_SYSTEM:
900                         arg_running_as = SYSTEMD_SYSTEM;
901                         break;
902
903                 case ARG_USER:
904                         arg_running_as = SYSTEMD_USER;
905                         break;
906
907                 case ARG_TEST:
908                         arg_action = ACTION_TEST;
909                         break;
910
911                 case ARG_VERSION:
912                         arg_action = ACTION_VERSION;
913                         break;
914
915                 case ARG_DUMP_CONFIGURATION_ITEMS:
916                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
917                         break;
918
919                 case ARG_DUMP_CORE:
920                         r = optarg ? parse_boolean(optarg) : 1;
921                         if (r < 0) {
922                                 log_error("Failed to parse dump core boolean %s.", optarg);
923                                 return r;
924                         }
925                         arg_dump_core = r;
926                         break;
927
928                 case ARG_CRASH_SHELL:
929                         r = optarg ? parse_boolean(optarg) : 1;
930                         if (r < 0) {
931                                 log_error("Failed to parse crash shell boolean %s.", optarg);
932                                 return r;
933                         }
934                         arg_crash_shell = r;
935                         break;
936
937                 case ARG_CONFIRM_SPAWN:
938                         r = optarg ? parse_boolean(optarg) : 1;
939                         if (r < 0) {
940                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
941                                 return r;
942                         }
943                         arg_confirm_spawn = r;
944                         break;
945
946                 case ARG_SHOW_STATUS:
947                         r = optarg ? parse_boolean(optarg) : 1;
948                         if (r < 0) {
949                                 log_error("Failed to parse show status boolean %s.", optarg);
950                                 return r;
951                         }
952                         arg_show_status = r;
953                         break;
954
955                 case ARG_DESERIALIZE: {
956                         int fd;
957                         FILE *f;
958
959                         r = safe_atoi(optarg, &fd);
960                         if (r < 0 || fd < 0) {
961                                 log_error("Failed to parse deserialize option %s.", optarg);
962                                 return r < 0 ? r : -EINVAL;
963                         }
964
965                         fd_cloexec(fd, true);
966
967                         f = fdopen(fd, "r");
968                         if (!f) {
969                                 log_error("Failed to open serialization fd: %m");
970                                 return -errno;
971                         }
972
973                         if (serialization)
974                                 fclose(serialization);
975
976                         serialization = f;
977
978                         break;
979                 }
980
981                 case ARG_SWITCHED_ROOT:
982                         arg_switched_root = true;
983                         break;
984
985                 case ARG_INTROSPECT: {
986                         const char * const * i = NULL;
987
988                         for (i = bus_interface_table; *i; i += 2)
989                                 if (!optarg || streq(i[0], optarg)) {
990                                         fputs(DBUS_INTROSPECT_1_0_XML_DOCTYPE_DECL_NODE
991                                               "<node>\n", stdout);
992                                         fputs(i[1], stdout);
993                                         fputs("</node>\n", stdout);
994
995                                         if (optarg)
996                                                 break;
997                                 }
998
999                         if (!i[0] && optarg)
1000                                 log_error("Unknown interface %s.", optarg);
1001
1002                         arg_action = ACTION_DONE;
1003                         break;
1004                 }
1005
1006                 case 'h':
1007                         arg_action = ACTION_HELP;
1008                         break;
1009
1010                 case 'D':
1011                         log_set_max_level(LOG_DEBUG);
1012                         break;
1013
1014                 case 'b':
1015                 case 's':
1016                 case 'z':
1017                         /* Just to eat away the sysvinit kernel
1018                          * cmdline args without getopt() error
1019                          * messages that we'll parse in
1020                          * parse_proc_cmdline_word() or ignore. */
1021
1022                 case '?':
1023                 default:
1024                         if (getpid() != 1) {
1025                                 log_error("Unknown option code %c", c);
1026                                 return -EINVAL;
1027                         }
1028
1029                         break;
1030                 }
1031
1032         if (optind < argc && getpid() != 1) {
1033                 /* Hmm, when we aren't run as init system
1034                  * let's complain about excess arguments */
1035
1036                 log_error("Excess arguments.");
1037                 return -EINVAL;
1038         }
1039
1040         if (detect_container(NULL) > 0) {
1041                 char **a;
1042
1043                 /* All /proc/cmdline arguments the kernel didn't
1044                  * understand it passed to us. We're not really
1045                  * interested in that usually since /proc/cmdline is
1046                  * more interesting and complete. With one exception:
1047                  * if we are run in a container /proc/cmdline is not
1048                  * relevant for the container, hence we rely on argv[]
1049                  * instead. */
1050
1051                 for (a = argv; a < argv + argc; a++)
1052                         if ((r = parse_proc_cmdline_word(*a)) < 0) {
1053                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
1054                                 return r;
1055                         }
1056         }
1057
1058         return 0;
1059 }
1060
1061 static int help(void) {
1062
1063         printf("%s [OPTIONS...]\n\n"
1064                "Starts up and maintains the system or user services.\n\n"
1065                "  -h --help                      Show this help\n"
1066                "     --test                      Determine startup sequence, dump it and exit\n"
1067                "     --dump-configuration-items  Dump understood unit configuration items\n"
1068                "     --introspect[=INTERFACE]    Extract D-Bus interface data\n"
1069                "     --unit=UNIT                 Set default unit\n"
1070                "     --system                    Run a system instance, even if PID != 1\n"
1071                "     --user                      Run a user instance\n"
1072                "     --dump-core[=0|1]           Dump core on crash\n"
1073                "     --crash-shell[=0|1]         Run shell on crash\n"
1074                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1075                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1076                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1077                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1078                "     --log-color[=0|1]           Highlight important log messages\n"
1079                "     --log-location[=0|1]        Include code location in log messages\n"
1080                "     --default-standard-output=  Set default standard output for services\n"
1081                "     --default-standard-error=   Set default standard error output for services\n",
1082                program_invocation_short_name);
1083
1084         return 0;
1085 }
1086
1087 static int version(void) {
1088         puts(PACKAGE_STRING);
1089         puts(SYSTEMD_FEATURES);
1090
1091         return 0;
1092 }
1093
1094 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool serialize_jobs) {
1095         FILE *f = NULL;
1096         FDSet *fds = NULL;
1097         int r;
1098
1099         assert(m);
1100         assert(_f);
1101         assert(_fds);
1102
1103         /* Make sure nothing is really destructed when we shut down */
1104         m->n_reloading ++;
1105
1106         r = manager_open_serialization(m, &f);
1107         if (r < 0) {
1108                 log_error("Failed to create serialization file: %s", strerror(-r));
1109                 goto fail;
1110         }
1111
1112         fds = fdset_new();
1113         if (!fds) {
1114                 r = -ENOMEM;
1115                 log_error("Failed to allocate fd set: %s", strerror(-r));
1116                 goto fail;
1117         }
1118
1119         r = manager_serialize(m, f, fds, serialize_jobs);
1120         if (r < 0) {
1121                 log_error("Failed to serialize state: %s", strerror(-r));
1122                 goto fail;
1123         }
1124
1125         if (fseeko(f, 0, SEEK_SET) < 0) {
1126                 log_error("Failed to rewind serialization fd: %m");
1127                 goto fail;
1128         }
1129
1130         r = fd_cloexec(fileno(f), false);
1131         if (r < 0) {
1132                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1133                 goto fail;
1134         }
1135
1136         r = fdset_cloexec(fds, false);
1137         if (r < 0) {
1138                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1139                 goto fail;
1140         }
1141
1142         *_f = f;
1143         *_fds = fds;
1144
1145         return 0;
1146
1147 fail:
1148         fdset_free(fds);
1149
1150         if (f)
1151                 fclose(f);
1152
1153         return r;
1154 }
1155
1156 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1157         struct rlimit nl;
1158         int r;
1159
1160         assert(saved_rlimit);
1161
1162         /* Save the original RLIMIT_NOFILE so that we can reset it
1163          * later when transitioning from the initrd to the main
1164          * systemd or suchlike. */
1165         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1166                 log_error("Reading RLIMIT_NOFILE failed: %m");
1167                 return -errno;
1168         }
1169
1170         /* Make sure forked processes get the default kernel setting */
1171         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1172                 struct rlimit *rl;
1173
1174                 rl = newdup(struct rlimit, saved_rlimit, 1);
1175                 if (!rl)
1176                         return log_oom();
1177
1178                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1179         }
1180
1181         /* Bump up the resource limit for ourselves substantially */
1182         nl.rlim_cur = nl.rlim_max = 64*1024;
1183         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1184         if (r < 0) {
1185                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1186                 return r;
1187         }
1188
1189         return 0;
1190 }
1191
1192 static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) {
1193         const char *e;
1194         unsigned long long a, b;
1195
1196         assert(t);
1197
1198         e = getenv("RD_TIMESTAMP");
1199         if (!e)
1200                 return NULL;
1201
1202         if (sscanf(e, "%llu %llu", &a, &b) != 2)
1203                 return NULL;
1204
1205         t->realtime = (usec_t) a;
1206         t->monotonic = (usec_t) b;
1207
1208         return t;
1209 }
1210
/* Warns if /etc/mtab is anything other than a symlink to
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        static const char * const accepted[] = {
                "/proc/self/mounts",
                "/proc/mounts"
        };
        char *target;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                bool found = false;
                size_t k;

                for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
                        if (streq(target, accepted[k])) {
                                found = true;
                                break;
                        }

                free(target);

                if (found)
                        return;
        }

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1230
/* Warns when dir_is_empty() reports /usr as empty, which is taken as
 * a sign that /usr is a separate file system that has not been
 * mounted yet. Any other result, including an error, stays silent. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1242
/* Checks for /proc/cgroups. If it cannot be accessed, a warning is
 * logged and start-up is held back for ten seconds. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* The delay announced in the message above */
                sleep(10);
        }
}
1257
1258 static int initialize_join_controllers(void) {
1259         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1260          * + "net_prio". We'd like to add "cpuset" to the mix, but
1261          * "cpuset" does't really work for groups with no initialized
1262          * attributes. */
1263
1264         arg_join_controllers = new(char**, 3);
1265         if (!arg_join_controllers)
1266                 return -ENOMEM;
1267
1268         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1269         if (!arg_join_controllers[0])
1270                 return -ENOMEM;
1271
1272         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1273         if (!arg_join_controllers[1])
1274                 return -ENOMEM;
1275
1276         arg_join_controllers[2] = NULL;
1277         return 0;
1278 }
1279
1280 int main(int argc, char *argv[]) {
1281         Manager *m = NULL;
1282         int r, retval = EXIT_FAILURE;
1283         usec_t before_startup, after_startup;
1284         char timespan[FORMAT_TIMESPAN_MAX];
1285         FDSet *fds = NULL;
1286         bool reexecute = false;
1287         const char *shutdown_verb = NULL;
1288         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1289         static char systemd[] = "systemd";
1290         bool skip_setup = false;
1291         int j;
1292         bool loaded_policy = false;
1293         bool arm_reboot_watchdog = false;
1294         bool queue_default_job = false;
1295         char *switch_root_dir = NULL, *switch_root_init = NULL;
1296         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1297
1298 #ifdef HAVE_SYSV_COMPAT
1299         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1300                 /* This is compatibility support for SysV, where
1301                  * calling init as a user is identical to telinit. */
1302
1303                 errno = -ENOENT;
1304                 execv(SYSTEMCTL_BINARY_PATH, argv);
1305                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1306                 return 1;
1307         }
1308 #endif
1309
1310         /* Determine if this is a reexecution or normal bootup. We do
1311          * the full command line parsing much later, so let's just
1312          * have a quick peek here. */
1313         for (j = 1; j < argc; j++)
1314                 if (streq(argv[j], "--deserialize")) {
1315                         skip_setup = true;
1316                         break;
1317                 }
1318
1319         /* If we have switched root, do all the special setup
1320          * things */
1321         for (j = 1; j < argc; j++)
1322                 if (streq(argv[j], "--switched-root")) {
1323                         skip_setup = false;
1324                         break;
1325                 }
1326
1327         /* If we get started via the /sbin/init symlink then we are
1328            called 'init'. After a subsequent reexecution we are then
1329            called 'systemd'. That is confusing, hence let's call us
1330            systemd right-away. */
1331         program_invocation_short_name = systemd;
1332         prctl(PR_SET_NAME, systemd);
1333
1334         saved_argv = argv;
1335         saved_argc = argc;
1336
1337         log_show_color(isatty(STDERR_FILENO) > 0);
1338
1339         if (getpid() == 1 && detect_container(NULL) <= 0) {
1340
1341                 /* Running outside of a container as PID 1 */
1342                 arg_running_as = SYSTEMD_SYSTEM;
1343                 make_null_stdio();
1344                 log_set_target(LOG_TARGET_KMSG);
1345                 log_open();
1346
1347                 if (in_initrd()) {
1348                         char *rd_timestamp = NULL;
1349
1350                         dual_timestamp_get(&initrd_timestamp);
1351                         asprintf(&rd_timestamp, "%llu %llu",
1352                                  (unsigned long long) initrd_timestamp.realtime,
1353                                  (unsigned long long) initrd_timestamp.monotonic);
1354                         if (rd_timestamp) {
1355                                 setenv("RD_TIMESTAMP", rd_timestamp, 1);
1356                                 free(rd_timestamp);
1357                         }
1358                 }
1359
1360                 if (!skip_setup) {
1361                         mount_setup_early();
1362                         if (selinux_setup(&loaded_policy) < 0)
1363                                 goto finish;
1364                         if (ima_setup() < 0)
1365                                 goto finish;
1366                         if (smack_setup() < 0)
1367                                 goto finish;
1368                 }
1369
1370                 if (label_init(NULL) < 0)
1371                         goto finish;
1372
1373                 if (!skip_setup) {
1374                         if (hwclock_is_localtime() > 0) {
1375                                 int min;
1376
1377                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1378                                 r = hwclock_set_timezone(&min);
1379                                 if (r < 0)
1380                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1381                                 else
1382                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1383                         } else if (!in_initrd()) {
1384                                 /*
1385                                  * Do dummy first-time call to seal the kernel's time warp magic
1386                                  *
1387                                  * Do not call this this from inside the initrd. The initrd might not
1388                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1389                                  * that way. In such case, we need to delay the time-warp or the sealing
1390                                  * until we reach the real system.
1391                                  */
1392                                 hwclock_reset_timezone();
1393
1394                                 /* Tell the kernel our time zone */
1395                                 r = hwclock_set_timezone(NULL);
1396                                 if (r < 0)
1397                                         log_error("Failed to set the kernel's time zone, ignoring: %s", strerror(-r));
1398                         }
1399                 }
1400
1401                 /* Set the default for later on, but don't actually
1402                  * open the logs like this for now. Note that if we
1403                  * are transitioning from the initrd there might still
1404                  * be journal fd open, and we shouldn't attempt
1405                  * opening that before we parsed /proc/cmdline which
1406                  * might redirect output elsewhere. */
1407                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1408
1409         } else if (getpid() == 1) {
1410
1411                 /* Running inside a container, as PID 1 */
1412                 arg_running_as = SYSTEMD_SYSTEM;
1413                 log_set_target(LOG_TARGET_CONSOLE);
1414                 log_open();
1415
1416                 /* For the later on, see above... */
1417                 log_set_target(LOG_TARGET_JOURNAL);
1418
1419         } else {
1420
1421                 /* Running as user instance */
1422                 arg_running_as = SYSTEMD_USER;
1423                 log_set_target(LOG_TARGET_AUTO);
1424                 log_open();
1425         }
1426
1427         /* Initialize default unit */
1428         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1429         if (r < 0) {
1430                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1431                 goto finish;
1432         }
1433
1434         r = initialize_join_controllers();
1435         if (r < 0)
1436                 goto finish;
1437
1438         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1439          * /proc/$PID/fd is available. */
1440         if (getpid() == 1) {
1441                 r = mount_setup(loaded_policy);
1442                 if (r < 0)
1443                         goto finish;
1444         }
1445
1446         /* Reset all signal handlers. */
1447         assert_se(reset_all_signal_handlers() == 0);
1448
1449         /* If we are init, we can block sigkill. Yay. */
1450         ignore_signals(SIGNALS_IGNORE, -1);
1451
1452         if (parse_config_file() < 0)
1453                 goto finish;
1454
1455         if (arg_running_as == SYSTEMD_SYSTEM)
1456                 if (parse_proc_cmdline() < 0)
1457                         goto finish;
1458
1459         log_parse_environment();
1460
1461         if (parse_argv(argc, argv) < 0)
1462                 goto finish;
1463
1464         if (arg_action == ACTION_TEST &&
1465             geteuid() == 0) {
1466                 log_error("Don't run test mode as root.");
1467                 goto finish;
1468         }
1469
1470         if (arg_running_as == SYSTEMD_USER &&
1471             arg_action == ACTION_RUN &&
1472             sd_booted() <= 0) {
1473                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1474                 goto finish;
1475         }
1476
1477         if (arg_running_as == SYSTEMD_SYSTEM &&
1478             arg_action == ACTION_RUN &&
1479             running_in_chroot() > 0) {
1480                 log_error("Cannot be run in a chroot() environment.");
1481                 goto finish;
1482         }
1483
1484         if (arg_action == ACTION_HELP) {
1485                 retval = help();
1486                 goto finish;
1487         } else if (arg_action == ACTION_VERSION) {
1488                 retval = version();
1489                 goto finish;
1490         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1491                 unit_dump_config_items(stdout);
1492                 retval = EXIT_SUCCESS;
1493                 goto finish;
1494         } else if (arg_action == ACTION_DONE) {
1495                 retval = EXIT_SUCCESS;
1496                 goto finish;
1497         }
1498
1499         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1500
1501         /* Close logging fds, in order not to confuse fdset below */
1502         log_close();
1503
1504         /* Remember open file descriptors for later deserialization */
1505         r = fdset_new_fill(&fds);
1506         if (r < 0) {
1507                 log_error("Failed to allocate fd set: %s", strerror(-r));
1508                 goto finish;
1509         } else
1510                 fdset_cloexec(fds, true);
1511
1512         if (serialization)
1513                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1514
1515         /* Set up PATH unless it is already set */
1516         setenv("PATH",
1517 #ifdef HAVE_SPLIT_USR
1518                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1519 #else
1520                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin",
1521 #endif
1522                arg_running_as == SYSTEMD_SYSTEM);
1523
1524         if (arg_running_as == SYSTEMD_SYSTEM) {
1525                 /* Parse the data passed to us. We leave this
1526                  * variables set, but the manager later on will not
1527                  * pass them on to our children. */
1528                 if (!in_initrd())
1529                         parse_initrd_timestamp(&initrd_timestamp);
1530
1531                 /* Unset some environment variables passed in from the
1532                  * kernel that don't really make sense for us. */
1533                 unsetenv("HOME");
1534                 unsetenv("TERM");
1535
1536                 /* When we are invoked by a shell, these might be set,
1537                  * but make little sense to pass on */
1538                 unsetenv("PWD");
1539                 unsetenv("SHLVL");
1540                 unsetenv("_");
1541
1542                 /* When we are invoked by a chroot-like tool such as
1543                  * nspawn, these might be set, but make little sense
1544                  * to pass on */
1545                 unsetenv("USER");
1546                 unsetenv("LOGNAME");
1547
1548                 /* We suppress the socket activation env vars, as
1549                  * we'll try to match *any* open fd to units if
1550                  * possible. */
1551                 unsetenv("LISTEN_FDS");
1552                 unsetenv("LISTEN_PID");
1553
1554                 /* All other variables are left as is, so that clients
1555                  * can still read them via /proc/1/environ */
1556         }
1557
1558         /* Move out of the way, so that we won't block unmounts */
1559         assert_se(chdir("/")  == 0);
1560
1561         if (arg_running_as == SYSTEMD_SYSTEM) {
1562                 /* Become a session leader if we aren't one yet. */
1563                 setsid();
1564
1565                 /* Disable the umask logic */
1566                 umask(0);
1567         }
1568
1569         /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */
1570         dbus_connection_set_change_sigpipe(FALSE);
1571
1572         /* Reset the console, but only if this is really init and we
1573          * are freshly booted */
1574         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1575                 console_setup(getpid() == 1 && !skip_setup);
1576
1577         /* Open the logging devices, if possible and necessary */
1578         log_open();
1579
1580         /* Make sure we leave a core dump without panicking the
1581          * kernel. */
1582         if (getpid() == 1)
1583                 install_crash_handler();
1584
1585         if (getpid() == 1) {
1586                 r = mount_cgroup_controllers(arg_join_controllers);
1587                 if (r < 0)
1588                         goto finish;
1589         }
1590
1591         if (arg_running_as == SYSTEMD_SYSTEM) {
1592                 const char *virtualization = NULL;
1593
1594                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1595
1596                 detect_virtualization(&virtualization);
1597                 if (virtualization)
1598                         log_info("Detected virtualization '%s'.", virtualization);
1599
1600                 if (in_initrd())
1601                         log_info("Running in initial RAM disk.");
1602
1603         } else
1604                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1605
1606         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1607                 locale_setup();
1608
1609                 if (arg_show_status || plymouth_running())
1610                         status_welcome();
1611
1612 #ifdef HAVE_KMOD
1613                 kmod_setup();
1614 #endif
1615                 hostname_setup();
1616                 machine_id_setup();
1617                 loopback_setup();
1618
1619                 test_mtab();
1620                 test_usr();
1621                 test_cgroups();
1622         }
1623
1624         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1625                 watchdog_set_timeout(&arg_runtime_watchdog);
1626
1627         if (arg_timer_slack_nsec != (nsec_t) -1)
1628                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1629                         log_error("Failed to adjust timer slack: %m");
1630
1631         if (arg_capability_bounding_set_drop) {
1632                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1633                 if (r < 0) {
1634                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1635                         goto finish;
1636                 }
1637                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1638                 if (r < 0) {
1639                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1640                         goto finish;
1641                 }
1642         }
1643
1644         if (arg_running_as == SYSTEMD_USER) {
1645                 /* Become reaper of our children */
1646                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1647                         log_warning("Failed to make us a subreaper: %m");
1648                         if (errno == EINVAL)
1649                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1650                 }
1651         }
1652
1653         if (arg_running_as == SYSTEMD_SYSTEM)
1654                 bump_rlimit_nofile(&saved_rlimit_nofile);
1655
1656         r = manager_new(arg_running_as, &m);
1657         if (r < 0) {
1658                 log_error("Failed to allocate manager object: %s", strerror(-r));
1659                 goto finish;
1660         }
1661
1662         m->confirm_spawn = arg_confirm_spawn;
1663         m->default_std_output = arg_default_std_output;
1664         m->default_std_error = arg_default_std_error;
1665         m->runtime_watchdog = arg_runtime_watchdog;
1666         m->shutdown_watchdog = arg_shutdown_watchdog;
1667
1668         manager_set_default_rlimits(m, arg_default_rlimit);
1669
1670         if (dual_timestamp_is_set(&initrd_timestamp))
1671                 m->initrd_timestamp = initrd_timestamp;
1672
1673         if (arg_default_controllers)
1674                 manager_set_default_controllers(m, arg_default_controllers);
1675
1676         manager_set_show_status(m, arg_show_status);
1677
1678         /* Remember whether we should queue the default job */
1679         queue_default_job = !serialization || arg_switched_root;
1680
1681         before_startup = now(CLOCK_MONOTONIC);
1682
1683         r = manager_startup(m, serialization, fds);
1684         if (r < 0)
1685                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1686
1687         /* This will close all file descriptors that were opened, but
1688          * not claimed by any unit. */
1689         fdset_free(fds);
1690
1691         if (serialization) {
1692                 fclose(serialization);
1693                 serialization = NULL;
1694         }
1695
1696         if (queue_default_job) {
1697                 DBusError error;
1698                 Unit *target = NULL;
1699                 Job *default_unit_job;
1700
1701                 dbus_error_init(&error);
1702
1703                 log_debug("Activating default unit: %s", arg_default_unit);
1704
1705                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1706                 if (r < 0) {
1707                         log_error("Failed to load default target: %s", bus_error(&error, r));
1708                         dbus_error_free(&error);
1709                 } else if (target->load_state == UNIT_ERROR)
1710                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1711                 else if (target->load_state == UNIT_MASKED)
1712                         log_error("Default target masked.");
1713
1714                 if (!target || target->load_state != UNIT_LOADED) {
1715                         log_info("Trying to load rescue target...");
1716
1717                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1718                         if (r < 0) {
1719                                 log_error("Failed to load rescue target: %s", bus_error(&error, r));
1720                                 dbus_error_free(&error);
1721                                 goto finish;
1722                         } else if (target->load_state == UNIT_ERROR) {
1723                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1724                                 goto finish;
1725                         } else if (target->load_state == UNIT_MASKED) {
1726                                 log_error("Rescue target masked.");
1727                                 goto finish;
1728                         }
1729                 }
1730
1731                 assert(target->load_state == UNIT_LOADED);
1732
1733                 if (arg_action == ACTION_TEST) {
1734                         printf("-> By units:\n");
1735                         manager_dump_units(m, stdout, "\t");
1736                 }
1737
1738                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1739                 if (r == -EPERM) {
1740                         log_error("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
1741                         dbus_error_free(&error);
1742
1743                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1744                         if (r < 0) {
1745                                 log_error("Failed to start default target: %s", bus_error(&error, r));
1746                                 dbus_error_free(&error);
1747                                 goto finish;
1748                         }
1749                 } else if (r < 0) {
1750                         log_error("Failed to isolate default target: %s", bus_error(&error, r));
1751                         dbus_error_free(&error);
1752                         goto finish;
1753                 }
1754
1755                 m->default_unit_job_id = default_unit_job->id;
1756
1757                 after_startup = now(CLOCK_MONOTONIC);
1758                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1759                          "Loaded units and determined initial transaction in %s.",
1760                           format_timespan(timespan, sizeof(timespan), after_startup - before_startup));
1761
1762                 if (arg_action == ACTION_TEST) {
1763                         printf("-> By jobs:\n");
1764                         manager_dump_jobs(m, stdout, "\t");
1765                         retval = EXIT_SUCCESS;
1766                         goto finish;
1767                 }
1768         }
1769
1770         for (;;) {
1771                 r = manager_loop(m);
1772                 if (r < 0) {
1773                         log_error("Failed to run mainloop: %s", strerror(-r));
1774                         goto finish;
1775                 }
1776
1777                 switch (m->exit_code) {
1778
1779                 case MANAGER_EXIT:
1780                         retval = EXIT_SUCCESS;
1781                         log_debug("Exit.");
1782                         goto finish;
1783
1784                 case MANAGER_RELOAD:
1785                         log_info("Reloading.");
1786                         r = manager_reload(m);
1787                         if (r < 0)
1788                                 log_error("Failed to reload: %s", strerror(-r));
1789                         break;
1790
1791                 case MANAGER_REEXECUTE:
1792
1793                         if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1794                                 goto finish;
1795
1796                         reexecute = true;
1797                         log_notice("Reexecuting.");
1798                         goto finish;
1799
1800                 case MANAGER_SWITCH_ROOT:
1801                         /* Steal the switch root parameters */
1802                         switch_root_dir = m->switch_root;
1803                         switch_root_init = m->switch_root_init;
1804                         m->switch_root = m->switch_root_init = NULL;
1805
1806                         if (!switch_root_init)
1807                                 if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1808                                         goto finish;
1809
1810                         reexecute = true;
1811                         log_notice("Switching root.");
1812                         goto finish;
1813
1814                 case MANAGER_REBOOT:
1815                 case MANAGER_POWEROFF:
1816                 case MANAGER_HALT:
1817                 case MANAGER_KEXEC: {
1818                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1819                                 [MANAGER_REBOOT] = "reboot",
1820                                 [MANAGER_POWEROFF] = "poweroff",
1821                                 [MANAGER_HALT] = "halt",
1822                                 [MANAGER_KEXEC] = "kexec"
1823                         };
1824
1825                         assert_se(shutdown_verb = table[m->exit_code]);
1826                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1827
1828                         log_notice("Shutting down.");
1829                         goto finish;
1830                 }
1831
1832                 default:
1833                         assert_not_reached("Unknown exit code.");
1834                 }
1835         }
1836
1837 finish:
1838         if (m)
1839                 manager_free(m);
1840
1841         for (j = 0; j < RLIMIT_NLIMITS; j++)
1842                 free(arg_default_rlimit[j]);
1843
1844         free(arg_default_unit);
1845         strv_free(arg_default_controllers);
1846         free_join_controllers();
1847
1848         dbus_shutdown();
1849         label_finish();
1850
1851         if (reexecute) {
1852                 const char **args;
1853                 unsigned i, args_size;
1854
1855                 /* Close and disarm the watchdog, so that the new
1856                  * instance can reinitialize it, but doesn't get
1857                  * rebooted while we do that */
1858                 watchdog_close(true);
1859
1860                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1861                  * that the new systemd can pass the kernel default to
1862                  * its child processes */
1863                 if (saved_rlimit_nofile.rlim_cur > 0)
1864                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1865
1866                 if (switch_root_dir) {
1867                         /* Kill all remaining processes from the
1868                          * initrd, but don't wait for them, so that we
1869                          * can handle the SIGCHLD for them after
1870                          * deserializing. */
1871                         broadcast_signal(SIGTERM, false);
1872
1873                         /* And switch root */
1874                         r = switch_root(switch_root_dir);
1875                         if (r < 0)
1876                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1877                 }
1878
1879                 args_size = MAX(6, argc+1);
1880                 args = newa(const char*, args_size);
1881
1882                 if (!switch_root_init) {
1883                         char sfd[16];
1884
1885                         /* First try to spawn ourselves with the right
1886                          * path, and with full serialization. We do
1887                          * this only if the user didn't specify an
1888                          * explicit init to spawn. */
1889
1890                         assert(serialization);
1891                         assert(fds);
1892
1893                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1894                         char_array_0(sfd);
1895
1896                         i = 0;
1897                         args[i++] = SYSTEMD_BINARY_PATH;
1898                         if (switch_root_dir)
1899                                 args[i++] = "--switched-root";
1900                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1901                         args[i++] = "--deserialize";
1902                         args[i++] = sfd;
1903                         args[i++] = NULL;
1904
1905                         assert(i <= args_size);
1906                         execv(args[0], (char* const*) args);
1907                 }
1908
1909                 /* Try the fallback, if there is any, without any
1910                  * serialization. We pass the original argv[] and
1911                  * envp[]. (Well, modulo the ordering changes due to
1912                  * getopt() in argv[], and some cleanups in envp[],
1913                  * but let's hope that doesn't matter.) */
1914
1915                 if (serialization) {
1916                         fclose(serialization);
1917                         serialization = NULL;
1918                 }
1919
1920                 if (fds) {
1921                         fdset_free(fds);
1922                         fds = NULL;
1923                 }
1924
1925                 /* Reopen the console */
1926                 make_console_stdio();
1927
1928                 for (j = 1, i = 1; j < argc; j++)
1929                         args[i++] = argv[j];
1930                 args[i++] = NULL;
1931                 assert(i <= args_size);
1932
1933                 if (switch_root_init) {
1934                         args[0] = switch_root_init;
1935                         execv(args[0], (char* const*) args);
1936                         log_warning("Failed to execute configured init, trying fallback: %m");
1937                 }
1938
1939                 args[0] = "/sbin/init";
1940                 execv(args[0], (char* const*) args);
1941
1942                 if (errno == ENOENT) {
1943                         log_warning("No /sbin/init, trying fallback");
1944
1945                         args[0] = "/bin/sh";
1946                         args[1] = NULL;
1947                         execv(args[0], (char* const*) args);
1948                         log_error("Failed to execute /bin/sh, giving up: %m");
1949                 } else
1950                         log_warning("Failed to execute /sbin/init, giving up: %m");
1951         }
1952
1953         if (serialization)
1954                 fclose(serialization);
1955
1956         if (fds)
1957                 fdset_free(fds);
1958
1959         if (shutdown_verb) {
1960                 const char * command_line[] = {
1961                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1962                         shutdown_verb,
1963                         NULL
1964                 };
1965                 char **env_block;
1966
1967                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1968                         char e[32];
1969
1970                         /* If we reboot let's set the shutdown
1971                          * watchdog and tell the shutdown binary to
1972                          * repeatedly ping it */
1973                         watchdog_set_timeout(&arg_shutdown_watchdog);
1974                         watchdog_close(false);
1975
1976                         /* Tell the binary how often to ping */
1977                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1978                         char_array_0(e);
1979
1980                         env_block = strv_append(environ, e);
1981                 } else {
1982                         env_block = strv_copy(environ);
1983                         watchdog_close(true);
1984                 }
1985
1986                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1987                 free(env_block);
1988                 log_error("Failed to execute shutdown binary, freezing: %m");
1989         }
1990
1991         if (getpid() == 1)
1992                 freeze();
1993
1994         return retval;
1995 }