chiark / gitweb /
util: unify reading of /proc/cmdline
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dbus/dbus.h>
23
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <sys/wait.h>
33 #include <fcntl.h>
34 #include <sys/prctl.h>
35 #include <sys/mount.h>
36
37 #include "manager.h"
38 #include "log.h"
39 #include "load-fragment.h"
40 #include "fdset.h"
41 #include "special.h"
42 #include "conf-parser.h"
43 #include "dbus-common.h"
44 #include "missing.h"
45 #include "label.h"
46 #include "build.h"
47 #include "strv.h"
48 #include "def.h"
49 #include "virt.h"
50 #include "watchdog.h"
51 #include "path-util.h"
52 #include "switch-root.h"
53 #include "capability.h"
54 #include "killall.h"
55 #include "env-util.h"
56 #include "hwclock.h"
57 #include "sd-daemon.h"
58 #include "sd-messages.h"
59
60 #include "mount-setup.h"
61 #include "loopback-setup.h"
62 #ifdef HAVE_KMOD
63 #include "kmod-setup.h"
64 #endif
65 #include "hostname-setup.h"
66 #include "machine-id-setup.h"
67 #include "selinux-setup.h"
68 #include "ima-setup.h"
69 #include "fileio.h"
70 #include "smack-setup.h"
71
72 static enum {
73         ACTION_RUN,
74         ACTION_HELP,
75         ACTION_VERSION,
76         ACTION_TEST,
77         ACTION_DUMP_CONFIGURATION_ITEMS,
78         ACTION_DONE
79 } arg_action = ACTION_RUN;
80
81 static char *arg_default_unit = NULL;
82 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
83
84 static bool arg_dump_core = true;
85 static bool arg_crash_shell = false;
86 static int arg_crash_chvt = -1;
87 static bool arg_confirm_spawn = false;
88 static bool arg_show_status = true;
89 static bool arg_switched_root = false;
90 static char ***arg_join_controllers = NULL;
91 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
92 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
93 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
94 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
95 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
96 static usec_t arg_runtime_watchdog = 0;
97 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
98 static char **arg_default_environment = NULL;
99 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
100 static uint64_t arg_capability_bounding_set_drop = 0;
101 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
102
103 static FILE* serialization = NULL;
104
/* Signal handler that intentionally does nothing. crash() installs it for
 * SIGCHLD so that it can wait for the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
107
108 _noreturn_ static void crash(int sig) {
109
110         if (getpid() != 1)
111                 /* Pass this on immediately, if this is not PID 1 */
112                 raise(sig);
113         else if (!arg_dump_core)
114                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
115         else {
116                 struct sigaction sa = {
117                         .sa_handler = nop_handler,
118                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
119                 };
120                 pid_t pid;
121
122                 /* We want to wait for the core process, hence let's enable SIGCHLD */
123                 sigaction(SIGCHLD, &sa, NULL);
124
125                 pid = fork();
126                 if (pid < 0)
127                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
128
129                 else if (pid == 0) {
130                         struct rlimit rl = {};
131
132                         /* Enable default signal handler for core dump */
133                         zero(sa);
134                         sa.sa_handler = SIG_DFL;
135                         sigaction(sig, &sa, NULL);
136
137                         /* Don't limit the core dump size */
138                         rl.rlim_cur = RLIM_INFINITY;
139                         rl.rlim_max = RLIM_INFINITY;
140                         setrlimit(RLIMIT_CORE, &rl);
141
142                         /* Just to be sure... */
143                         chdir("/");
144
145                         /* Raise the signal again */
146                         raise(sig);
147
148                         assert_not_reached("We shouldn't be here...");
149                         _exit(1);
150
151                 } else {
152                         siginfo_t status;
153                         int r;
154
155                         /* Order things nicely. */
156                         r = wait_for_terminate(pid, &status);
157                         if (r < 0)
158                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
159                         else if (status.si_code != CLD_DUMPED)
160                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
161                         else
162                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
163                 }
164         }
165
166         if (arg_crash_chvt)
167                 chvt(arg_crash_chvt);
168
169         if (arg_crash_shell) {
170                 struct sigaction sa = {
171                         .sa_handler = SIG_IGN,
172                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
173                 };
174                 pid_t pid;
175
176                 log_info("Executing crash shell in 10s...");
177                 sleep(10);
178
179                 /* Let the kernel reap children for us */
180                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
181
182                 pid = fork();
183                 if (pid < 0)
184                         log_error("Failed to fork off crash shell: %m");
185                 else if (pid == 0) {
186                         make_console_stdio();
187                         execl("/bin/sh", "/bin/sh", NULL);
188
189                         log_error("execl() failed: %m");
190                         _exit(1);
191                 }
192
193                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
194         }
195
196         log_info("Freezing execution.");
197         freeze();
198 }
199
200 static void install_crash_handler(void) {
201         struct sigaction sa = {
202                 .sa_handler = crash,
203                 .sa_flags = SA_NODEFER,
204         };
205
206         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
207 }
208
209 static int console_setup(bool do_reset) {
210         int tty_fd, r;
211
212         /* If we are init, we connect stdin/stdout/stderr to /dev/null
213          * and make sure we don't have a controlling tty. */
214
215         release_terminal();
216
217         if (!do_reset)
218                 return 0;
219
220         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
221         if (tty_fd < 0) {
222                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
223                 return -tty_fd;
224         }
225
226         /* We don't want to force text mode.
227          * plymouth may be showing pictures already from initrd. */
228         r = reset_terminal_fd(tty_fd, false);
229         if (r < 0)
230                 log_error("Failed to reset /dev/console: %s", strerror(-r));
231
232         close_nointr_nofail(tty_fd);
233         return r;
234 }
235
236 static int set_default_unit(const char *u) {
237         char *c;
238
239         assert(u);
240
241         c = strdup(u);
242         if (!c)
243                 return -ENOMEM;
244
245         free(arg_default_unit);
246         arg_default_unit = c;
247
248         return 0;
249 }
250
251 static int parse_proc_cmdline_word(const char *word) {
252
253         static const char * const rlmap[] = {
254                 "emergency", SPECIAL_EMERGENCY_TARGET,
255                 "-b",        SPECIAL_EMERGENCY_TARGET,
256                 "single",    SPECIAL_RESCUE_TARGET,
257                 "-s",        SPECIAL_RESCUE_TARGET,
258                 "s",         SPECIAL_RESCUE_TARGET,
259                 "S",         SPECIAL_RESCUE_TARGET,
260                 "1",         SPECIAL_RESCUE_TARGET,
261                 "2",         SPECIAL_RUNLEVEL2_TARGET,
262                 "3",         SPECIAL_RUNLEVEL3_TARGET,
263                 "4",         SPECIAL_RUNLEVEL4_TARGET,
264                 "5",         SPECIAL_RUNLEVEL5_TARGET,
265         };
266
267         assert(word);
268
269         if (startswith(word, "systemd.unit=")) {
270
271                 if (!in_initrd())
272                         return set_default_unit(word + 13);
273
274         } else if (startswith(word, "rd.systemd.unit=")) {
275
276                 if (in_initrd())
277                         return set_default_unit(word + 16);
278
279         } else if (startswith(word, "systemd.log_target=")) {
280
281                 if (log_set_target_from_string(word + 19) < 0)
282                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
283
284         } else if (startswith(word, "systemd.log_level=")) {
285
286                 if (log_set_max_level_from_string(word + 18) < 0)
287                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
288
289         } else if (startswith(word, "systemd.log_color=")) {
290
291                 if (log_show_color_from_string(word + 18) < 0)
292                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
293
294         } else if (startswith(word, "systemd.log_location=")) {
295
296                 if (log_show_location_from_string(word + 21) < 0)
297                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
298
299         } else if (startswith(word, "systemd.dump_core=")) {
300                 int r;
301
302                 if ((r = parse_boolean(word + 18)) < 0)
303                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
304                 else
305                         arg_dump_core = r;
306
307         } else if (startswith(word, "systemd.crash_shell=")) {
308                 int r;
309
310                 if ((r = parse_boolean(word + 20)) < 0)
311                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
312                 else
313                         arg_crash_shell = r;
314
315         } else if (startswith(word, "systemd.confirm_spawn=")) {
316                 int r;
317
318                 if ((r = parse_boolean(word + 22)) < 0)
319                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
320                 else
321                         arg_confirm_spawn = r;
322
323         } else if (startswith(word, "systemd.crash_chvt=")) {
324                 int k;
325
326                 if (safe_atoi(word + 19, &k) < 0)
327                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
328                 else
329                         arg_crash_chvt = k;
330
331         } else if (startswith(word, "systemd.show_status=")) {
332                 int r;
333
334                 if ((r = parse_boolean(word + 20)) < 0)
335                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
336                 else
337                         arg_show_status = r;
338         } else if (startswith(word, "systemd.default_standard_output=")) {
339                 int r;
340
341                 if ((r = exec_output_from_string(word + 32)) < 0)
342                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
343                 else
344                         arg_default_std_output = r;
345         } else if (startswith(word, "systemd.default_standard_error=")) {
346                 int r;
347
348                 if ((r = exec_output_from_string(word + 31)) < 0)
349                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
350                 else
351                         arg_default_std_error = r;
352         } else if (startswith(word, "systemd.setenv=")) {
353                 _cleanup_free_ char *cenv = NULL;
354
355                 cenv = strdup(word + 15);
356                 if (!cenv)
357                         return -ENOMEM;
358
359                 if (env_assignment_is_valid(cenv)) {
360                         char **env;
361
362                         env = strv_env_set(arg_default_environment, cenv);
363                         if (env)
364                                 arg_default_environment = env;
365                         else
366                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
367                 } else
368                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
369
370         } else if (startswith(word, "systemd.") ||
371                    (in_initrd() && startswith(word, "rd.systemd."))) {
372
373                 const char *c;
374
375                 /* Ignore systemd.journald.xyz and friends */
376                 c = word;
377                 if (startswith(c, "rd."))
378                         c += 3;
379                 if (startswith(c, "systemd."))
380                         c += 8;
381                 if (c[strcspn(c, ".=")] != '.')  {
382
383                         log_warning("Unknown kernel switch %s. Ignoring.", word);
384
385                         log_info("Supported kernel switches:\n"
386                                  "systemd.unit=UNIT                        Default unit to start\n"
387                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
388                                  "systemd.dump_core=0|1                    Dump core on crash\n"
389                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
390                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
391                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
392                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
393                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
394                                  "                                         Log target\n"
395                                  "systemd.log_level=LEVEL                  Log level\n"
396                                  "systemd.log_color=0|1                    Highlight important log messages\n"
397                                  "systemd.log_location=0|1                 Include code location in log messages\n"
398                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
399                                  "                                         Set default log output for services\n"
400                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
401                                  "                                         Set default log error output for services\n"
402                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
403                 }
404
405         } else if (streq(word, "quiet"))
406                 arg_show_status = false;
407         else if (streq(word, "debug")) {
408                 /* Log to kmsg, the journal socket will fill up before the
409                  * journal is started and tools running during that time
410                  * will block with every log message for for 60 seconds,
411                  * before they give up. */
412                 log_set_max_level(LOG_DEBUG);
413                 log_set_target(LOG_TARGET_KMSG);
414         } else if (!in_initrd()) {
415                 unsigned i;
416
417                 /* SysV compatibility */
418                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
419                         if (streq(word, rlmap[i]))
420                                 return set_default_unit(rlmap[i+1]);
421         }
422
423         return 0;
424 }
425
/* Generates a configuration parser callback called `name` that passes the
 * right-hand side of an assignment to func(). If func() returns a negative
 * value the problem is reported via log_syntax(), using `descr` to say
 * which setting was invalid. The generated callback always returns 0, so
 * an invalid value never aborts parsing. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        /* Note the space between descr and the    */ \
                        /* quoted value: "Invalid log level 'x'"   */ \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
456
457
458 static int config_parse_cpu_affinity2(const char *unit,
459                                       const char *filename,
460                                       unsigned line,
461                                       const char *section,
462                                       const char *lvalue,
463                                       int ltype,
464                                       const char *rvalue,
465                                       void *data,
466                                       void *userdata) {
467
468         char *w;
469         size_t l;
470         char *state;
471         cpu_set_t *c = NULL;
472         unsigned ncpus = 0;
473
474         assert(filename);
475         assert(lvalue);
476         assert(rvalue);
477
478         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
479                 char *t;
480                 int r;
481                 unsigned cpu;
482
483                 if (!(t = strndup(w, l)))
484                         return log_oom();
485
486                 r = safe_atou(t, &cpu);
487                 free(t);
488
489                 if (!c)
490                         if (!(c = cpu_set_malloc(&ncpus)))
491                                 return log_oom();
492
493                 if (r < 0 || cpu >= ncpus) {
494                         log_syntax(unit, LOG_ERR, filename, line, -r,
495                                    "Failed to parse CPU affinity '%s'", rvalue);
496                         CPU_FREE(c);
497                         return -EBADMSG;
498                 }
499
500                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
501         }
502
503         if (c) {
504                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
505                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
506
507                 CPU_FREE(c);
508         }
509
510         return 0;
511 }
512
/* Frees a NULL-terminated array of string lists: each element is released
 * with strv_free(), then the array itself. A NULL argument is a no-op. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (!l)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
524
525 static void free_join_controllers(void) {
526         strv_free_free(arg_join_controllers);
527         arg_join_controllers = NULL;
528 }
529
530 static int config_parse_join_controllers(const char *unit,
531                                          const char *filename,
532                                          unsigned line,
533                                          const char *section,
534                                          const char *lvalue,
535                                          int ltype,
536                                          const char *rvalue,
537                                          void *data,
538                                          void *userdata) {
539
540         unsigned n = 0;
541         char *state, *w;
542         size_t length;
543
544         assert(filename);
545         assert(lvalue);
546         assert(rvalue);
547
548         free_join_controllers();
549
550         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
551                 char *s, **l;
552
553                 s = strndup(w, length);
554                 if (!s)
555                         return log_oom();
556
557                 l = strv_split(s, ",");
558                 free(s);
559
560                 strv_uniq(l);
561
562                 if (strv_length(l) <= 1) {
563                         strv_free(l);
564                         continue;
565                 }
566
567                 if (!arg_join_controllers) {
568                         arg_join_controllers = new(char**, 2);
569                         if (!arg_join_controllers) {
570                                 strv_free(l);
571                                 return log_oom();
572                         }
573
574                         arg_join_controllers[0] = l;
575                         arg_join_controllers[1] = NULL;
576
577                         n = 1;
578                 } else {
579                         char ***a;
580                         char ***t;
581
582                         t = new0(char**, n+2);
583                         if (!t) {
584                                 strv_free(l);
585                                 return log_oom();
586                         }
587
588                         n = 0;
589
590                         for (a = arg_join_controllers; *a; a++) {
591
592                                 if (strv_overlap(*a, l)) {
593                                         char **c;
594
595                                         c = strv_merge(*a, l);
596                                         if (!c) {
597                                                 strv_free(l);
598                                                 strv_free_free(t);
599                                                 return log_oom();
600                                         }
601
602                                         strv_free(l);
603                                         l = c;
604                                 } else {
605                                         char **c;
606
607                                         c = strv_copy(*a);
608                                         if (!c) {
609                                                 strv_free(l);
610                                                 strv_free_free(t);
611                                                 return log_oom();
612                                         }
613
614                                         t[n++] = c;
615                                 }
616                         }
617
618                         t[n++] = strv_uniq(l);
619
620                         strv_free_free(arg_join_controllers);
621                         arg_join_controllers = t;
622                 }
623         }
624
625         return 0;
626 }
627
628 static int parse_config_file(void) {
629
630         const ConfigTableItem items[] = {
631                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
632                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
633                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
634                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
635                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
636                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
637                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
638                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
639                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
640                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
641                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
642                 { "Manager", "DefaultTimeoutStartSec", config_parse_sec,         0, &arg_default_timeout_start_usec },
643                 { "Manager", "DefaultTimeoutStopSec", config_parse_sec,          0, &arg_default_timeout_stop_usec  },
644                 { "Manager", "DefaultRestartSec",     config_parse_sec,          0, &arg_default_restart_usec  },
645                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
646                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
647                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
648                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
649                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
650                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
651                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
652                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
653                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
654                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
655                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
656                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
657                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
658                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
659                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
660                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
661                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
662                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
663                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
664                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
665                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
666                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
667                 { NULL, NULL, NULL, 0, NULL }
668         };
669
670         _cleanup_fclose_ FILE *f;
671         const char *fn;
672         int r;
673
674         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
675         f = fopen(fn, "re");
676         if (!f) {
677                 if (errno == ENOENT)
678                         return 0;
679
680                 log_warning("Failed to open configuration file '%s': %m", fn);
681                 return 0;
682         }
683
684         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
685         if (r < 0)
686                 log_warning("Failed to parse configuration file: %s", strerror(-r));
687
688         return 0;
689 }
690
691 static int parse_proc_cmdline(void) {
692         _cleanup_free_ char *line = NULL;
693         char *w, *state;
694         size_t l;
695         int r;
696
697         r = proc_cmdline(&line);
698         if (r < 0)
699                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
700         if (r <= 0)
701                 return 0;
702
703         FOREACH_WORD_QUOTED(w, l, line, state) {
704                 _cleanup_free_ char *word;
705
706                 word = strndup(w, l);
707                 if (!word)
708                         return log_oom();
709
710                 r = parse_proc_cmdline_word(word);
711                 if (r < 0) {
712                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
713                         return r;
714                 }
715         }
716
717         return 0;
718 }
719
720 static int parse_argv(int argc, char *argv[]) {
721
722         enum {
723                 ARG_LOG_LEVEL = 0x100,
724                 ARG_LOG_TARGET,
725                 ARG_LOG_COLOR,
726                 ARG_LOG_LOCATION,
727                 ARG_UNIT,
728                 ARG_SYSTEM,
729                 ARG_USER,
730                 ARG_TEST,
731                 ARG_VERSION,
732                 ARG_DUMP_CONFIGURATION_ITEMS,
733                 ARG_DUMP_CORE,
734                 ARG_CRASH_SHELL,
735                 ARG_CONFIRM_SPAWN,
736                 ARG_SHOW_STATUS,
737                 ARG_DESERIALIZE,
738                 ARG_SWITCHED_ROOT,
739                 ARG_DEFAULT_STD_OUTPUT,
740                 ARG_DEFAULT_STD_ERROR
741         };
742
743         static const struct option options[] = {
744                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
745                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
746                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
747                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
748                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
749                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
750                 { "user",                     no_argument,       NULL, ARG_USER                     },
751                 { "test",                     no_argument,       NULL, ARG_TEST                     },
752                 { "help",                     no_argument,       NULL, 'h'                          },
753                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
754                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
755                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
756                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
757                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
758                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
759                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
760                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
761                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
762                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
763                 { NULL,                       0,                 NULL, 0                            }
764         };
765
766         int c, r;
767
768         assert(argc >= 1);
769         assert(argv);
770
771         if (getpid() == 1)
772                 opterr = 0;
773
774         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
775
776                 switch (c) {
777
778                 case ARG_LOG_LEVEL:
779                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
780                                 log_error("Failed to parse log level %s.", optarg);
781                                 return r;
782                         }
783
784                         break;
785
786                 case ARG_LOG_TARGET:
787
788                         if ((r = log_set_target_from_string(optarg)) < 0) {
789                                 log_error("Failed to parse log target %s.", optarg);
790                                 return r;
791                         }
792
793                         break;
794
795                 case ARG_LOG_COLOR:
796
797                         if (optarg) {
798                                 if ((r = log_show_color_from_string(optarg)) < 0) {
799                                         log_error("Failed to parse log color setting %s.", optarg);
800                                         return r;
801                                 }
802                         } else
803                                 log_show_color(true);
804
805                         break;
806
807                 case ARG_LOG_LOCATION:
808
809                         if (optarg) {
810                                 if ((r = log_show_location_from_string(optarg)) < 0) {
811                                         log_error("Failed to parse log location setting %s.", optarg);
812                                         return r;
813                                 }
814                         } else
815                                 log_show_location(true);
816
817                         break;
818
819                 case ARG_DEFAULT_STD_OUTPUT:
820
821                         if ((r = exec_output_from_string(optarg)) < 0) {
822                                 log_error("Failed to parse default standard output setting %s.", optarg);
823                                 return r;
824                         } else
825                                 arg_default_std_output = r;
826                         break;
827
828                 case ARG_DEFAULT_STD_ERROR:
829
830                         if ((r = exec_output_from_string(optarg)) < 0) {
831                                 log_error("Failed to parse default standard error output setting %s.", optarg);
832                                 return r;
833                         } else
834                                 arg_default_std_error = r;
835                         break;
836
837                 case ARG_UNIT:
838
839                         if ((r = set_default_unit(optarg)) < 0) {
840                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
841                                 return r;
842                         }
843
844                         break;
845
846                 case ARG_SYSTEM:
847                         arg_running_as = SYSTEMD_SYSTEM;
848                         break;
849
850                 case ARG_USER:
851                         arg_running_as = SYSTEMD_USER;
852                         break;
853
854                 case ARG_TEST:
855                         arg_action = ACTION_TEST;
856                         break;
857
858                 case ARG_VERSION:
859                         arg_action = ACTION_VERSION;
860                         break;
861
862                 case ARG_DUMP_CONFIGURATION_ITEMS:
863                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
864                         break;
865
866                 case ARG_DUMP_CORE:
867                         r = optarg ? parse_boolean(optarg) : 1;
868                         if (r < 0) {
869                                 log_error("Failed to parse dump core boolean %s.", optarg);
870                                 return r;
871                         }
872                         arg_dump_core = r;
873                         break;
874
875                 case ARG_CRASH_SHELL:
876                         r = optarg ? parse_boolean(optarg) : 1;
877                         if (r < 0) {
878                                 log_error("Failed to parse crash shell boolean %s.", optarg);
879                                 return r;
880                         }
881                         arg_crash_shell = r;
882                         break;
883
884                 case ARG_CONFIRM_SPAWN:
885                         r = optarg ? parse_boolean(optarg) : 1;
886                         if (r < 0) {
887                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
888                                 return r;
889                         }
890                         arg_confirm_spawn = r;
891                         break;
892
893                 case ARG_SHOW_STATUS:
894                         r = optarg ? parse_boolean(optarg) : 1;
895                         if (r < 0) {
896                                 log_error("Failed to parse show status boolean %s.", optarg);
897                                 return r;
898                         }
899                         arg_show_status = r;
900                         break;
901
902                 case ARG_DESERIALIZE: {
903                         int fd;
904                         FILE *f;
905
906                         r = safe_atoi(optarg, &fd);
907                         if (r < 0 || fd < 0) {
908                                 log_error("Failed to parse deserialize option %s.", optarg);
909                                 return r < 0 ? r : -EINVAL;
910                         }
911
912                         fd_cloexec(fd, true);
913
914                         f = fdopen(fd, "r");
915                         if (!f) {
916                                 log_error("Failed to open serialization fd: %m");
917                                 return -errno;
918                         }
919
920                         if (serialization)
921                                 fclose(serialization);
922
923                         serialization = f;
924
925                         break;
926                 }
927
928                 case ARG_SWITCHED_ROOT:
929                         arg_switched_root = true;
930                         break;
931
932                 case 'h':
933                         arg_action = ACTION_HELP;
934                         break;
935
936                 case 'D':
937                         log_set_max_level(LOG_DEBUG);
938                         break;
939
940                 case 'b':
941                 case 's':
942                 case 'z':
943                         /* Just to eat away the sysvinit kernel
944                          * cmdline args without getopt() error
945                          * messages that we'll parse in
946                          * parse_proc_cmdline_word() or ignore. */
947
948                 case '?':
949                 default:
950                         if (getpid() != 1) {
951                                 log_error("Unknown option code %c", c);
952                                 return -EINVAL;
953                         }
954
955                         break;
956                 }
957
958         if (optind < argc && getpid() != 1) {
959                 /* Hmm, when we aren't run as init system
960                  * let's complain about excess arguments */
961
962                 log_error("Excess arguments.");
963                 return -EINVAL;
964         }
965
966         if (detect_container(NULL) > 0) {
967                 char **a;
968
969                 /* All /proc/cmdline arguments the kernel didn't
970                  * understand it passed to us. We're not really
971                  * interested in that usually since /proc/cmdline is
972                  * more interesting and complete. With one exception:
973                  * if we are run in a container /proc/cmdline is not
974                  * relevant for the container, hence we rely on argv[]
975                  * instead. */
976
977                 for (a = argv; a < argv + argc; a++) {
978                         r = parse_proc_cmdline_word(*a);
979                         if (r < 0) {
980                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
981                                 return r;
982                         }
983                 }
984         }
985
986         return 0;
987 }
988
989 static int help(void) {
990
991         printf("%s [OPTIONS...]\n\n"
992                "Starts up and maintains the system or user services.\n\n"
993                "  -h --help                      Show this help\n"
994                "     --test                      Determine startup sequence, dump it and exit\n"
995                "     --dump-configuration-items  Dump understood unit configuration items\n"
996                "     --unit=UNIT                 Set default unit\n"
997                "     --system                    Run a system instance, even if PID != 1\n"
998                "     --user                      Run a user instance\n"
999                "     --dump-core[=0|1]           Dump core on crash\n"
1000                "     --crash-shell[=0|1]         Run shell on crash\n"
1001                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1002                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1003                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1004                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1005                "     --log-color[=0|1]           Highlight important log messages\n"
1006                "     --log-location[=0|1]        Include code location in log messages\n"
1007                "     --default-standard-output=  Set default standard output for services\n"
1008                "     --default-standard-error=   Set default standard error output for services\n",
1009                program_invocation_short_name);
1010
1011         return 0;
1012 }
1013
1014 static int version(void) {
1015         puts(PACKAGE_STRING);
1016         puts(SYSTEMD_FEATURES);
1017
1018         return 0;
1019 }
1020
1021 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1022         FILE *f = NULL;
1023         FDSet *fds = NULL;
1024         int r;
1025
1026         assert(m);
1027         assert(_f);
1028         assert(_fds);
1029
1030         r = manager_open_serialization(m, &f);
1031         if (r < 0) {
1032                 log_error("Failed to create serialization file: %s", strerror(-r));
1033                 goto fail;
1034         }
1035
1036         /* Make sure nothing is really destructed when we shut down */
1037         m->n_reloading ++;
1038         bus_broadcast_reloading(m, true);
1039
1040         fds = fdset_new();
1041         if (!fds) {
1042                 r = -ENOMEM;
1043                 log_error("Failed to allocate fd set: %s", strerror(-r));
1044                 goto fail;
1045         }
1046
1047         r = manager_serialize(m, f, fds, switching_root);
1048         if (r < 0) {
1049                 log_error("Failed to serialize state: %s", strerror(-r));
1050                 goto fail;
1051         }
1052
1053         if (fseeko(f, 0, SEEK_SET) < 0) {
1054                 log_error("Failed to rewind serialization fd: %m");
1055                 goto fail;
1056         }
1057
1058         r = fd_cloexec(fileno(f), false);
1059         if (r < 0) {
1060                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1061                 goto fail;
1062         }
1063
1064         r = fdset_cloexec(fds, false);
1065         if (r < 0) {
1066                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1067                 goto fail;
1068         }
1069
1070         *_f = f;
1071         *_fds = fds;
1072
1073         return 0;
1074
1075 fail:
1076         fdset_free(fds);
1077
1078         if (f)
1079                 fclose(f);
1080
1081         return r;
1082 }
1083
1084 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1085         struct rlimit nl;
1086         int r;
1087
1088         assert(saved_rlimit);
1089
1090         /* Save the original RLIMIT_NOFILE so that we can reset it
1091          * later when transitioning from the initrd to the main
1092          * systemd or suchlike. */
1093         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1094                 log_error("Reading RLIMIT_NOFILE failed: %m");
1095                 return -errno;
1096         }
1097
1098         /* Make sure forked processes get the default kernel setting */
1099         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1100                 struct rlimit *rl;
1101
1102                 rl = newdup(struct rlimit, saved_rlimit, 1);
1103                 if (!rl)
1104                         return log_oom();
1105
1106                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1107         }
1108
1109         /* Bump up the resource limit for ourselves substantially */
1110         nl.rlim_cur = nl.rlim_max = 64*1024;
1111         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1112         if (r < 0) {
1113                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1114                 return r;
1115         }
1116
1117         return 0;
1118 }
1119
/* Warns if /etc/mtab is not a symlink whose target is /proc/self/mounts
 * or /proc/mounts. Purely diagnostic: nothing is changed or returned. */
static void test_mtab(void) {
        char *target = NULL;
        bool points_to_proc = false;

        /* If the link cannot be read (e.g. because it is not a symlink
         * at all) we fall through to the warning. */
        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                points_to_proc =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");

                free(target);
        }

        if (points_to_proc)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1139
/* Warns if /usr looks like a separate file system that has not been
 * mounted yet, which is detected by dir_is_empty("/usr") reporting a
 * positive result. Purely diagnostic: nothing is changed or returned. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        /* Both "not empty" (0) and errors (< 0) count as fine here */
        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1151
/* Checks for /proc/cgroups. If it is missing, a warning about the lack of
 * control group support is logged and boot-up is delayed by ten seconds
 * before returning. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* Pause after logging, as the message announces */
                sleep(10);
        }
}
1166
1167 static int initialize_join_controllers(void) {
1168         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1169          * + "net_prio". We'd like to add "cpuset" to the mix, but
1170          * "cpuset" does't really work for groups with no initialized
1171          * attributes. */
1172
1173         arg_join_controllers = new(char**, 3);
1174         if (!arg_join_controllers)
1175                 return -ENOMEM;
1176
1177         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1178         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1179         arg_join_controllers[2] = NULL;
1180
1181         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1182                 free_join_controllers();
1183                 return -ENOMEM;
1184         }
1185
1186         return 0;
1187 }
1188
1189 int main(int argc, char *argv[]) {
1190         Manager *m = NULL;
1191         int r, retval = EXIT_FAILURE;
1192         usec_t before_startup, after_startup;
1193         char timespan[FORMAT_TIMESPAN_MAX];
1194         FDSet *fds = NULL;
1195         bool reexecute = false;
1196         const char *shutdown_verb = NULL;
1197         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1198         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1199         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1200         static char systemd[] = "systemd";
1201         bool skip_setup = false;
1202         int j;
1203         bool loaded_policy = false;
1204         bool arm_reboot_watchdog = false;
1205         bool queue_default_job = false;
1206         char *switch_root_dir = NULL, *switch_root_init = NULL;
1207         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1208
1209 #ifdef HAVE_SYSV_COMPAT
1210         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1211                 /* This is compatibility support for SysV, where
1212                  * calling init as a user is identical to telinit. */
1213
1214                 errno = -ENOENT;
1215                 execv(SYSTEMCTL_BINARY_PATH, argv);
1216                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1217                 return 1;
1218         }
1219 #endif
1220
1221         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1222         dual_timestamp_get(&userspace_timestamp);
1223
1224         /* Determine if this is a reexecution or normal bootup. We do
1225          * the full command line parsing much later, so let's just
1226          * have a quick peek here. */
1227         if (strv_find(argv+1, "--deserialize"))
1228                 skip_setup = true;
1229
1230         /* If we have switched root, do all the special setup
1231          * things */
1232         if (strv_find(argv+1, "--switched-root"))
1233                 skip_setup = false;
1234
1235         /* If we get started via the /sbin/init symlink then we are
1236            called 'init'. After a subsequent reexecution we are then
1237            called 'systemd'. That is confusing, hence let's call us
1238            systemd right-away. */
1239         program_invocation_short_name = systemd;
1240         prctl(PR_SET_NAME, systemd);
1241
1242         saved_argv = argv;
1243         saved_argc = argc;
1244
1245         log_show_color(isatty(STDERR_FILENO) > 0);
1246
1247         /* Disable the umask logic */
1248         if (getpid() == 1)
1249                 umask(0);
1250
1251         if (getpid() == 1 && detect_container(NULL) <= 0) {
1252
1253                 /* Running outside of a container as PID 1 */
1254                 arg_running_as = SYSTEMD_SYSTEM;
1255                 make_null_stdio();
1256                 log_set_target(LOG_TARGET_KMSG);
1257                 log_open();
1258
1259                 if (in_initrd())
1260                         initrd_timestamp = userspace_timestamp;
1261
1262                 if (!skip_setup) {
1263                         mount_setup_early();
1264                         if (selinux_setup(&loaded_policy) < 0)
1265                                 goto finish;
1266                         if (ima_setup() < 0)
1267                                 goto finish;
1268                         if (smack_setup() < 0)
1269                                 goto finish;
1270                 }
1271
1272                 if (label_init(NULL) < 0)
1273                         goto finish;
1274
1275                 if (!skip_setup) {
1276                         if (hwclock_is_localtime() > 0) {
1277                                 int min;
1278
1279                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1280                                 r = hwclock_set_timezone(&min);
1281                                 if (r < 0)
1282                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1283                                 else
1284                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1285                         } else if (!in_initrd()) {
1286                                 /*
1287                                  * Do dummy first-time call to seal the kernel's time warp magic
1288                                  *
1289                                  * Do not call this this from inside the initrd. The initrd might not
1290                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1291                                  * that way. In such case, we need to delay the time-warp or the sealing
1292                                  * until we reach the real system.
1293                                  */
1294                                 hwclock_reset_timezone();
1295
1296                                 /* Tell the kernel our timezone */
1297                                 r = hwclock_set_timezone(NULL);
1298                                 if (r < 0)
1299                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1300                         }
1301                 }
1302
1303                 /* Set the default for later on, but don't actually
1304                  * open the logs like this for now. Note that if we
1305                  * are transitioning from the initrd there might still
1306                  * be journal fd open, and we shouldn't attempt
1307                  * opening that before we parsed /proc/cmdline which
1308                  * might redirect output elsewhere. */
1309                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1310
1311         } else if (getpid() == 1) {
1312                 /* Running inside a container, as PID 1 */
1313                 arg_running_as = SYSTEMD_SYSTEM;
1314                 log_set_target(LOG_TARGET_CONSOLE);
1315                 log_open();
1316
1317                 /* For the later on, see above... */
1318                 log_set_target(LOG_TARGET_JOURNAL);
1319
1320                 /* clear the kernel timestamp,
1321                  * because we are in a container */
1322                 kernel_timestamp.monotonic = 0ULL;
1323                 kernel_timestamp.realtime = 0ULL;
1324
1325         } else {
1326                 /* Running as user instance */
1327                 arg_running_as = SYSTEMD_USER;
1328                 log_set_target(LOG_TARGET_AUTO);
1329                 log_open();
1330
1331                 /* clear the kernel timestamp,
1332                  * because we are not PID 1 */
1333                 kernel_timestamp.monotonic = 0ULL;
1334                 kernel_timestamp.realtime = 0ULL;
1335         }
1336
1337         /* Initialize default unit */
1338         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1339         if (r < 0) {
1340                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1341                 goto finish;
1342         }
1343
1344         r = initialize_join_controllers();
1345         if (r < 0)
1346                 goto finish;
1347
1348         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1349          * /proc/$PID/fd is available. */
1350         if (getpid() == 1) {
1351                 r = mount_setup(loaded_policy);
1352                 if (r < 0)
1353                         goto finish;
1354         }
1355
1356         /* Reset all signal handlers. */
1357         assert_se(reset_all_signal_handlers() == 0);
1358
1359         ignore_signals(SIGNALS_IGNORE, -1);
1360
1361         if (parse_config_file() < 0)
1362                 goto finish;
1363
1364         if (arg_running_as == SYSTEMD_SYSTEM)
1365                 if (parse_proc_cmdline() < 0)
1366                         goto finish;
1367
1368         log_parse_environment();
1369
1370         if (parse_argv(argc, argv) < 0)
1371                 goto finish;
1372
1373         if (arg_action == ACTION_TEST &&
1374             geteuid() == 0) {
1375                 log_error("Don't run test mode as root.");
1376                 goto finish;
1377         }
1378
1379         if (arg_running_as == SYSTEMD_USER &&
1380             arg_action == ACTION_RUN &&
1381             sd_booted() <= 0) {
1382                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1383                 goto finish;
1384         }
1385
1386         if (arg_running_as == SYSTEMD_SYSTEM &&
1387             arg_action == ACTION_RUN &&
1388             running_in_chroot() > 0) {
1389                 log_error("Cannot be run in a chroot() environment.");
1390                 goto finish;
1391         }
1392
1393         if (arg_action == ACTION_HELP) {
1394                 retval = help();
1395                 goto finish;
1396         } else if (arg_action == ACTION_VERSION) {
1397                 retval = version();
1398                 goto finish;
1399         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1400                 unit_dump_config_items(stdout);
1401                 retval = EXIT_SUCCESS;
1402                 goto finish;
1403         } else if (arg_action == ACTION_DONE) {
1404                 retval = EXIT_SUCCESS;
1405                 goto finish;
1406         }
1407
1408         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1409
1410         /* Close logging fds, in order not to confuse fdset below */
1411         log_close();
1412
1413         /* Remember open file descriptors for later deserialization */
1414         r = fdset_new_fill(&fds);
1415         if (r < 0) {
1416                 log_error("Failed to allocate fd set: %s", strerror(-r));
1417                 goto finish;
1418         } else
1419                 fdset_cloexec(fds, true);
1420
1421         if (serialization)
1422                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1423
1424         if (arg_running_as == SYSTEMD_SYSTEM)
1425                 /* Become a session leader if we aren't one yet. */
1426                 setsid();
1427
1428         /* Move out of the way, so that we won't block unmounts */
1429         assert_se(chdir("/")  == 0);
1430
1431         /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */
1432         dbus_connection_set_change_sigpipe(FALSE);
1433
1434         /* Reset the console, but only if this is really init and we
1435          * are freshly booted */
1436         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1437                 console_setup(getpid() == 1 && !skip_setup);
1438
1439         /* Open the logging devices, if possible and necessary */
1440         log_open();
1441
1442         /* Make sure we leave a core dump without panicing the
1443          * kernel. */
1444         if (getpid() == 1) {
1445                 install_crash_handler();
1446
1447                 r = mount_cgroup_controllers(arg_join_controllers);
1448                 if (r < 0)
1449                         goto finish;
1450         }
1451
1452         if (arg_running_as == SYSTEMD_SYSTEM) {
1453                 const char *virtualization = NULL;
1454
1455                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1456
1457                 detect_virtualization(&virtualization);
1458                 if (virtualization)
1459                         log_info("Detected virtualization '%s'.", virtualization);
1460
1461                 if (in_initrd())
1462                         log_info("Running in initial RAM disk.");
1463
1464         } else
1465                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1466
1467         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1468                 if (arg_show_status || plymouth_running())
1469                         status_welcome();
1470
1471 #ifdef HAVE_KMOD
1472                 kmod_setup();
1473 #endif
1474                 hostname_setup();
1475                 machine_id_setup();
1476                 loopback_setup();
1477
1478                 test_mtab();
1479                 test_usr();
1480                 test_cgroups();
1481         }
1482
1483         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1484                 watchdog_set_timeout(&arg_runtime_watchdog);
1485
1486         if (arg_timer_slack_nsec != (nsec_t) -1)
1487                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1488                         log_error("Failed to adjust timer slack: %m");
1489
1490         if (arg_capability_bounding_set_drop) {
1491                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1492                 if (r < 0) {
1493                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1494                         goto finish;
1495                 }
1496                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1497                 if (r < 0) {
1498                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1499                         goto finish;
1500                 }
1501         }
1502
1503         if (arg_running_as == SYSTEMD_USER) {
1504                 /* Become reaper of our children */
1505                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1506                         log_warning("Failed to make us a subreaper: %m");
1507                         if (errno == EINVAL)
1508                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1509                 }
1510         }
1511
1512         if (arg_running_as == SYSTEMD_SYSTEM)
1513                 bump_rlimit_nofile(&saved_rlimit_nofile);
1514
1515         r = manager_new(arg_running_as, !!serialization, &m);
1516         if (r < 0) {
1517                 log_error("Failed to allocate manager object: %s", strerror(-r));
1518                 goto finish;
1519         }
1520
1521         m->confirm_spawn = arg_confirm_spawn;
1522         m->default_std_output = arg_default_std_output;
1523         m->default_std_error = arg_default_std_error;
1524         m->default_restart_usec = arg_default_restart_usec;
1525         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1526         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1527         m->runtime_watchdog = arg_runtime_watchdog;
1528         m->shutdown_watchdog = arg_shutdown_watchdog;
1529         m->userspace_timestamp = userspace_timestamp;
1530         m->kernel_timestamp = kernel_timestamp;
1531         m->initrd_timestamp = initrd_timestamp;
1532
1533         manager_set_default_rlimits(m, arg_default_rlimit);
1534
1535         if (arg_default_environment)
1536                 manager_environment_add(m, arg_default_environment);
1537
1538         manager_set_show_status(m, arg_show_status);
1539
1540         /* Remember whether we should queue the default job */
1541         queue_default_job = !serialization || arg_switched_root;
1542
1543         before_startup = now(CLOCK_MONOTONIC);
1544
1545         r = manager_startup(m, serialization, fds);
1546         if (r < 0)
1547                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1548
1549         /* This will close all file descriptors that were opened, but
1550          * not claimed by any unit. */
1551         fdset_free(fds);
1552         fds = NULL;
1553
1554         if (serialization) {
1555                 fclose(serialization);
1556                 serialization = NULL;
1557         }
1558
1559         if (queue_default_job) {
1560                 DBusError error;
1561                 Unit *target = NULL;
1562                 Job *default_unit_job;
1563
1564                 dbus_error_init(&error);
1565
1566                 log_debug("Activating default unit: %s", arg_default_unit);
1567
1568                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1569                 if (r < 0) {
1570                         log_error("Failed to load default target: %s", bus_error(&error, r));
1571                         dbus_error_free(&error);
1572                 } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1573                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1574                 else if (target->load_state == UNIT_MASKED)
1575                         log_error("Default target masked.");
1576
1577                 if (!target || target->load_state != UNIT_LOADED) {
1578                         log_info("Trying to load rescue target...");
1579
1580                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1581                         if (r < 0) {
1582                                 log_error("Failed to load rescue target: %s", bus_error(&error, r));
1583                                 dbus_error_free(&error);
1584                                 goto finish;
1585                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1586                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1587                                 goto finish;
1588                         } else if (target->load_state == UNIT_MASKED) {
1589                                 log_error("Rescue target masked.");
1590                                 goto finish;
1591                         }
1592                 }
1593
1594                 assert(target->load_state == UNIT_LOADED);
1595
1596                 if (arg_action == ACTION_TEST) {
1597                         printf("-> By units:\n");
1598                         manager_dump_units(m, stdout, "\t");
1599                 }
1600
1601                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1602                 if (r == -EPERM) {
1603                         log_debug("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
1604                         dbus_error_free(&error);
1605
1606                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1607                         if (r < 0) {
1608                                 log_error("Failed to start default target: %s", bus_error(&error, r));
1609                                 dbus_error_free(&error);
1610                                 goto finish;
1611                         }
1612                 } else if (r < 0) {
1613                         log_error("Failed to isolate default target: %s", bus_error(&error, r));
1614                         dbus_error_free(&error);
1615                         goto finish;
1616                 }
1617
1618                 m->default_unit_job_id = default_unit_job->id;
1619
1620                 after_startup = now(CLOCK_MONOTONIC);
1621                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1622                          "Loaded units and determined initial transaction in %s.",
1623                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1624
1625                 if (arg_action == ACTION_TEST) {
1626                         printf("-> By jobs:\n");
1627                         manager_dump_jobs(m, stdout, "\t");
1628                         retval = EXIT_SUCCESS;
1629                         goto finish;
1630                 }
1631         }
1632
1633         for (;;) {
1634                 r = manager_loop(m);
1635                 if (r < 0) {
1636                         log_error("Failed to run mainloop: %s", strerror(-r));
1637                         goto finish;
1638                 }
1639
1640                 switch (m->exit_code) {
1641
1642                 case MANAGER_EXIT:
1643                         retval = EXIT_SUCCESS;
1644                         log_debug("Exit.");
1645                         goto finish;
1646
1647                 case MANAGER_RELOAD:
1648                         log_info("Reloading.");
1649                         r = manager_reload(m);
1650                         if (r < 0)
1651                                 log_error("Failed to reload: %s", strerror(-r));
1652                         break;
1653
1654                 case MANAGER_REEXECUTE:
1655
1656                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1657                                 goto finish;
1658
1659                         reexecute = true;
1660                         log_notice("Reexecuting.");
1661                         goto finish;
1662
1663                 case MANAGER_SWITCH_ROOT:
1664                         /* Steal the switch root parameters */
1665                         switch_root_dir = m->switch_root;
1666                         switch_root_init = m->switch_root_init;
1667                         m->switch_root = m->switch_root_init = NULL;
1668
1669                         if (!switch_root_init)
1670                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1671                                         goto finish;
1672
1673                         reexecute = true;
1674                         log_notice("Switching root.");
1675                         goto finish;
1676
1677                 case MANAGER_REBOOT:
1678                 case MANAGER_POWEROFF:
1679                 case MANAGER_HALT:
1680                 case MANAGER_KEXEC: {
1681                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1682                                 [MANAGER_REBOOT] = "reboot",
1683                                 [MANAGER_POWEROFF] = "poweroff",
1684                                 [MANAGER_HALT] = "halt",
1685                                 [MANAGER_KEXEC] = "kexec"
1686                         };
1687
1688                         assert_se(shutdown_verb = table[m->exit_code]);
1689                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1690
1691                         log_notice("Shutting down.");
1692                         goto finish;
1693                 }
1694
1695                 default:
1696                         assert_not_reached("Unknown exit code.");
1697                 }
1698         }
1699
1700 finish:
1701         if (m)
1702                 manager_free(m);
1703
1704         for (j = 0; j < RLIMIT_NLIMITS; j++)
1705                 free(arg_default_rlimit[j]);
1706
1707         free(arg_default_unit);
1708         free_join_controllers();
1709
1710         dbus_shutdown();
1711         label_finish();
1712
1713         if (reexecute) {
1714                 const char **args;
1715                 unsigned i, args_size;
1716
1717                 /* Close and disarm the watchdog, so that the new
1718                  * instance can reinitialize it, but doesn't get
1719                  * rebooted while we do that */
1720                 watchdog_close(true);
1721
1722                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1723                  * that the new systemd can pass the kernel default to
1724                  * its child processes */
1725                 if (saved_rlimit_nofile.rlim_cur > 0)
1726                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1727
1728                 if (switch_root_dir) {
1729                         /* Kill all remaining processes from the
1730                          * initrd, but don't wait for them, so that we
1731                          * can handle the SIGCHLD for them after
1732                          * deserializing. */
1733                         broadcast_signal(SIGTERM, false);
1734
1735                         /* And switch root */
1736                         r = switch_root(switch_root_dir);
1737                         if (r < 0)
1738                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1739                 }
1740
1741                 args_size = MAX(6, argc+1);
1742                 args = newa(const char*, args_size);
1743
1744                 if (!switch_root_init) {
1745                         char sfd[16];
1746
1747                         /* First try to spawn ourselves with the right
1748                          * path, and with full serialization. We do
1749                          * this only if the user didn't specify an
1750                          * explicit init to spawn. */
1751
1752                         assert(serialization);
1753                         assert(fds);
1754
1755                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1756                         char_array_0(sfd);
1757
1758                         i = 0;
1759                         args[i++] = SYSTEMD_BINARY_PATH;
1760                         if (switch_root_dir)
1761                                 args[i++] = "--switched-root";
1762                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1763                         args[i++] = "--deserialize";
1764                         args[i++] = sfd;
1765                         args[i++] = NULL;
1766
1767                         /* do not pass along the environment we inherit from the kernel or initrd */
1768                         if (switch_root_dir)
1769                                 clearenv();
1770
1771                         assert(i <= args_size);
1772                         execv(args[0], (char* const*) args);
1773                 }
1774
1775                 /* Try the fallback, if there is any, without any
1776                  * serialization. We pass the original argv[] and
1777                  * envp[]. (Well, modulo the ordering changes due to
1778                  * getopt() in argv[], and some cleanups in envp[],
1779                  * but let's hope that doesn't matter.) */
1780
1781                 if (serialization) {
1782                         fclose(serialization);
1783                         serialization = NULL;
1784                 }
1785
1786                 if (fds) {
1787                         fdset_free(fds);
1788                         fds = NULL;
1789                 }
1790
1791                 /* Reopen the console */
1792                 make_console_stdio();
1793
1794                 for (j = 1, i = 1; j < argc; j++)
1795                         args[i++] = argv[j];
1796                 args[i++] = NULL;
1797                 assert(i <= args_size);
1798
1799                 if (switch_root_init) {
1800                         args[0] = switch_root_init;
1801                         execv(args[0], (char* const*) args);
1802                         log_warning("Failed to execute configured init, trying fallback: %m");
1803                 }
1804
1805                 args[0] = "/sbin/init";
1806                 execv(args[0], (char* const*) args);
1807
1808                 if (errno == ENOENT) {
1809                         log_warning("No /sbin/init, trying fallback");
1810
1811                         args[0] = "/bin/sh";
1812                         args[1] = NULL;
1813                         execv(args[0], (char* const*) args);
1814                         log_error("Failed to execute /bin/sh, giving up: %m");
1815                 } else
1816                         log_warning("Failed to execute /sbin/init, giving up: %m");
1817         }
1818
1819         if (serialization)
1820                 fclose(serialization);
1821
1822         if (fds)
1823                 fdset_free(fds);
1824
1825         if (shutdown_verb) {
1826                 const char * command_line[] = {
1827                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1828                         shutdown_verb,
1829                         NULL
1830                 };
1831                 char **env_block;
1832
1833                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1834                         char e[32];
1835
1836                         /* If we reboot let's set the shutdown
1837                          * watchdog and tell the shutdown binary to
1838                          * repeatedly ping it */
1839                         watchdog_set_timeout(&arg_shutdown_watchdog);
1840                         watchdog_close(false);
1841
1842                         /* Tell the binary how often to ping */
1843                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1844                         char_array_0(e);
1845
1846                         env_block = strv_append(environ, e);
1847                 } else {
1848                         env_block = strv_copy(environ);
1849                         watchdog_close(true);
1850                 }
1851
1852                 /* Avoid the creation of new processes forked by the
1853                  * kernel; at this point, we will not listen to the
1854                  * signals anyway */
1855                 if (detect_container(NULL) <= 0)
1856                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1857
1858                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1859                 free(env_block);
1860                 log_error("Failed to execute shutdown binary, freezing: %m");
1861         }
1862
1863         if (getpid() == 1)
1864                 freeze();
1865
1866         return retval;
1867 }