chiark / gitweb /
valgrind: make running PID 1 in valgrind useful
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
/* Action selected for this invocation; defaults to ACTION_RUN. */
static enum {
        ACTION_RUN,
        ACTION_HELP,
        ACTION_VERSION,
        ACTION_TEST,
        ACTION_DUMP_CONFIGURATION_ITEMS,
        ACTION_DONE
} arg_action = ACTION_RUN;

/* Heap-allocated name of the default unit; replaced by set_default_unit(). */
static char *arg_default_unit = NULL;
static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;

/* Crash handling knobs, consulted by crash(). They can be set from the
 * kernel command line (systemd.dump_core=, systemd.crash_shell=,
 * systemd.crash_chvt=) and from the configuration file (DumpCore=,
 * CrashShell=, CrashChVT=). */
static bool arg_dump_core = true;
static bool arg_crash_shell = false;
static int arg_crash_chvt = -1;
/* Set via systemd.confirm_spawn= on the kernel command line. */
static bool arg_confirm_spawn = false;
/* Set via systemd.show_status=/ShowStatus=; cleared by "quiet". */
static bool arg_show_status = true;
static bool arg_switched_root = false;
/* NULL-terminated array of string vectors, filled in by
 * config_parse_join_controllers(). */
static char ***arg_join_controllers = NULL;
/* Defaults filled in from the "Manager" section of the configuration file
 * (see parse_config_file()) and, for the two ExecOutput settings and the
 * environment, from the kernel command line as well. */
static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
static usec_t arg_runtime_watchdog = 0;
static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
static char **arg_default_environment = NULL;
/* One slot per resource limit, indexed by RLIMIT_*; all NULL initially. */
static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
static uint64_t arg_capability_bounding_set_drop = 0;
static nsec_t arg_timer_slack_nsec = (nsec_t) -1;

static FILE* serialization = NULL;
111
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD while it waits for the child it forks for the core dump. */
static void nop_handler(int sig) {
        (void) sig;
}
114
115 _noreturn_ static void crash(int sig) {
116
117         if (getpid() != 1)
118                 /* Pass this on immediately, if this is not PID 1 */
119                 raise(sig);
120         else if (!arg_dump_core)
121                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
122         else {
123                 struct sigaction sa = {
124                         .sa_handler = nop_handler,
125                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
126                 };
127                 pid_t pid;
128
129                 /* We want to wait for the core process, hence let's enable SIGCHLD */
130                 sigaction(SIGCHLD, &sa, NULL);
131
132                 pid = fork();
133                 if (pid < 0)
134                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
135
136                 else if (pid == 0) {
137                         struct rlimit rl = {};
138
139                         /* Enable default signal handler for core dump */
140                         zero(sa);
141                         sa.sa_handler = SIG_DFL;
142                         sigaction(sig, &sa, NULL);
143
144                         /* Don't limit the core dump size */
145                         rl.rlim_cur = RLIM_INFINITY;
146                         rl.rlim_max = RLIM_INFINITY;
147                         setrlimit(RLIMIT_CORE, &rl);
148
149                         /* Just to be sure... */
150                         chdir("/");
151
152                         /* Raise the signal again */
153                         raise(sig);
154
155                         assert_not_reached("We shouldn't be here...");
156                         _exit(1);
157
158                 } else {
159                         siginfo_t status;
160                         int r;
161
162                         /* Order things nicely. */
163                         r = wait_for_terminate(pid, &status);
164                         if (r < 0)
165                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
166                         else if (status.si_code != CLD_DUMPED)
167                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
168                         else
169                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
170                 }
171         }
172
173         if (arg_crash_chvt)
174                 chvt(arg_crash_chvt);
175
176         if (arg_crash_shell) {
177                 struct sigaction sa = {
178                         .sa_handler = SIG_IGN,
179                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
180                 };
181                 pid_t pid;
182
183                 log_info("Executing crash shell in 10s...");
184                 sleep(10);
185
186                 /* Let the kernel reap children for us */
187                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
188
189                 pid = fork();
190                 if (pid < 0)
191                         log_error("Failed to fork off crash shell: %m");
192                 else if (pid == 0) {
193                         make_console_stdio();
194                         execl("/bin/sh", "/bin/sh", NULL);
195
196                         log_error("execl() failed: %m");
197                         _exit(1);
198                 }
199
200                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
201         }
202
203         log_info("Freezing execution.");
204         freeze();
205 }
206
207 static void install_crash_handler(void) {
208         struct sigaction sa = {
209                 .sa_handler = crash,
210                 .sa_flags = SA_NODEFER,
211         };
212
213         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
214 }
215
216 static int console_setup(bool do_reset) {
217         int tty_fd, r;
218
219         /* If we are init, we connect stdin/stdout/stderr to /dev/null
220          * and make sure we don't have a controlling tty. */
221
222         release_terminal();
223
224         if (!do_reset)
225                 return 0;
226
227         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
228         if (tty_fd < 0) {
229                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
230                 return -tty_fd;
231         }
232
233         /* We don't want to force text mode.
234          * plymouth may be showing pictures already from initrd. */
235         r = reset_terminal_fd(tty_fd, false);
236         if (r < 0)
237                 log_error("Failed to reset /dev/console: %s", strerror(-r));
238
239         close_nointr_nofail(tty_fd);
240         return r;
241 }
242
243 static int set_default_unit(const char *u) {
244         char *c;
245
246         assert(u);
247
248         c = strdup(u);
249         if (!c)
250                 return -ENOMEM;
251
252         free(arg_default_unit);
253         arg_default_unit = c;
254
255         return 0;
256 }
257
/* Interprets a single word taken from the kernel command line and updates
 * the matching arg_* setting or the logging configuration.
 *
 * Unparsable values are logged and ignored. Returns 0 in those cases and
 * for unrecognized words, or a negative errno-style value if storing the
 * default unit or copying an environment assignment fails. */
static int parse_proc_cmdline_word(const char *word) {

        /* SysV-style runlevel words, stored as (word, target unit) pairs. */
        static const char * const rlmap[] = {
                "emergency", SPECIAL_EMERGENCY_TARGET,
                "-b",        SPECIAL_EMERGENCY_TARGET,
                "single",    SPECIAL_RESCUE_TARGET,
                "-s",        SPECIAL_RESCUE_TARGET,
                "s",         SPECIAL_RESCUE_TARGET,
                "S",         SPECIAL_RESCUE_TARGET,
                "1",         SPECIAL_RESCUE_TARGET,
                "2",         SPECIAL_RUNLEVEL2_TARGET,
                "3",         SPECIAL_RUNLEVEL3_TARGET,
                "4",         SPECIAL_RUNLEVEL4_TARGET,
                "5",         SPECIAL_RUNLEVEL5_TARGET,
        };

        assert(word);

        /* In every branch below the numeric offset added to word is the
         * length of the prefix just matched, i.e. it points at the value. */

        if (startswith(word, "systemd.unit=")) {

                /* Only honoured outside the initrd; rd.systemd.unit= is the
                 * initrd counterpart. */
                if (!in_initrd())
                        return set_default_unit(word + 13);

        } else if (startswith(word, "rd.systemd.unit=")) {

                if (in_initrd())
                        return set_default_unit(word + 16);

        } else if (startswith(word, "systemd.log_target=")) {

                if (log_set_target_from_string(word + 19) < 0)
                        log_warning("Failed to parse log target %s. Ignoring.", word + 19);

        } else if (startswith(word, "systemd.log_level=")) {

                if (log_set_max_level_from_string(word + 18) < 0)
                        log_warning("Failed to parse log level %s. Ignoring.", word + 18);

        } else if (startswith(word, "systemd.log_color=")) {

                if (log_show_color_from_string(word + 18) < 0)
                        log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);

        } else if (startswith(word, "systemd.log_location=")) {

                if (log_show_location_from_string(word + 21) < 0)
                        log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);

        } else if (startswith(word, "systemd.dump_core=")) {
                int r;

                if ((r = parse_boolean(word + 18)) < 0)
                        log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
                else
                        arg_dump_core = r;

        } else if (startswith(word, "systemd.crash_shell=")) {
                int r;

                if ((r = parse_boolean(word + 20)) < 0)
                        log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
                else
                        arg_crash_shell = r;

        } else if (startswith(word, "systemd.confirm_spawn=")) {
                int r;

                if ((r = parse_boolean(word + 22)) < 0)
                        log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
                else
                        arg_confirm_spawn = r;

        } else if (startswith(word, "systemd.crash_chvt=")) {
                int k;

                if (safe_atoi(word + 19, &k) < 0)
                        log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
                else
                        arg_crash_chvt = k;

        } else if (startswith(word, "systemd.show_status=")) {
                int r;

                if ((r = parse_boolean(word + 20)) < 0)
                        log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
                else
                        arg_show_status = r;
        } else if (startswith(word, "systemd.default_standard_output=")) {
                int r;

                if ((r = exec_output_from_string(word + 32)) < 0)
                        log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
                else
                        arg_default_std_output = r;
        } else if (startswith(word, "systemd.default_standard_error=")) {
                int r;

                if ((r = exec_output_from_string(word + 31)) < 0)
                        log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
                else
                        arg_default_std_error = r;
        } else if (startswith(word, "systemd.setenv=")) {
                _cleanup_free_ char *cenv = NULL;

                cenv = strdup(word + 15);
                if (!cenv)
                        return -ENOMEM;

                if (env_assignment_is_valid(cenv)) {
                        char **env;

                        env = strv_env_set(arg_default_environment, cenv);
                        if (env)
                                arg_default_environment = env;
                        else
                                log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
                } else
                        log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);

        } else if (startswith(word, "systemd.") ||
                   (in_initrd() && startswith(word, "rd.systemd."))) {

                const char *c;

                /* Ignore systemd.journald.xyz and friends */
                c = word;
                if (startswith(c, "rd."))
                        c += 3;
                if (startswith(c, "systemd."))
                        c += 8;
                /* Warn only if no further '.' appears before the '=' (or the
                 * end of the word); a further '.' marks a namespaced switch
                 * such as the one named above, which is skipped silently. */
                if (c[strcspn(c, ".=")] != '.')  {

                        log_warning("Unknown kernel switch %s. Ignoring.", word);

                        log_info("Supported kernel switches:\n"
                                 "systemd.unit=UNIT                        Default unit to start\n"
                                 "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
                                 "systemd.dump_core=0|1                    Dump core on crash\n"
                                 "systemd.crash_shell=0|1                  Run shell on crash\n"
                                 "systemd.crash_chvt=N                     Change to VT #N on crash\n"
                                 "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
                                 "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
                                 "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
                                 "                                         Log target\n"
                                 "systemd.log_level=LEVEL                  Log level\n"
                                 "systemd.log_color=0|1                    Highlight important log messages\n"
                                 "systemd.log_location=0|1                 Include code location in log messages\n"
                                 "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
                                 "                                         Set default log output for services\n"
                                 "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
                                 "                                         Set default log error output for services\n"
                                 "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
                }

        } else if (streq(word, "quiet"))
                arg_show_status = false;
        else if (streq(word, "debug")) {
                /* Log to kmsg, the journal socket will fill up before the
                 * journal is started and tools running during that time
                 * will block with every log message for 60 seconds,
                 * before they give up. */
                log_set_max_level(LOG_DEBUG);
                log_set_target(LOG_TARGET_KMSG);
        } else if (!in_initrd()) {
                unsigned i;

                /* SysV compatibility */
                for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
                        if (streq(word, rlmap[i]))
                                return set_default_unit(rlmap[i+1]);
        }

        return 0;
}
432
433 #define DEFINE_SETTER(name, func, descr)                              \
434         static int name(const char *unit,                             \
435                         const char *filename,                         \
436                         unsigned line,                                \
437                         const char *section,                          \
438                         const char *lvalue,                           \
439                         int ltype,                                    \
440                         const char *rvalue,                           \
441                         void *data,                                   \
442                         void *userdata) {                             \
443                                                                       \
444                 int r;                                                \
445                                                                       \
446                 assert(filename);                                     \
447                 assert(lvalue);                                       \
448                 assert(rvalue);                                       \
449                                                                       \
450                 r = func(rvalue);                                     \
451                 if (r < 0)                                            \
452                         log_syntax(unit, LOG_ERR, filename, line, -r, \
453                                    "Invalid " descr "'%s': %s",       \
454                                    rvalue, strerror(-r));             \
455                                                                       \
456                 return 0;                                             \
457         }
458
459 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
460 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
461 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
462 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
463
464
465 static int config_parse_cpu_affinity2(const char *unit,
466                                       const char *filename,
467                                       unsigned line,
468                                       const char *section,
469                                       const char *lvalue,
470                                       int ltype,
471                                       const char *rvalue,
472                                       void *data,
473                                       void *userdata) {
474
475         char *w;
476         size_t l;
477         char *state;
478         cpu_set_t *c = NULL;
479         unsigned ncpus = 0;
480
481         assert(filename);
482         assert(lvalue);
483         assert(rvalue);
484
485         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
486                 char *t;
487                 int r;
488                 unsigned cpu;
489
490                 if (!(t = strndup(w, l)))
491                         return log_oom();
492
493                 r = safe_atou(t, &cpu);
494                 free(t);
495
496                 if (!c)
497                         if (!(c = cpu_set_malloc(&ncpus)))
498                                 return log_oom();
499
500                 if (r < 0 || cpu >= ncpus) {
501                         log_syntax(unit, LOG_ERR, filename, line, -r,
502                                    "Failed to parse CPU affinity '%s'", rvalue);
503                         CPU_FREE(c);
504                         return -EBADMSG;
505                 }
506
507                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
508         }
509
510         if (c) {
511                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
512                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
513
514                 CPU_FREE(c);
515         }
516
517         return 0;
518 }
519
/* Frees a NULL-terminated array of string vectors: every element is passed
 * to strv_free(), then the array itself is released. NULL is accepted and
 * is a no-op. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (!l)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
531
532 static void free_join_controllers(void) {
533         strv_free_free(arg_join_controllers);
534         arg_join_controllers = NULL;
535 }
536
537 static int config_parse_join_controllers(const char *unit,
538                                          const char *filename,
539                                          unsigned line,
540                                          const char *section,
541                                          const char *lvalue,
542                                          int ltype,
543                                          const char *rvalue,
544                                          void *data,
545                                          void *userdata) {
546
547         unsigned n = 0;
548         char *state, *w;
549         size_t length;
550
551         assert(filename);
552         assert(lvalue);
553         assert(rvalue);
554
555         free_join_controllers();
556
557         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
558                 char *s, **l;
559
560                 s = strndup(w, length);
561                 if (!s)
562                         return log_oom();
563
564                 l = strv_split(s, ",");
565                 free(s);
566
567                 strv_uniq(l);
568
569                 if (strv_length(l) <= 1) {
570                         strv_free(l);
571                         continue;
572                 }
573
574                 if (!arg_join_controllers) {
575                         arg_join_controllers = new(char**, 2);
576                         if (!arg_join_controllers) {
577                                 strv_free(l);
578                                 return log_oom();
579                         }
580
581                         arg_join_controllers[0] = l;
582                         arg_join_controllers[1] = NULL;
583
584                         n = 1;
585                 } else {
586                         char ***a;
587                         char ***t;
588
589                         t = new0(char**, n+2);
590                         if (!t) {
591                                 strv_free(l);
592                                 return log_oom();
593                         }
594
595                         n = 0;
596
597                         for (a = arg_join_controllers; *a; a++) {
598
599                                 if (strv_overlap(*a, l)) {
600                                         char **c;
601
602                                         c = strv_merge(*a, l);
603                                         if (!c) {
604                                                 strv_free(l);
605                                                 strv_free_free(t);
606                                                 return log_oom();
607                                         }
608
609                                         strv_free(l);
610                                         l = c;
611                                 } else {
612                                         char **c;
613
614                                         c = strv_copy(*a);
615                                         if (!c) {
616                                                 strv_free(l);
617                                                 strv_free_free(t);
618                                                 return log_oom();
619                                         }
620
621                                         t[n++] = c;
622                                 }
623                         }
624
625                         t[n++] = strv_uniq(l);
626
627                         strv_free_free(arg_join_controllers);
628                         arg_join_controllers = t;
629                 }
630         }
631
632         return 0;
633 }
634
635 static int parse_config_file(void) {
636
637         const ConfigTableItem items[] = {
638                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
639                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
640                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
641                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
642                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
643                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
644                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
645                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
646                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
647                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
648                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
649                 { "Manager", "DefaultTimeoutStartSec", config_parse_sec,         0, &arg_default_timeout_start_usec },
650                 { "Manager", "DefaultTimeoutStopSec", config_parse_sec,          0, &arg_default_timeout_stop_usec  },
651                 { "Manager", "DefaultRestartSec",     config_parse_sec,          0, &arg_default_restart_usec  },
652                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,      0, &arg_default_start_limit_interval },
653                 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned,    0, &arg_default_start_limit_burst },
654                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
655                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
656                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
657                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
658                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
659                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
660                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
661                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
662                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
663                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
664                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
665                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
666                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
667                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
668                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
669                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
670                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
671                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
672                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
673                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
674                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
675                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
676                 { NULL, NULL, NULL, 0, NULL }
677         };
678
679         _cleanup_fclose_ FILE *f;
680         const char *fn;
681         int r;
682
683         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
684         f = fopen(fn, "re");
685         if (!f) {
686                 if (errno == ENOENT)
687                         return 0;
688
689                 log_warning("Failed to open configuration file '%s': %m", fn);
690                 return 0;
691         }
692
693         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
694         if (r < 0)
695                 log_warning("Failed to parse configuration file: %s", strerror(-r));
696
697         return 0;
698 }
699
700 static int parse_proc_cmdline(void) {
701         _cleanup_free_ char *line = NULL;
702         char *w, *state;
703         size_t l;
704         int r;
705
706         r = proc_cmdline(&line);
707         if (r < 0)
708                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
709         if (r <= 0)
710                 return 0;
711
712         FOREACH_WORD_QUOTED(w, l, line, state) {
713                 _cleanup_free_ char *word;
714
715                 word = strndup(w, l);
716                 if (!word)
717                         return log_oom();
718
719                 r = parse_proc_cmdline_word(word);
720                 if (r < 0) {
721                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
722                         return r;
723                 }
724         }
725
726         return 0;
727 }
728
729 static int parse_argv(int argc, char *argv[]) {
730
731         enum {
732                 ARG_LOG_LEVEL = 0x100,
733                 ARG_LOG_TARGET,
734                 ARG_LOG_COLOR,
735                 ARG_LOG_LOCATION,
736                 ARG_UNIT,
737                 ARG_SYSTEM,
738                 ARG_USER,
739                 ARG_TEST,
740                 ARG_VERSION,
741                 ARG_DUMP_CONFIGURATION_ITEMS,
742                 ARG_DUMP_CORE,
743                 ARG_CRASH_SHELL,
744                 ARG_CONFIRM_SPAWN,
745                 ARG_SHOW_STATUS,
746                 ARG_DESERIALIZE,
747                 ARG_SWITCHED_ROOT,
748                 ARG_DEFAULT_STD_OUTPUT,
749                 ARG_DEFAULT_STD_ERROR
750         };
751
752         static const struct option options[] = {
753                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
754                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
755                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
756                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
757                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
758                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
759                 { "user",                     no_argument,       NULL, ARG_USER                     },
760                 { "test",                     no_argument,       NULL, ARG_TEST                     },
761                 { "help",                     no_argument,       NULL, 'h'                          },
762                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
763                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
764                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
765                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
766                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
767                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
768                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
769                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
770                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
771                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
772                 { NULL,                       0,                 NULL, 0                            }
773         };
774
775         int c, r;
776
777         assert(argc >= 1);
778         assert(argv);
779
780         if (getpid() == 1)
781                 opterr = 0;
782
783         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
784
785                 switch (c) {
786
787                 case ARG_LOG_LEVEL:
788                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
789                                 log_error("Failed to parse log level %s.", optarg);
790                                 return r;
791                         }
792
793                         break;
794
795                 case ARG_LOG_TARGET:
796
797                         if ((r = log_set_target_from_string(optarg)) < 0) {
798                                 log_error("Failed to parse log target %s.", optarg);
799                                 return r;
800                         }
801
802                         break;
803
804                 case ARG_LOG_COLOR:
805
806                         if (optarg) {
807                                 if ((r = log_show_color_from_string(optarg)) < 0) {
808                                         log_error("Failed to parse log color setting %s.", optarg);
809                                         return r;
810                                 }
811                         } else
812                                 log_show_color(true);
813
814                         break;
815
816                 case ARG_LOG_LOCATION:
817
818                         if (optarg) {
819                                 if ((r = log_show_location_from_string(optarg)) < 0) {
820                                         log_error("Failed to parse log location setting %s.", optarg);
821                                         return r;
822                                 }
823                         } else
824                                 log_show_location(true);
825
826                         break;
827
828                 case ARG_DEFAULT_STD_OUTPUT:
829
830                         if ((r = exec_output_from_string(optarg)) < 0) {
831                                 log_error("Failed to parse default standard output setting %s.", optarg);
832                                 return r;
833                         } else
834                                 arg_default_std_output = r;
835                         break;
836
837                 case ARG_DEFAULT_STD_ERROR:
838
839                         if ((r = exec_output_from_string(optarg)) < 0) {
840                                 log_error("Failed to parse default standard error output setting %s.", optarg);
841                                 return r;
842                         } else
843                                 arg_default_std_error = r;
844                         break;
845
846                 case ARG_UNIT:
847
848                         if ((r = set_default_unit(optarg)) < 0) {
849                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
850                                 return r;
851                         }
852
853                         break;
854
855                 case ARG_SYSTEM:
856                         arg_running_as = SYSTEMD_SYSTEM;
857                         break;
858
859                 case ARG_USER:
860                         arg_running_as = SYSTEMD_USER;
861                         break;
862
863                 case ARG_TEST:
864                         arg_action = ACTION_TEST;
865                         break;
866
867                 case ARG_VERSION:
868                         arg_action = ACTION_VERSION;
869                         break;
870
871                 case ARG_DUMP_CONFIGURATION_ITEMS:
872                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
873                         break;
874
875                 case ARG_DUMP_CORE:
876                         r = optarg ? parse_boolean(optarg) : 1;
877                         if (r < 0) {
878                                 log_error("Failed to parse dump core boolean %s.", optarg);
879                                 return r;
880                         }
881                         arg_dump_core = r;
882                         break;
883
884                 case ARG_CRASH_SHELL:
885                         r = optarg ? parse_boolean(optarg) : 1;
886                         if (r < 0) {
887                                 log_error("Failed to parse crash shell boolean %s.", optarg);
888                                 return r;
889                         }
890                         arg_crash_shell = r;
891                         break;
892
893                 case ARG_CONFIRM_SPAWN:
894                         r = optarg ? parse_boolean(optarg) : 1;
895                         if (r < 0) {
896                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
897                                 return r;
898                         }
899                         arg_confirm_spawn = r;
900                         break;
901
902                 case ARG_SHOW_STATUS:
903                         r = optarg ? parse_boolean(optarg) : 1;
904                         if (r < 0) {
905                                 log_error("Failed to parse show status boolean %s.", optarg);
906                                 return r;
907                         }
908                         arg_show_status = r;
909                         break;
910
911                 case ARG_DESERIALIZE: {
912                         int fd;
913                         FILE *f;
914
915                         r = safe_atoi(optarg, &fd);
916                         if (r < 0 || fd < 0) {
917                                 log_error("Failed to parse deserialize option %s.", optarg);
918                                 return r < 0 ? r : -EINVAL;
919                         }
920
921                         fd_cloexec(fd, true);
922
923                         f = fdopen(fd, "r");
924                         if (!f) {
925                                 log_error("Failed to open serialization fd: %m");
926                                 return -errno;
927                         }
928
929                         if (serialization)
930                                 fclose(serialization);
931
932                         serialization = f;
933
934                         break;
935                 }
936
937                 case ARG_SWITCHED_ROOT:
938                         arg_switched_root = true;
939                         break;
940
941                 case 'h':
942                         arg_action = ACTION_HELP;
943                         break;
944
945                 case 'D':
946                         log_set_max_level(LOG_DEBUG);
947                         break;
948
949                 case 'b':
950                 case 's':
951                 case 'z':
952                         /* Just to eat away the sysvinit kernel
953                          * cmdline args without getopt() error
954                          * messages that we'll parse in
955                          * parse_proc_cmdline_word() or ignore. */
956
957                 case '?':
958                 default:
959                         if (getpid() != 1) {
960                                 log_error("Unknown option code %c", c);
961                                 return -EINVAL;
962                         }
963
964                         break;
965                 }
966
967         if (optind < argc && getpid() != 1) {
968                 /* Hmm, when we aren't run as init system
969                  * let's complain about excess arguments */
970
971                 log_error("Excess arguments.");
972                 return -EINVAL;
973         }
974
975         if (detect_container(NULL) > 0) {
976                 char **a;
977
978                 /* All /proc/cmdline arguments the kernel didn't
979                  * understand it passed to us. We're not really
980                  * interested in that usually since /proc/cmdline is
981                  * more interesting and complete. With one exception:
982                  * if we are run in a container /proc/cmdline is not
983                  * relevant for the container, hence we rely on argv[]
984                  * instead. */
985
986                 for (a = argv; a < argv + argc; a++) {
987                         r = parse_proc_cmdline_word(*a);
988                         if (r < 0) {
989                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
990                                 return r;
991                         }
992                 }
993         }
994
995         return 0;
996 }
997
998 static int help(void) {
999
1000         printf("%s [OPTIONS...]\n\n"
1001                "Starts up and maintains the system or user services.\n\n"
1002                "  -h --help                      Show this help\n"
1003                "     --test                      Determine startup sequence, dump it and exit\n"
1004                "     --dump-configuration-items  Dump understood unit configuration items\n"
1005                "     --unit=UNIT                 Set default unit\n"
1006                "     --system                    Run a system instance, even if PID != 1\n"
1007                "     --user                      Run a user instance\n"
1008                "     --dump-core[=0|1]           Dump core on crash\n"
1009                "     --crash-shell[=0|1]         Run shell on crash\n"
1010                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1011                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1012                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1013                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1014                "     --log-color[=0|1]           Highlight important log messages\n"
1015                "     --log-location[=0|1]        Include code location in log messages\n"
1016                "     --default-standard-output=  Set default standard output for services\n"
1017                "     --default-standard-error=   Set default standard error output for services\n",
1018                program_invocation_short_name);
1019
1020         return 0;
1021 }
1022
1023 static int version(void) {
1024         puts(PACKAGE_STRING);
1025         puts(SYSTEMD_FEATURES);
1026
1027         return 0;
1028 }
1029
1030 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1031         FILE *f = NULL;
1032         FDSet *fds = NULL;
1033         int r;
1034
1035         assert(m);
1036         assert(_f);
1037         assert(_fds);
1038
1039         r = manager_open_serialization(m, &f);
1040         if (r < 0) {
1041                 log_error("Failed to create serialization file: %s", strerror(-r));
1042                 goto fail;
1043         }
1044
1045         /* Make sure nothing is really destructed when we shut down */
1046         m->n_reloading ++;
1047         bus_manager_send_reloading(m, true);
1048
1049         fds = fdset_new();
1050         if (!fds) {
1051                 r = -ENOMEM;
1052                 log_error("Failed to allocate fd set: %s", strerror(-r));
1053                 goto fail;
1054         }
1055
1056         r = manager_serialize(m, f, fds, switching_root);
1057         if (r < 0) {
1058                 log_error("Failed to serialize state: %s", strerror(-r));
1059                 goto fail;
1060         }
1061
1062         if (fseeko(f, 0, SEEK_SET) < 0) {
1063                 log_error("Failed to rewind serialization fd: %m");
1064                 goto fail;
1065         }
1066
1067         r = fd_cloexec(fileno(f), false);
1068         if (r < 0) {
1069                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1070                 goto fail;
1071         }
1072
1073         r = fdset_cloexec(fds, false);
1074         if (r < 0) {
1075                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1076                 goto fail;
1077         }
1078
1079         *_f = f;
1080         *_fds = fds;
1081
1082         return 0;
1083
1084 fail:
1085         fdset_free(fds);
1086
1087         if (f)
1088                 fclose(f);
1089
1090         return r;
1091 }
1092
1093 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1094         struct rlimit nl;
1095         int r;
1096
1097         assert(saved_rlimit);
1098
1099         /* Save the original RLIMIT_NOFILE so that we can reset it
1100          * later when transitioning from the initrd to the main
1101          * systemd or suchlike. */
1102         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1103                 log_error("Reading RLIMIT_NOFILE failed: %m");
1104                 return -errno;
1105         }
1106
1107         /* Make sure forked processes get the default kernel setting */
1108         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1109                 struct rlimit *rl;
1110
1111                 rl = newdup(struct rlimit, saved_rlimit, 1);
1112                 if (!rl)
1113                         return log_oom();
1114
1115                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1116         }
1117
1118         /* Bump up the resource limit for ourselves substantially */
1119         nl.rlim_cur = nl.rlim_max = 64*1024;
1120         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1121         if (r < 0) {
1122                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1123                 return r;
1124         }
1125
1126         return 0;
1127 }
1128
/* Logs a warning unless /etc/mtab is a symlink whose target is
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        char *target;
        bool ok = false;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                ok = streq(target, "/proc/self/mounts") ||
                     streq(target, "/proc/mounts");
                free(target);
        }

        if (ok)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1148
/* Logs a warning if /usr looks like a separate file system that has
 * not been mounted yet, which is detected by the directory being
 * empty. Nothing is logged if /usr has content or cannot be checked. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1160
/* Checks for /proc/cgroups. If it is missing, logs a warning about
 * the kernel lacking CONFIG_CGROUPS and pauses for ten seconds before
 * returning. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1175
1176 static int initialize_join_controllers(void) {
1177         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1178          * + "net_prio". We'd like to add "cpuset" to the mix, but
1179          * "cpuset" does't really work for groups with no initialized
1180          * attributes. */
1181
1182         arg_join_controllers = new(char**, 3);
1183         if (!arg_join_controllers)
1184                 return -ENOMEM;
1185
1186         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1187         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1188         arg_join_controllers[2] = NULL;
1189
1190         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1191                 free_join_controllers();
1192                 return -ENOMEM;
1193         }
1194
1195         return 0;
1196 }
1197
1198 int main(int argc, char *argv[]) {
1199         Manager *m = NULL;
1200         int r, retval = EXIT_FAILURE;
1201         usec_t before_startup, after_startup;
1202         char timespan[FORMAT_TIMESPAN_MAX];
1203         FDSet *fds = NULL;
1204         bool reexecute = false;
1205         const char *shutdown_verb = NULL;
1206         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1207         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1208         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1209         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1210         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1211         static char systemd[] = "systemd";
1212         bool skip_setup = false;
1213         int j;
1214         bool loaded_policy = false;
1215         bool arm_reboot_watchdog = false;
1216         bool queue_default_job = false;
1217         char *switch_root_dir = NULL, *switch_root_init = NULL;
1218         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1219
1220 #ifdef HAVE_SYSV_COMPAT
1221         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1222                 /* This is compatibility support for SysV, where
1223                  * calling init as a user is identical to telinit. */
1224
1225                 errno = -ENOENT;
1226                 execv(SYSTEMCTL_BINARY_PATH, argv);
1227                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1228                 return 1;
1229         }
1230 #endif
1231
1232         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1233         dual_timestamp_get(&userspace_timestamp);
1234
1235         /* Determine if this is a reexecution or normal bootup. We do
1236          * the full command line parsing much later, so let's just
1237          * have a quick peek here. */
1238         if (strv_find(argv+1, "--deserialize"))
1239                 skip_setup = true;
1240
1241         /* If we have switched root, do all the special setup
1242          * things */
1243         if (strv_find(argv+1, "--switched-root"))
1244                 skip_setup = false;
1245
1246         /* If we get started via the /sbin/init symlink then we are
1247            called 'init'. After a subsequent reexecution we are then
1248            called 'systemd'. That is confusing, hence let's call us
1249            systemd right-away. */
1250         program_invocation_short_name = systemd;
1251         prctl(PR_SET_NAME, systemd);
1252
1253         saved_argv = argv;
1254         saved_argc = argc;
1255
1256         log_show_color(isatty(STDERR_FILENO) > 0);
1257
1258         /* Disable the umask logic */
1259         if (getpid() == 1)
1260                 umask(0);
1261
1262         if (getpid() == 1 && detect_container(NULL) <= 0) {
1263
1264                 /* Running outside of a container as PID 1 */
1265                 arg_running_as = SYSTEMD_SYSTEM;
1266                 make_null_stdio();
1267                 log_set_target(LOG_TARGET_KMSG);
1268                 log_open();
1269
1270                 if (in_initrd())
1271                         initrd_timestamp = userspace_timestamp;
1272
1273                 if (!skip_setup) {
1274                         mount_setup_early();
1275                         dual_timestamp_get(&security_start_timestamp);
1276                         if (selinux_setup(&loaded_policy) < 0)
1277                                 goto finish;
1278                         if (ima_setup() < 0)
1279                                 goto finish;
1280                         if (smack_setup() < 0)
1281                                 goto finish;
1282                         dual_timestamp_get(&security_finish_timestamp);
1283                 }
1284
1285                 if (label_init(NULL) < 0)
1286                         goto finish;
1287
1288                 if (!skip_setup) {
1289                         if (hwclock_is_localtime() > 0) {
1290                                 int min;
1291
1292                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1293                                 r = hwclock_set_timezone(&min);
1294                                 if (r < 0)
1295                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1296                                 else
1297                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1298                         } else if (!in_initrd()) {
1299                                 /*
1300                                  * Do dummy first-time call to seal the kernel's time warp magic
1301                                  *
1302                                  * Do not call this this from inside the initrd. The initrd might not
1303                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1304                                  * that way. In such case, we need to delay the time-warp or the sealing
1305                                  * until we reach the real system.
1306                                  */
1307                                 hwclock_reset_timezone();
1308
1309                                 /* Tell the kernel our timezone */
1310                                 r = hwclock_set_timezone(NULL);
1311                                 if (r < 0)
1312                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1313                         }
1314                 }
1315
1316                 /* Set the default for later on, but don't actually
1317                  * open the logs like this for now. Note that if we
1318                  * are transitioning from the initrd there might still
1319                  * be journal fd open, and we shouldn't attempt
1320                  * opening that before we parsed /proc/cmdline which
1321                  * might redirect output elsewhere. */
1322                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1323
1324         } else if (getpid() == 1) {
1325                 /* Running inside a container, as PID 1 */
1326                 arg_running_as = SYSTEMD_SYSTEM;
1327                 log_set_target(LOG_TARGET_CONSOLE);
1328                 log_open();
1329
1330                 /* For the later on, see above... */
1331                 log_set_target(LOG_TARGET_JOURNAL);
1332
1333                 /* clear the kernel timestamp,
1334                  * because we are in a container */
1335                 kernel_timestamp.monotonic = 0ULL;
1336                 kernel_timestamp.realtime = 0ULL;
1337
1338         } else {
1339                 /* Running as user instance */
1340                 arg_running_as = SYSTEMD_USER;
1341                 log_set_target(LOG_TARGET_AUTO);
1342                 log_open();
1343
1344                 /* clear the kernel timestamp,
1345                  * because we are not PID 1 */
1346                 kernel_timestamp.monotonic = 0ULL;
1347                 kernel_timestamp.realtime = 0ULL;
1348         }
1349
1350         /* Initialize default unit */
1351         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1352         if (r < 0) {
1353                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1354                 goto finish;
1355         }
1356
1357         r = initialize_join_controllers();
1358         if (r < 0)
1359                 goto finish;
1360
1361         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1362          * /proc/$PID/fd is available. */
1363         if (getpid() == 1) {
1364                 r = mount_setup(loaded_policy);
1365                 if (r < 0)
1366                         goto finish;
1367         }
1368
1369         /* Reset all signal handlers. */
1370         assert_se(reset_all_signal_handlers() == 0);
1371
1372         ignore_signals(SIGNALS_IGNORE, -1);
1373
1374         if (parse_config_file() < 0)
1375                 goto finish;
1376
1377         if (arg_running_as == SYSTEMD_SYSTEM)
1378                 if (parse_proc_cmdline() < 0)
1379                         goto finish;
1380
1381         log_parse_environment();
1382
1383         if (parse_argv(argc, argv) < 0)
1384                 goto finish;
1385
1386         if (arg_action == ACTION_TEST &&
1387             geteuid() == 0) {
1388                 log_error("Don't run test mode as root.");
1389                 goto finish;
1390         }
1391
1392         if (arg_running_as == SYSTEMD_USER &&
1393             arg_action == ACTION_RUN &&
1394             sd_booted() <= 0) {
1395                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1396                 goto finish;
1397         }
1398
1399         if (arg_running_as == SYSTEMD_SYSTEM &&
1400             arg_action == ACTION_RUN &&
1401             running_in_chroot() > 0) {
1402                 log_error("Cannot be run in a chroot() environment.");
1403                 goto finish;
1404         }
1405
1406         if (arg_action == ACTION_HELP) {
1407                 retval = help();
1408                 goto finish;
1409         } else if (arg_action == ACTION_VERSION) {
1410                 retval = version();
1411                 goto finish;
1412         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1413                 unit_dump_config_items(stdout);
1414                 retval = EXIT_SUCCESS;
1415                 goto finish;
1416         } else if (arg_action == ACTION_DONE) {
1417                 retval = EXIT_SUCCESS;
1418                 goto finish;
1419         }
1420
1421         if (arg_running_as == SYSTEMD_USER &&
1422             !getenv("XDG_RUNTIME_DIR")) {
1423                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1424                 goto finish;
1425         }
1426
1427         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1428
1429         /* Close logging fds, in order not to confuse fdset below */
1430         log_close();
1431
1432         /* Remember open file descriptors for later deserialization */
1433         r = fdset_new_fill(&fds);
1434         if (r < 0) {
1435                 log_error("Failed to allocate fd set: %s", strerror(-r));
1436                 goto finish;
1437         } else
1438                 fdset_cloexec(fds, true);
1439
1440         if (serialization)
1441                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1442
1443         if (arg_running_as == SYSTEMD_SYSTEM)
1444                 /* Become a session leader if we aren't one yet. */
1445                 setsid();
1446
1447         /* Move out of the way, so that we won't block unmounts */
1448         assert_se(chdir("/")  == 0);
1449
1450         /* Reset the console, but only if this is really init and we
1451          * are freshly booted */
1452         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1453                 console_setup(getpid() == 1 && !skip_setup);
1454
1455         /* Open the logging devices, if possible and necessary */
1456         log_open();
1457
1458         /* Make sure we leave a core dump without panicing the
1459          * kernel. */
1460         if (getpid() == 1) {
1461                 install_crash_handler();
1462
1463                 r = mount_cgroup_controllers(arg_join_controllers);
1464                 if (r < 0)
1465                         goto finish;
1466         }
1467
1468         if (arg_running_as == SYSTEMD_SYSTEM) {
1469                 const char *virtualization = NULL;
1470
1471                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1472
1473                 detect_virtualization(&virtualization);
1474                 if (virtualization)
1475                         log_info("Detected virtualization '%s'.", virtualization);
1476
1477                 if (in_initrd())
1478                         log_info("Running in initial RAM disk.");
1479
1480         } else
1481                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1482
1483         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1484                 if (arg_show_status || plymouth_running())
1485                         status_welcome();
1486
1487 #ifdef HAVE_KMOD
1488                 kmod_setup();
1489 #endif
1490                 hostname_setup();
1491                 machine_id_setup();
1492                 loopback_setup();
1493
1494                 test_mtab();
1495                 test_usr();
1496                 test_cgroups();
1497         }
1498
1499         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1500                 watchdog_set_timeout(&arg_runtime_watchdog);
1501
1502         if (arg_timer_slack_nsec != (nsec_t) -1)
1503                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1504                         log_error("Failed to adjust timer slack: %m");
1505
1506         if (arg_capability_bounding_set_drop) {
1507                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1508                 if (r < 0) {
1509                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1510                         goto finish;
1511                 }
1512                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1513                 if (r < 0) {
1514                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1515                         goto finish;
1516                 }
1517         }
1518
1519         if (arg_running_as == SYSTEMD_USER) {
1520                 /* Become reaper of our children */
1521                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1522                         log_warning("Failed to make us a subreaper: %m");
1523                         if (errno == EINVAL)
1524                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1525                 }
1526         }
1527
1528         if (arg_running_as == SYSTEMD_SYSTEM)
1529                 bump_rlimit_nofile(&saved_rlimit_nofile);
1530
1531         r = manager_new(arg_running_as, !!serialization, &m);
1532         if (r < 0) {
1533                 log_error("Failed to allocate manager object: %s", strerror(-r));
1534                 goto finish;
1535         }
1536
1537         m->confirm_spawn = arg_confirm_spawn;
1538         m->default_std_output = arg_default_std_output;
1539         m->default_std_error = arg_default_std_error;
1540         m->default_restart_usec = arg_default_restart_usec;
1541         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1542         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1543         m->default_start_limit_interval = arg_default_start_limit_interval;
1544         m->default_start_limit_burst = arg_default_start_limit_burst;
1545         m->runtime_watchdog = arg_runtime_watchdog;
1546         m->shutdown_watchdog = arg_shutdown_watchdog;
1547         m->userspace_timestamp = userspace_timestamp;
1548         m->kernel_timestamp = kernel_timestamp;
1549         m->initrd_timestamp = initrd_timestamp;
1550         m->security_start_timestamp = security_start_timestamp;
1551         m->security_finish_timestamp = security_finish_timestamp;
1552
1553         manager_set_default_rlimits(m, arg_default_rlimit);
1554
1555         if (arg_default_environment)
1556                 manager_environment_add(m, NULL, arg_default_environment);
1557
1558         manager_set_show_status(m, arg_show_status);
1559
1560         /* Remember whether we should queue the default job */
1561         queue_default_job = !serialization || arg_switched_root;
1562
1563         before_startup = now(CLOCK_MONOTONIC);
1564
1565         r = manager_startup(m, serialization, fds);
1566         if (r < 0)
1567                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1568
1569         /* This will close all file descriptors that were opened, but
1570          * not claimed by any unit. */
1571         fdset_free(fds);
1572         fds = NULL;
1573
1574         if (serialization) {
1575                 fclose(serialization);
1576                 serialization = NULL;
1577         }
1578
1579         if (queue_default_job) {
1580                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1581                 Unit *target = NULL;
1582                 Job *default_unit_job;
1583
1584                 log_debug("Activating default unit: %s", arg_default_unit);
1585
1586                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1587                 if (r < 0)
1588                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1589                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1590                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1591                 else if (target->load_state == UNIT_MASKED)
1592                         log_error("Default target masked.");
1593
1594                 if (!target || target->load_state != UNIT_LOADED) {
1595                         log_info("Trying to load rescue target...");
1596
1597                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1598                         if (r < 0) {
1599                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1600                                 goto finish;
1601                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1602                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1603                                 goto finish;
1604                         } else if (target->load_state == UNIT_MASKED) {
1605                                 log_error("Rescue target masked.");
1606                                 goto finish;
1607                         }
1608                 }
1609
1610                 assert(target->load_state == UNIT_LOADED);
1611
1612                 if (arg_action == ACTION_TEST) {
1613                         printf("-> By units:\n");
1614                         manager_dump_units(m, stdout, "\t");
1615                 }
1616
1617                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1618                 if (r == -EPERM) {
1619                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1620
1621                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1622                         if (r < 0) {
1623                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1624                                 goto finish;
1625                         }
1626                 } else if (r < 0) {
1627                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1628                         goto finish;
1629                 }
1630
1631                 m->default_unit_job_id = default_unit_job->id;
1632
1633                 after_startup = now(CLOCK_MONOTONIC);
1634                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1635                          "Loaded units and determined initial transaction in %s.",
1636                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1637
1638                 if (arg_action == ACTION_TEST) {
1639                         printf("-> By jobs:\n");
1640                         manager_dump_jobs(m, stdout, "\t");
1641                         retval = EXIT_SUCCESS;
1642                         goto finish;
1643                 }
1644         }
1645
1646         for (;;) {
1647                 r = manager_loop(m);
1648                 if (r < 0) {
1649                         log_error("Failed to run mainloop: %s", strerror(-r));
1650                         goto finish;
1651                 }
1652
1653                 switch (m->exit_code) {
1654
1655                 case MANAGER_EXIT:
1656                         retval = EXIT_SUCCESS;
1657                         log_debug("Exit.");
1658                         goto finish;
1659
1660                 case MANAGER_RELOAD:
1661                         log_info("Reloading.");
1662                         r = manager_reload(m);
1663                         if (r < 0)
1664                                 log_error("Failed to reload: %s", strerror(-r));
1665                         break;
1666
1667                 case MANAGER_REEXECUTE:
1668
1669                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1670                                 goto finish;
1671
1672                         reexecute = true;
1673                         log_notice("Reexecuting.");
1674                         goto finish;
1675
1676                 case MANAGER_SWITCH_ROOT:
1677                         /* Steal the switch root parameters */
1678                         switch_root_dir = m->switch_root;
1679                         switch_root_init = m->switch_root_init;
1680                         m->switch_root = m->switch_root_init = NULL;
1681
1682                         if (!switch_root_init)
1683                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1684                                         goto finish;
1685
1686                         reexecute = true;
1687                         log_notice("Switching root.");
1688                         goto finish;
1689
1690                 case MANAGER_REBOOT:
1691                 case MANAGER_POWEROFF:
1692                 case MANAGER_HALT:
1693                 case MANAGER_KEXEC: {
1694                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1695                                 [MANAGER_REBOOT] = "reboot",
1696                                 [MANAGER_POWEROFF] = "poweroff",
1697                                 [MANAGER_HALT] = "halt",
1698                                 [MANAGER_KEXEC] = "kexec"
1699                         };
1700
1701                         assert_se(shutdown_verb = table[m->exit_code]);
1702                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1703
1704                         log_notice("Shutting down.");
1705                         goto finish;
1706                 }
1707
1708                 default:
1709                         assert_not_reached("Unknown exit code.");
1710                 }
1711         }
1712
1713 finish:
1714         if (m)
1715                 manager_free(m);
1716
1717         for (j = 0; j < RLIMIT_NLIMITS; j++)
1718                 free(arg_default_rlimit[j]);
1719
1720         free(arg_default_unit);
1721         free_join_controllers();
1722
1723         label_finish();
1724
1725         if (reexecute) {
1726                 const char **args;
1727                 unsigned i, args_size;
1728
1729                 /* Close and disarm the watchdog, so that the new
1730                  * instance can reinitialize it, but doesn't get
1731                  * rebooted while we do that */
1732                 watchdog_close(true);
1733
1734                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1735                  * that the new systemd can pass the kernel default to
1736                  * its child processes */
1737                 if (saved_rlimit_nofile.rlim_cur > 0)
1738                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1739
1740                 if (switch_root_dir) {
1741                         /* Kill all remaining processes from the
1742                          * initrd, but don't wait for them, so that we
1743                          * can handle the SIGCHLD for them after
1744                          * deserializing. */
1745                         broadcast_signal(SIGTERM, false);
1746
1747                         /* And switch root */
1748                         r = switch_root(switch_root_dir);
1749                         if (r < 0)
1750                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1751                 }
1752
1753                 args_size = MAX(6, argc+1);
1754                 args = newa(const char*, args_size);
1755
1756                 if (!switch_root_init) {
1757                         char sfd[16];
1758
1759                         /* First try to spawn ourselves with the right
1760                          * path, and with full serialization. We do
1761                          * this only if the user didn't specify an
1762                          * explicit init to spawn. */
1763
1764                         assert(serialization);
1765                         assert(fds);
1766
1767                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1768                         char_array_0(sfd);
1769
1770                         i = 0;
1771                         args[i++] = SYSTEMD_BINARY_PATH;
1772                         if (switch_root_dir)
1773                                 args[i++] = "--switched-root";
1774                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1775                         args[i++] = "--deserialize";
1776                         args[i++] = sfd;
1777                         args[i++] = NULL;
1778
1779                         /* do not pass along the environment we inherit from the kernel or initrd */
1780                         if (switch_root_dir)
1781                                 clearenv();
1782
1783                         assert(i <= args_size);
1784                         execv(args[0], (char* const*) args);
1785                 }
1786
1787                 /* Try the fallback, if there is any, without any
1788                  * serialization. We pass the original argv[] and
1789                  * envp[]. (Well, modulo the ordering changes due to
1790                  * getopt() in argv[], and some cleanups in envp[],
1791                  * but let's hope that doesn't matter.) */
1792
1793                 if (serialization) {
1794                         fclose(serialization);
1795                         serialization = NULL;
1796                 }
1797
1798                 if (fds) {
1799                         fdset_free(fds);
1800                         fds = NULL;
1801                 }
1802
1803                 /* Reopen the console */
1804                 make_console_stdio();
1805
1806                 for (j = 1, i = 1; j < argc; j++)
1807                         args[i++] = argv[j];
1808                 args[i++] = NULL;
1809                 assert(i <= args_size);
1810
1811                 if (switch_root_init) {
1812                         args[0] = switch_root_init;
1813                         execv(args[0], (char* const*) args);
1814                         log_warning("Failed to execute configured init, trying fallback: %m");
1815                 }
1816
1817                 args[0] = "/sbin/init";
1818                 execv(args[0], (char* const*) args);
1819
1820                 if (errno == ENOENT) {
1821                         log_warning("No /sbin/init, trying fallback");
1822
1823                         args[0] = "/bin/sh";
1824                         args[1] = NULL;
1825                         execv(args[0], (char* const*) args);
1826                         log_error("Failed to execute /bin/sh, giving up: %m");
1827                 } else
1828                         log_warning("Failed to execute /sbin/init, giving up: %m");
1829         }
1830
1831         if (serialization)
1832                 fclose(serialization);
1833
1834         if (fds)
1835                 fdset_free(fds);
1836
1837 #ifdef HAVE_VALGRIND_VALGRIND_H
1838         /* If we are PID 1 and running under valgrind, then let's exit
1839          * here explicitly. valgrind will only generate nice output on
1840          * exit(), not on exec(), hence let's do the former not the
1841          * latter here. */
1842         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1843                 return 0;
1844 #endif
1845
1846         if (shutdown_verb) {
1847                 const char * command_line[] = {
1848                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1849                         shutdown_verb,
1850                         NULL
1851                 };
1852                 char **env_block;
1853
1854                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1855                         char e[32];
1856
1857                         /* If we reboot let's set the shutdown
1858                          * watchdog and tell the shutdown binary to
1859                          * repeatedly ping it */
1860                         watchdog_set_timeout(&arg_shutdown_watchdog);
1861                         watchdog_close(false);
1862
1863                         /* Tell the binary how often to ping */
1864                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1865                         char_array_0(e);
1866
1867                         env_block = strv_append(environ, e);
1868                 } else {
1869                         env_block = strv_copy(environ);
1870                         watchdog_close(true);
1871                 }
1872
1873                 /* Avoid the creation of new processes forked by the
1874                  * kernel; at this point, we will not listen to the
1875                  * signals anyway */
1876                 if (detect_container(NULL) <= 0)
1877                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1878
1879                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1880                 free(env_block);
1881                 log_error("Failed to execute shutdown binary, freezing: %m");
1882         }
1883
1884         if (getpid() == 1)
1885                 freeze();
1886
1887         return retval;
1888 }