chiark / gitweb /
bus: do kdbus only if this is enabled on the configure switch
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85
86 static char *arg_default_unit = NULL;
87 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
88
89 static bool arg_dump_core = true;
90 static bool arg_crash_shell = false;
91 static int arg_crash_chvt = -1;
92 static bool arg_confirm_spawn = false;
93 static bool arg_show_status = true;
94 static bool arg_switched_root = false;
95 static char ***arg_join_controllers = NULL;
96 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
97 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
98 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
99 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
100 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
101 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
102 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
103 static usec_t arg_runtime_watchdog = 0;
104 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
105 static char **arg_default_environment = NULL;
106 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
107 static uint64_t arg_capability_bounding_set_drop = 0;
108 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
109
110 static FILE* serialization = NULL;
111
/* Signal handler that intentionally does nothing. crash() installs it
 * for SIGCHLD before forking the core dump helper. */
static void nop_handler(int sig) {
        (void) sig;
}
114
115 _noreturn_ static void crash(int sig) {
116
117         if (getpid() != 1)
118                 /* Pass this on immediately, if this is not PID 1 */
119                 raise(sig);
120         else if (!arg_dump_core)
121                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
122         else {
123                 struct sigaction sa = {
124                         .sa_handler = nop_handler,
125                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
126                 };
127                 pid_t pid;
128
129                 /* We want to wait for the core process, hence let's enable SIGCHLD */
130                 sigaction(SIGCHLD, &sa, NULL);
131
132                 pid = fork();
133                 if (pid < 0)
134                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
135
136                 else if (pid == 0) {
137                         struct rlimit rl = {};
138
139                         /* Enable default signal handler for core dump */
140                         zero(sa);
141                         sa.sa_handler = SIG_DFL;
142                         sigaction(sig, &sa, NULL);
143
144                         /* Don't limit the core dump size */
145                         rl.rlim_cur = RLIM_INFINITY;
146                         rl.rlim_max = RLIM_INFINITY;
147                         setrlimit(RLIMIT_CORE, &rl);
148
149                         /* Just to be sure... */
150                         chdir("/");
151
152                         /* Raise the signal again */
153                         raise(sig);
154
155                         assert_not_reached("We shouldn't be here...");
156                         _exit(1);
157
158                 } else {
159                         siginfo_t status;
160                         int r;
161
162                         /* Order things nicely. */
163                         r = wait_for_terminate(pid, &status);
164                         if (r < 0)
165                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
166                         else if (status.si_code != CLD_DUMPED)
167                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
168                         else
169                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
170                 }
171         }
172
173         if (arg_crash_chvt)
174                 chvt(arg_crash_chvt);
175
176         if (arg_crash_shell) {
177                 struct sigaction sa = {
178                         .sa_handler = SIG_IGN,
179                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
180                 };
181                 pid_t pid;
182
183                 log_info("Executing crash shell in 10s...");
184                 sleep(10);
185
186                 /* Let the kernel reap children for us */
187                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
188
189                 pid = fork();
190                 if (pid < 0)
191                         log_error("Failed to fork off crash shell: %m");
192                 else if (pid == 0) {
193                         make_console_stdio();
194                         execl("/bin/sh", "/bin/sh", NULL);
195
196                         log_error("execl() failed: %m");
197                         _exit(1);
198                 }
199
200                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
201         }
202
203         log_info("Freezing execution.");
204         freeze();
205 }
206
207 static void install_crash_handler(void) {
208         struct sigaction sa = {
209                 .sa_handler = crash,
210                 .sa_flags = SA_NODEFER,
211         };
212
213         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
214 }
215
216 static int console_setup(bool do_reset) {
217         int tty_fd, r;
218
219         /* If we are init, we connect stdin/stdout/stderr to /dev/null
220          * and make sure we don't have a controlling tty. */
221
222         release_terminal();
223
224         if (!do_reset)
225                 return 0;
226
227         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
228         if (tty_fd < 0) {
229                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
230                 return -tty_fd;
231         }
232
233         /* We don't want to force text mode.
234          * plymouth may be showing pictures already from initrd. */
235         r = reset_terminal_fd(tty_fd, false);
236         if (r < 0)
237                 log_error("Failed to reset /dev/console: %s", strerror(-r));
238
239         close_nointr_nofail(tty_fd);
240         return r;
241 }
242
243 static int set_default_unit(const char *u) {
244         char *c;
245
246         assert(u);
247
248         c = strdup(u);
249         if (!c)
250                 return -ENOMEM;
251
252         free(arg_default_unit);
253         arg_default_unit = c;
254
255         return 0;
256 }
257
258 static int parse_proc_cmdline_word(const char *word) {
259
260         static const char * const rlmap[] = {
261                 "emergency", SPECIAL_EMERGENCY_TARGET,
262                 "-b",        SPECIAL_EMERGENCY_TARGET,
263                 "single",    SPECIAL_RESCUE_TARGET,
264                 "-s",        SPECIAL_RESCUE_TARGET,
265                 "s",         SPECIAL_RESCUE_TARGET,
266                 "S",         SPECIAL_RESCUE_TARGET,
267                 "1",         SPECIAL_RESCUE_TARGET,
268                 "2",         SPECIAL_RUNLEVEL2_TARGET,
269                 "3",         SPECIAL_RUNLEVEL3_TARGET,
270                 "4",         SPECIAL_RUNLEVEL4_TARGET,
271                 "5",         SPECIAL_RUNLEVEL5_TARGET,
272         };
273
274         assert(word);
275
276         if (startswith(word, "systemd.unit=")) {
277
278                 if (!in_initrd())
279                         return set_default_unit(word + 13);
280
281         } else if (startswith(word, "rd.systemd.unit=")) {
282
283                 if (in_initrd())
284                         return set_default_unit(word + 16);
285
286         } else if (startswith(word, "systemd.log_target=")) {
287
288                 if (log_set_target_from_string(word + 19) < 0)
289                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
290
291         } else if (startswith(word, "systemd.log_level=")) {
292
293                 if (log_set_max_level_from_string(word + 18) < 0)
294                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
295
296         } else if (startswith(word, "systemd.log_color=")) {
297
298                 if (log_show_color_from_string(word + 18) < 0)
299                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
300
301         } else if (startswith(word, "systemd.log_location=")) {
302
303                 if (log_show_location_from_string(word + 21) < 0)
304                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
305
306         } else if (startswith(word, "systemd.dump_core=")) {
307                 int r;
308
309                 if ((r = parse_boolean(word + 18)) < 0)
310                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
311                 else
312                         arg_dump_core = r;
313
314         } else if (startswith(word, "systemd.crash_shell=")) {
315                 int r;
316
317                 if ((r = parse_boolean(word + 20)) < 0)
318                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
319                 else
320                         arg_crash_shell = r;
321
322         } else if (startswith(word, "systemd.confirm_spawn=")) {
323                 int r;
324
325                 if ((r = parse_boolean(word + 22)) < 0)
326                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
327                 else
328                         arg_confirm_spawn = r;
329
330         } else if (startswith(word, "systemd.crash_chvt=")) {
331                 int k;
332
333                 if (safe_atoi(word + 19, &k) < 0)
334                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
335                 else
336                         arg_crash_chvt = k;
337
338         } else if (startswith(word, "systemd.show_status=")) {
339                 int r;
340
341                 if ((r = parse_boolean(word + 20)) < 0)
342                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
343                 else
344                         arg_show_status = r;
345         } else if (startswith(word, "systemd.default_standard_output=")) {
346                 int r;
347
348                 if ((r = exec_output_from_string(word + 32)) < 0)
349                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
350                 else
351                         arg_default_std_output = r;
352         } else if (startswith(word, "systemd.default_standard_error=")) {
353                 int r;
354
355                 if ((r = exec_output_from_string(word + 31)) < 0)
356                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
357                 else
358                         arg_default_std_error = r;
359         } else if (startswith(word, "systemd.setenv=")) {
360                 _cleanup_free_ char *cenv = NULL;
361
362                 cenv = strdup(word + 15);
363                 if (!cenv)
364                         return -ENOMEM;
365
366                 if (env_assignment_is_valid(cenv)) {
367                         char **env;
368
369                         env = strv_env_set(arg_default_environment, cenv);
370                         if (env)
371                                 arg_default_environment = env;
372                         else
373                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
374                 } else
375                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
376
377         } else if (startswith(word, "systemd.") ||
378                    (in_initrd() && startswith(word, "rd.systemd."))) {
379
380                 const char *c;
381
382                 /* Ignore systemd.journald.xyz and friends */
383                 c = word;
384                 if (startswith(c, "rd."))
385                         c += 3;
386                 if (startswith(c, "systemd."))
387                         c += 8;
388                 if (c[strcspn(c, ".=")] != '.')  {
389
390                         log_warning("Unknown kernel switch %s. Ignoring.", word);
391
392                         log_info("Supported kernel switches:\n"
393                                  "systemd.unit=UNIT                        Default unit to start\n"
394                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
395                                  "systemd.dump_core=0|1                    Dump core on crash\n"
396                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
397                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
398                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
399                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
400                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
401                                  "                                         Log target\n"
402                                  "systemd.log_level=LEVEL                  Log level\n"
403                                  "systemd.log_color=0|1                    Highlight important log messages\n"
404                                  "systemd.log_location=0|1                 Include code location in log messages\n"
405                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
406                                  "                                         Set default log output for services\n"
407                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
408                                  "                                         Set default log error output for services\n"
409                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
410                 }
411
412         } else if (streq(word, "quiet"))
413                 arg_show_status = false;
414         else if (streq(word, "debug")) {
415                 /* Log to kmsg, the journal socket will fill up before the
416                  * journal is started and tools running during that time
417                  * will block with every log message for for 60 seconds,
418                  * before they give up. */
419                 log_set_max_level(LOG_DEBUG);
420                 log_set_target(LOG_TARGET_KMSG);
421         } else if (!in_initrd()) {
422                 unsigned i;
423
424                 /* SysV compatibility */
425                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
426                         if (streq(word, rlmap[i]))
427                                 return set_default_unit(rlmap[i+1]);
428         }
429
430         return 0;
431 }
432
433 #define DEFINE_SETTER(name, func, descr)                              \
434         static int name(const char *unit,                             \
435                         const char *filename,                         \
436                         unsigned line,                                \
437                         const char *section,                          \
438                         unsigned section_line,                        \
439                         const char *lvalue,                           \
440                         int ltype,                                    \
441                         const char *rvalue,                           \
442                         void *data,                                   \
443                         void *userdata) {                             \
444                                                                       \
445                 int r;                                                \
446                                                                       \
447                 assert(filename);                                     \
448                 assert(lvalue);                                       \
449                 assert(rvalue);                                       \
450                                                                       \
451                 r = func(rvalue);                                     \
452                 if (r < 0)                                            \
453                         log_syntax(unit, LOG_ERR, filename, line, -r, \
454                                    "Invalid " descr "'%s': %s",       \
455                                    rvalue, strerror(-r));             \
456                                                                       \
457                 return 0;                                             \
458         }
459
460 DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
461 DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
462 DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
463 DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
464
465
466 static int config_parse_cpu_affinity2(const char *unit,
467                                       const char *filename,
468                                       unsigned line,
469                                       const char *section,
470                                       unsigned section_line,
471                                       const char *lvalue,
472                                       int ltype,
473                                       const char *rvalue,
474                                       void *data,
475                                       void *userdata) {
476
477         char *w;
478         size_t l;
479         char *state;
480         cpu_set_t *c = NULL;
481         unsigned ncpus = 0;
482
483         assert(filename);
484         assert(lvalue);
485         assert(rvalue);
486
487         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
488                 char *t;
489                 int r;
490                 unsigned cpu;
491
492                 if (!(t = strndup(w, l)))
493                         return log_oom();
494
495                 r = safe_atou(t, &cpu);
496                 free(t);
497
498                 if (!c)
499                         if (!(c = cpu_set_malloc(&ncpus)))
500                                 return log_oom();
501
502                 if (r < 0 || cpu >= ncpus) {
503                         log_syntax(unit, LOG_ERR, filename, line, -r,
504                                    "Failed to parse CPU affinity '%s'", rvalue);
505                         CPU_FREE(c);
506                         return -EBADMSG;
507                 }
508
509                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
510         }
511
512         if (c) {
513                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
514                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
515
516                 CPU_FREE(c);
517         }
518
519         return 0;
520 }
521
/* Frees a NULL-terminated array of string vectors: strv_free() is
 * called on every element, then the array itself is freed. A NULL
 * argument is a no-op. */
static void strv_free_free(char ***l) {
        size_t k;

        if (l == NULL)
                return;

        for (k = 0; l[k] != NULL; k++)
                strv_free(l[k]);

        free(l);
}
533
534 static void free_join_controllers(void) {
535         strv_free_free(arg_join_controllers);
536         arg_join_controllers = NULL;
537 }
538
539 static int config_parse_join_controllers(const char *unit,
540                                          const char *filename,
541                                          unsigned line,
542                                          const char *section,
543                                          unsigned section_line,
544                                          const char *lvalue,
545                                          int ltype,
546                                          const char *rvalue,
547                                          void *data,
548                                          void *userdata) {
549
550         unsigned n = 0;
551         char *state, *w;
552         size_t length;
553
554         assert(filename);
555         assert(lvalue);
556         assert(rvalue);
557
558         free_join_controllers();
559
560         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
561                 char *s, **l;
562
563                 s = strndup(w, length);
564                 if (!s)
565                         return log_oom();
566
567                 l = strv_split(s, ",");
568                 free(s);
569
570                 strv_uniq(l);
571
572                 if (strv_length(l) <= 1) {
573                         strv_free(l);
574                         continue;
575                 }
576
577                 if (!arg_join_controllers) {
578                         arg_join_controllers = new(char**, 2);
579                         if (!arg_join_controllers) {
580                                 strv_free(l);
581                                 return log_oom();
582                         }
583
584                         arg_join_controllers[0] = l;
585                         arg_join_controllers[1] = NULL;
586
587                         n = 1;
588                 } else {
589                         char ***a;
590                         char ***t;
591
592                         t = new0(char**, n+2);
593                         if (!t) {
594                                 strv_free(l);
595                                 return log_oom();
596                         }
597
598                         n = 0;
599
600                         for (a = arg_join_controllers; *a; a++) {
601
602                                 if (strv_overlap(*a, l)) {
603                                         char **c;
604
605                                         c = strv_merge(*a, l);
606                                         if (!c) {
607                                                 strv_free(l);
608                                                 strv_free_free(t);
609                                                 return log_oom();
610                                         }
611
612                                         strv_free(l);
613                                         l = c;
614                                 } else {
615                                         char **c;
616
617                                         c = strv_copy(*a);
618                                         if (!c) {
619                                                 strv_free(l);
620                                                 strv_free_free(t);
621                                                 return log_oom();
622                                         }
623
624                                         t[n++] = c;
625                                 }
626                         }
627
628                         t[n++] = strv_uniq(l);
629
630                         strv_free_free(arg_join_controllers);
631                         arg_join_controllers = t;
632                 }
633         }
634
635         return 0;
636 }
637
638 static int parse_config_file(void) {
639
640         const ConfigTableItem items[] = {
641                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
642                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
643                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
644                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
645                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
646                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
647                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
648                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
649                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
650                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
651                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
652                 { "Manager", "DefaultTimeoutStartSec", config_parse_sec,         0, &arg_default_timeout_start_usec },
653                 { "Manager", "DefaultTimeoutStopSec", config_parse_sec,          0, &arg_default_timeout_stop_usec  },
654                 { "Manager", "DefaultRestartSec",     config_parse_sec,          0, &arg_default_restart_usec  },
655                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,      0, &arg_default_start_limit_interval },
656                 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned,    0, &arg_default_start_limit_burst },
657                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
658                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
659                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
660                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
661                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
662                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
663                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
664                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
665                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
666                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
667                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
668                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
669                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
670                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
671                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
672                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
673                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
674                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
675                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
676                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
677                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
678                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
679                 { NULL, NULL, NULL, 0, NULL }
680         };
681
682         _cleanup_fclose_ FILE *f;
683         const char *fn;
684         int r;
685
686         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
687         f = fopen(fn, "re");
688         if (!f) {
689                 if (errno == ENOENT)
690                         return 0;
691
692                 log_warning("Failed to open configuration file '%s': %m", fn);
693                 return 0;
694         }
695
696         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
697         if (r < 0)
698                 log_warning("Failed to parse configuration file: %s", strerror(-r));
699
700         return 0;
701 }
702
703 static int parse_proc_cmdline(void) {
704         _cleanup_free_ char *line = NULL;
705         char *w, *state;
706         size_t l;
707         int r;
708
709         r = proc_cmdline(&line);
710         if (r < 0)
711                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
712         if (r <= 0)
713                 return 0;
714
715         FOREACH_WORD_QUOTED(w, l, line, state) {
716                 _cleanup_free_ char *word;
717
718                 word = strndup(w, l);
719                 if (!word)
720                         return log_oom();
721
722                 r = parse_proc_cmdline_word(word);
723                 if (r < 0) {
724                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
725                         return r;
726                 }
727         }
728
729         return 0;
730 }
731
732 static int parse_argv(int argc, char *argv[]) {
733
734         enum {
735                 ARG_LOG_LEVEL = 0x100,
736                 ARG_LOG_TARGET,
737                 ARG_LOG_COLOR,
738                 ARG_LOG_LOCATION,
739                 ARG_UNIT,
740                 ARG_SYSTEM,
741                 ARG_USER,
742                 ARG_TEST,
743                 ARG_VERSION,
744                 ARG_DUMP_CONFIGURATION_ITEMS,
745                 ARG_DUMP_CORE,
746                 ARG_CRASH_SHELL,
747                 ARG_CONFIRM_SPAWN,
748                 ARG_SHOW_STATUS,
749                 ARG_DESERIALIZE,
750                 ARG_SWITCHED_ROOT,
751                 ARG_DEFAULT_STD_OUTPUT,
752                 ARG_DEFAULT_STD_ERROR
753         };
754
755         static const struct option options[] = {
756                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
757                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
758                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
759                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
760                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
761                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
762                 { "user",                     no_argument,       NULL, ARG_USER                     },
763                 { "test",                     no_argument,       NULL, ARG_TEST                     },
764                 { "help",                     no_argument,       NULL, 'h'                          },
765                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
766                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
767                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
768                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
769                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
770                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
771                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
772                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
773                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
774                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
775                 { NULL,                       0,                 NULL, 0                            }
776         };
777
778         int c, r;
779
780         assert(argc >= 1);
781         assert(argv);
782
783         if (getpid() == 1)
784                 opterr = 0;
785
786         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
787
788                 switch (c) {
789
790                 case ARG_LOG_LEVEL:
791                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
792                                 log_error("Failed to parse log level %s.", optarg);
793                                 return r;
794                         }
795
796                         break;
797
798                 case ARG_LOG_TARGET:
799
800                         if ((r = log_set_target_from_string(optarg)) < 0) {
801                                 log_error("Failed to parse log target %s.", optarg);
802                                 return r;
803                         }
804
805                         break;
806
807                 case ARG_LOG_COLOR:
808
809                         if (optarg) {
810                                 if ((r = log_show_color_from_string(optarg)) < 0) {
811                                         log_error("Failed to parse log color setting %s.", optarg);
812                                         return r;
813                                 }
814                         } else
815                                 log_show_color(true);
816
817                         break;
818
819                 case ARG_LOG_LOCATION:
820
821                         if (optarg) {
822                                 if ((r = log_show_location_from_string(optarg)) < 0) {
823                                         log_error("Failed to parse log location setting %s.", optarg);
824                                         return r;
825                                 }
826                         } else
827                                 log_show_location(true);
828
829                         break;
830
831                 case ARG_DEFAULT_STD_OUTPUT:
832
833                         if ((r = exec_output_from_string(optarg)) < 0) {
834                                 log_error("Failed to parse default standard output setting %s.", optarg);
835                                 return r;
836                         } else
837                                 arg_default_std_output = r;
838                         break;
839
840                 case ARG_DEFAULT_STD_ERROR:
841
842                         if ((r = exec_output_from_string(optarg)) < 0) {
843                                 log_error("Failed to parse default standard error output setting %s.", optarg);
844                                 return r;
845                         } else
846                                 arg_default_std_error = r;
847                         break;
848
849                 case ARG_UNIT:
850
851                         if ((r = set_default_unit(optarg)) < 0) {
852                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
853                                 return r;
854                         }
855
856                         break;
857
858                 case ARG_SYSTEM:
859                         arg_running_as = SYSTEMD_SYSTEM;
860                         break;
861
862                 case ARG_USER:
863                         arg_running_as = SYSTEMD_USER;
864                         break;
865
866                 case ARG_TEST:
867                         arg_action = ACTION_TEST;
868                         break;
869
870                 case ARG_VERSION:
871                         arg_action = ACTION_VERSION;
872                         break;
873
874                 case ARG_DUMP_CONFIGURATION_ITEMS:
875                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
876                         break;
877
878                 case ARG_DUMP_CORE:
879                         r = optarg ? parse_boolean(optarg) : 1;
880                         if (r < 0) {
881                                 log_error("Failed to parse dump core boolean %s.", optarg);
882                                 return r;
883                         }
884                         arg_dump_core = r;
885                         break;
886
887                 case ARG_CRASH_SHELL:
888                         r = optarg ? parse_boolean(optarg) : 1;
889                         if (r < 0) {
890                                 log_error("Failed to parse crash shell boolean %s.", optarg);
891                                 return r;
892                         }
893                         arg_crash_shell = r;
894                         break;
895
896                 case ARG_CONFIRM_SPAWN:
897                         r = optarg ? parse_boolean(optarg) : 1;
898                         if (r < 0) {
899                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
900                                 return r;
901                         }
902                         arg_confirm_spawn = r;
903                         break;
904
905                 case ARG_SHOW_STATUS:
906                         r = optarg ? parse_boolean(optarg) : 1;
907                         if (r < 0) {
908                                 log_error("Failed to parse show status boolean %s.", optarg);
909                                 return r;
910                         }
911                         arg_show_status = r;
912                         break;
913
914                 case ARG_DESERIALIZE: {
915                         int fd;
916                         FILE *f;
917
918                         r = safe_atoi(optarg, &fd);
919                         if (r < 0 || fd < 0) {
920                                 log_error("Failed to parse deserialize option %s.", optarg);
921                                 return r < 0 ? r : -EINVAL;
922                         }
923
924                         fd_cloexec(fd, true);
925
926                         f = fdopen(fd, "r");
927                         if (!f) {
928                                 log_error("Failed to open serialization fd: %m");
929                                 return -errno;
930                         }
931
932                         if (serialization)
933                                 fclose(serialization);
934
935                         serialization = f;
936
937                         break;
938                 }
939
940                 case ARG_SWITCHED_ROOT:
941                         arg_switched_root = true;
942                         break;
943
944                 case 'h':
945                         arg_action = ACTION_HELP;
946                         break;
947
948                 case 'D':
949                         log_set_max_level(LOG_DEBUG);
950                         break;
951
952                 case 'b':
953                 case 's':
954                 case 'z':
955                         /* Just to eat away the sysvinit kernel
956                          * cmdline args without getopt() error
957                          * messages that we'll parse in
958                          * parse_proc_cmdline_word() or ignore. */
959
960                 case '?':
961                 default:
962                         if (getpid() != 1) {
963                                 log_error("Unknown option code %c", c);
964                                 return -EINVAL;
965                         }
966
967                         break;
968                 }
969
970         if (optind < argc && getpid() != 1) {
971                 /* Hmm, when we aren't run as init system
972                  * let's complain about excess arguments */
973
974                 log_error("Excess arguments.");
975                 return -EINVAL;
976         }
977
978         if (detect_container(NULL) > 0) {
979                 char **a;
980
981                 /* All /proc/cmdline arguments the kernel didn't
982                  * understand it passed to us. We're not really
983                  * interested in that usually since /proc/cmdline is
984                  * more interesting and complete. With one exception:
985                  * if we are run in a container /proc/cmdline is not
986                  * relevant for the container, hence we rely on argv[]
987                  * instead. */
988
989                 for (a = argv; a < argv + argc; a++) {
990                         r = parse_proc_cmdline_word(*a);
991                         if (r < 0) {
992                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
993                                 return r;
994                         }
995                 }
996         }
997
998         return 0;
999 }
1000
1001 static int help(void) {
1002
1003         printf("%s [OPTIONS...]\n\n"
1004                "Starts up and maintains the system or user services.\n\n"
1005                "  -h --help                      Show this help\n"
1006                "     --test                      Determine startup sequence, dump it and exit\n"
1007                "     --dump-configuration-items  Dump understood unit configuration items\n"
1008                "     --unit=UNIT                 Set default unit\n"
1009                "     --system                    Run a system instance, even if PID != 1\n"
1010                "     --user                      Run a user instance\n"
1011                "     --dump-core[=0|1]           Dump core on crash\n"
1012                "     --crash-shell[=0|1]         Run shell on crash\n"
1013                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1014                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1015                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1016                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1017                "     --log-color[=0|1]           Highlight important log messages\n"
1018                "     --log-location[=0|1]        Include code location in log messages\n"
1019                "     --default-standard-output=  Set default standard output for services\n"
1020                "     --default-standard-error=   Set default standard error output for services\n",
1021                program_invocation_short_name);
1022
1023         return 0;
1024 }
1025
1026 static int version(void) {
1027         puts(PACKAGE_STRING);
1028         puts(SYSTEMD_FEATURES);
1029
1030         return 0;
1031 }
1032
1033 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1034         FILE *f = NULL;
1035         FDSet *fds = NULL;
1036         int r;
1037
1038         assert(m);
1039         assert(_f);
1040         assert(_fds);
1041
1042         r = manager_open_serialization(m, &f);
1043         if (r < 0) {
1044                 log_error("Failed to create serialization file: %s", strerror(-r));
1045                 goto fail;
1046         }
1047
1048         /* Make sure nothing is really destructed when we shut down */
1049         m->n_reloading ++;
1050         bus_manager_send_reloading(m, true);
1051
1052         fds = fdset_new();
1053         if (!fds) {
1054                 r = -ENOMEM;
1055                 log_error("Failed to allocate fd set: %s", strerror(-r));
1056                 goto fail;
1057         }
1058
1059         r = manager_serialize(m, f, fds, switching_root);
1060         if (r < 0) {
1061                 log_error("Failed to serialize state: %s", strerror(-r));
1062                 goto fail;
1063         }
1064
1065         if (fseeko(f, 0, SEEK_SET) < 0) {
1066                 log_error("Failed to rewind serialization fd: %m");
1067                 goto fail;
1068         }
1069
1070         r = fd_cloexec(fileno(f), false);
1071         if (r < 0) {
1072                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1073                 goto fail;
1074         }
1075
1076         r = fdset_cloexec(fds, false);
1077         if (r < 0) {
1078                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1079                 goto fail;
1080         }
1081
1082         *_f = f;
1083         *_fds = fds;
1084
1085         return 0;
1086
1087 fail:
1088         fdset_free(fds);
1089
1090         if (f)
1091                 fclose(f);
1092
1093         return r;
1094 }
1095
1096 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1097         struct rlimit nl;
1098         int r;
1099
1100         assert(saved_rlimit);
1101
1102         /* Save the original RLIMIT_NOFILE so that we can reset it
1103          * later when transitioning from the initrd to the main
1104          * systemd or suchlike. */
1105         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1106                 log_error("Reading RLIMIT_NOFILE failed: %m");
1107                 return -errno;
1108         }
1109
1110         /* Make sure forked processes get the default kernel setting */
1111         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1112                 struct rlimit *rl;
1113
1114                 rl = newdup(struct rlimit, saved_rlimit, 1);
1115                 if (!rl)
1116                         return log_oom();
1117
1118                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1119         }
1120
1121         /* Bump up the resource limit for ourselves substantially */
1122         nl.rlim_cur = nl.rlim_max = 64*1024;
1123         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1124         if (r < 0) {
1125                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1126                 return r;
1127         }
1128
1129         return 0;
1130 }
1131
/* Logs a warning unless /etc/mtab is a symlink whose target is either
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        char *target;
        bool points_to_proc = false;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                points_to_proc =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");
                free(target);
        }

        if (points_to_proc)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1151
/* Logs a warning if dir_is_empty() reports /usr as empty, which is taken
 * as a sign that /usr lives on a separate file system that has not been
 * mounted yet. Nothing is logged if /usr has content or the check itself
 * fails. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1163
/* If /proc/cgroups cannot be accessed, logs a warning about missing
 * control group support and delays for ten seconds before returning.
 * Does nothing otherwise. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1178
1179 static int initialize_join_controllers(void) {
1180         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1181          * + "net_prio". We'd like to add "cpuset" to the mix, but
1182          * "cpuset" does't really work for groups with no initialized
1183          * attributes. */
1184
1185         arg_join_controllers = new(char**, 3);
1186         if (!arg_join_controllers)
1187                 return -ENOMEM;
1188
1189         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1190         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1191         arg_join_controllers[2] = NULL;
1192
1193         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1194                 free_join_controllers();
1195                 return -ENOMEM;
1196         }
1197
1198         return 0;
1199 }
1200
1201 int main(int argc, char *argv[]) {
1202         Manager *m = NULL;
1203         int r, retval = EXIT_FAILURE;
1204         usec_t before_startup, after_startup;
1205         char timespan[FORMAT_TIMESPAN_MAX];
1206         FDSet *fds = NULL;
1207         bool reexecute = false;
1208         const char *shutdown_verb = NULL;
1209         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1210         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1211         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1212         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1213         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1214         static char systemd[] = "systemd";
1215         bool skip_setup = false;
1216         int j;
1217         bool loaded_policy = false;
1218         bool arm_reboot_watchdog = false;
1219         bool queue_default_job = false;
1220         char *switch_root_dir = NULL, *switch_root_init = NULL;
1221         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1222
1223 #ifdef HAVE_SYSV_COMPAT
1224         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1225                 /* This is compatibility support for SysV, where
1226                  * calling init as a user is identical to telinit. */
1227
1228                 errno = -ENOENT;
1229                 execv(SYSTEMCTL_BINARY_PATH, argv);
1230                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1231                 return 1;
1232         }
1233 #endif
1234
1235         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1236         dual_timestamp_get(&userspace_timestamp);
1237
1238         /* Determine if this is a reexecution or normal bootup. We do
1239          * the full command line parsing much later, so let's just
1240          * have a quick peek here. */
1241         if (strv_find(argv+1, "--deserialize"))
1242                 skip_setup = true;
1243
1244         /* If we have switched root, do all the special setup
1245          * things */
1246         if (strv_find(argv+1, "--switched-root"))
1247                 skip_setup = false;
1248
1249         /* If we get started via the /sbin/init symlink then we are
1250            called 'init'. After a subsequent reexecution we are then
1251            called 'systemd'. That is confusing, hence let's call us
1252            systemd right-away. */
1253         program_invocation_short_name = systemd;
1254         prctl(PR_SET_NAME, systemd);
1255
1256         saved_argv = argv;
1257         saved_argc = argc;
1258
1259         log_show_color(isatty(STDERR_FILENO) > 0);
1260
1261         /* Disable the umask logic */
1262         if (getpid() == 1)
1263                 umask(0);
1264
1265         if (getpid() == 1 && detect_container(NULL) <= 0) {
1266
1267                 /* Running outside of a container as PID 1 */
1268                 arg_running_as = SYSTEMD_SYSTEM;
1269                 make_null_stdio();
1270                 log_set_target(LOG_TARGET_KMSG);
1271                 log_open();
1272
1273                 if (in_initrd())
1274                         initrd_timestamp = userspace_timestamp;
1275
1276                 if (!skip_setup) {
1277                         mount_setup_early();
1278                         dual_timestamp_get(&security_start_timestamp);
1279                         if (selinux_setup(&loaded_policy) < 0)
1280                                 goto finish;
1281                         if (ima_setup() < 0)
1282                                 goto finish;
1283                         if (smack_setup() < 0)
1284                                 goto finish;
1285                         dual_timestamp_get(&security_finish_timestamp);
1286                 }
1287
1288                 if (label_init(NULL) < 0)
1289                         goto finish;
1290
1291                 if (!skip_setup) {
1292                         if (hwclock_is_localtime() > 0) {
1293                                 int min;
1294
1295                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1296                                 r = hwclock_set_timezone(&min);
1297                                 if (r < 0)
1298                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1299                                 else
1300                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1301                         } else if (!in_initrd()) {
1302                                 /*
1303                                  * Do dummy first-time call to seal the kernel's time warp magic
1304                                  *
1305                                  * Do not call this from inside the initrd. The initrd might not
1306                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1307                                  * that way. In such case, we need to delay the time-warp or the sealing
1308                                  * until we reach the real system.
1309                                  */
1310                                 hwclock_reset_timezone();
1311
1312                                 /* Tell the kernel our timezone */
1313                                 r = hwclock_set_timezone(NULL);
1314                                 if (r < 0)
1315                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1316                         }
1317                 }
1318
1319                 /* Set the default for later on, but don't actually
1320                  * open the logs like this for now. Note that if we
1321                  * are transitioning from the initrd there might still
1322                  * be journal fd open, and we shouldn't attempt
1323                  * opening that before we parsed /proc/cmdline which
1324                  * might redirect output elsewhere. */
1325                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1326
1327         } else if (getpid() == 1) {
1328                 /* Running inside a container, as PID 1 */
1329                 arg_running_as = SYSTEMD_SYSTEM;
1330                 log_set_target(LOG_TARGET_CONSOLE);
1331                 log_open();
1332
1333                 /* For the later on, see above... */
1334                 log_set_target(LOG_TARGET_JOURNAL);
1335
1336                 /* clear the kernel timestamp,
1337                  * because we are in a container */
1338                 kernel_timestamp.monotonic = 0ULL;
1339                 kernel_timestamp.realtime = 0ULL;
1340
1341         } else {
1342                 /* Running as user instance */
1343                 arg_running_as = SYSTEMD_USER;
1344                 log_set_target(LOG_TARGET_AUTO);
1345                 log_open();
1346
1347                 /* clear the kernel timestamp,
1348                  * because we are not PID 1 */
1349                 kernel_timestamp.monotonic = 0ULL;
1350                 kernel_timestamp.realtime = 0ULL;
1351         }
1352
1353         /* Initialize default unit */
1354         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1355         if (r < 0) {
1356                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1357                 goto finish;
1358         }
1359
1360         r = initialize_join_controllers();
1361         if (r < 0)
1362                 goto finish;
1363
1364         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1365          * /proc/$PID/fd is available. */
1366         if (getpid() == 1) {
1367                 r = mount_setup(loaded_policy);
1368                 if (r < 0)
1369                         goto finish;
1370         }
1371
1372         /* Reset all signal handlers. */
1373         assert_se(reset_all_signal_handlers() == 0);
1374
1375         ignore_signals(SIGNALS_IGNORE, -1);
1376
1377         if (parse_config_file() < 0)
1378                 goto finish;
1379
1380         if (arg_running_as == SYSTEMD_SYSTEM)
1381                 if (parse_proc_cmdline() < 0)
1382                         goto finish;
1383
1384         log_parse_environment();
1385
1386         if (parse_argv(argc, argv) < 0)
1387                 goto finish;
1388
1389         if (arg_action == ACTION_TEST &&
1390             geteuid() == 0) {
1391                 log_error("Don't run test mode as root.");
1392                 goto finish;
1393         }
1394
1395         if (arg_running_as == SYSTEMD_USER &&
1396             arg_action == ACTION_RUN &&
1397             sd_booted() <= 0) {
1398                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1399                 goto finish;
1400         }
1401
1402         if (arg_running_as == SYSTEMD_SYSTEM &&
1403             arg_action == ACTION_RUN &&
1404             running_in_chroot() > 0) {
1405                 log_error("Cannot be run in a chroot() environment.");
1406                 goto finish;
1407         }
1408
1409         if (arg_action == ACTION_HELP) {
1410                 retval = help();
1411                 goto finish;
1412         } else if (arg_action == ACTION_VERSION) {
1413                 retval = version();
1414                 goto finish;
1415         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1416                 unit_dump_config_items(stdout);
1417                 retval = EXIT_SUCCESS;
1418                 goto finish;
1419         } else if (arg_action == ACTION_DONE) {
1420                 retval = EXIT_SUCCESS;
1421                 goto finish;
1422         }
1423
1424         if (arg_running_as == SYSTEMD_USER &&
1425             !getenv("XDG_RUNTIME_DIR")) {
1426                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1427                 goto finish;
1428         }
1429
1430         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1431
1432         /* Close logging fds, in order not to confuse fdset below */
1433         log_close();
1434
1435         /* Remember open file descriptors for later deserialization */
1436         r = fdset_new_fill(&fds);
1437         if (r < 0) {
1438                 log_error("Failed to allocate fd set: %s", strerror(-r));
1439                 goto finish;
1440         } else
1441                 fdset_cloexec(fds, true);
1442
1443         if (serialization)
1444                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1445
1446         if (arg_running_as == SYSTEMD_SYSTEM)
1447                 /* Become a session leader if we aren't one yet. */
1448                 setsid();
1449
1450         /* Move out of the way, so that we won't block unmounts */
1451         assert_se(chdir("/")  == 0);
1452
1453         /* Reset the console, but only if this is really init and we
1454          * are freshly booted */
1455         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1456                 console_setup(getpid() == 1 && !skip_setup);
1457
1458         /* Open the logging devices, if possible and necessary */
1459         log_open();
1460
1461         /* Make sure we leave a core dump without panicking the
1462          * kernel. */
1463         if (getpid() == 1) {
1464                 install_crash_handler();
1465
1466                 r = mount_cgroup_controllers(arg_join_controllers);
1467                 if (r < 0)
1468                         goto finish;
1469         }
1470
1471         if (arg_running_as == SYSTEMD_SYSTEM) {
1472                 const char *virtualization = NULL;
1473
1474                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1475
1476                 detect_virtualization(&virtualization);
1477                 if (virtualization)
1478                         log_info("Detected virtualization '%s'.", virtualization);
1479
1480                 if (in_initrd())
1481                         log_info("Running in initial RAM disk.");
1482
1483         } else
1484                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1485
1486         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1487                 if (arg_show_status || plymouth_running())
1488                         status_welcome();
1489
1490 #ifdef HAVE_KMOD
1491                 kmod_setup();
1492 #endif
1493                 hostname_setup();
1494                 machine_id_setup();
1495                 loopback_setup();
1496
1497                 test_mtab();
1498                 test_usr();
1499                 test_cgroups();
1500         }
1501
1502         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1503                 watchdog_set_timeout(&arg_runtime_watchdog);
1504
1505         if (arg_timer_slack_nsec != (nsec_t) -1)
1506                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1507                         log_error("Failed to adjust timer slack: %m");
1508
1509         if (arg_capability_bounding_set_drop) {
1510                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1511                 if (r < 0) {
1512                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1513                         goto finish;
1514                 }
1515                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1516                 if (r < 0) {
1517                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1518                         goto finish;
1519                 }
1520         }
1521
1522         if (arg_running_as == SYSTEMD_USER) {
1523                 /* Become reaper of our children */
1524                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1525                         log_warning("Failed to make us a subreaper: %m");
1526                         if (errno == EINVAL)
1527                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1528                 }
1529         }
1530
1531         if (arg_running_as == SYSTEMD_SYSTEM)
1532                 bump_rlimit_nofile(&saved_rlimit_nofile);
1533
1534         r = manager_new(arg_running_as, &m);
1535         if (r < 0) {
1536                 log_error("Failed to allocate manager object: %s", strerror(-r));
1537                 goto finish;
1538         }
1539
1540         m->confirm_spawn = arg_confirm_spawn;
1541         m->default_std_output = arg_default_std_output;
1542         m->default_std_error = arg_default_std_error;
1543         m->default_restart_usec = arg_default_restart_usec;
1544         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1545         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1546         m->default_start_limit_interval = arg_default_start_limit_interval;
1547         m->default_start_limit_burst = arg_default_start_limit_burst;
1548         m->runtime_watchdog = arg_runtime_watchdog;
1549         m->shutdown_watchdog = arg_shutdown_watchdog;
1550         m->userspace_timestamp = userspace_timestamp;
1551         m->kernel_timestamp = kernel_timestamp;
1552         m->initrd_timestamp = initrd_timestamp;
1553         m->security_start_timestamp = security_start_timestamp;
1554         m->security_finish_timestamp = security_finish_timestamp;
1555
1556         manager_set_default_rlimits(m, arg_default_rlimit);
1557
1558         if (arg_default_environment)
1559                 manager_environment_add(m, NULL, arg_default_environment);
1560
1561         manager_set_show_status(m, arg_show_status);
1562
1563         /* Remember whether we should queue the default job */
1564         queue_default_job = !serialization || arg_switched_root;
1565
1566         before_startup = now(CLOCK_MONOTONIC);
1567
1568         r = manager_startup(m, serialization, fds);
1569         if (r < 0)
1570                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1571
1572         /* This will close all file descriptors that were opened, but
1573          * not claimed by any unit. */
1574         fdset_free(fds);
1575         fds = NULL;
1576
1577         if (serialization) {
1578                 fclose(serialization);
1579                 serialization = NULL;
1580         }
1581
1582         if (queue_default_job) {
1583                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1584                 Unit *target = NULL;
1585                 Job *default_unit_job;
1586
1587                 log_debug("Activating default unit: %s", arg_default_unit);
1588
1589                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1590                 if (r < 0)
1591                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1592                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1593                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1594                 else if (target->load_state == UNIT_MASKED)
1595                         log_error("Default target masked.");
1596
1597                 if (!target || target->load_state != UNIT_LOADED) {
1598                         log_info("Trying to load rescue target...");
1599
1600                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1601                         if (r < 0) {
1602                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1603                                 goto finish;
1604                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1605                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1606                                 goto finish;
1607                         } else if (target->load_state == UNIT_MASKED) {
1608                                 log_error("Rescue target masked.");
1609                                 goto finish;
1610                         }
1611                 }
1612
1613                 assert(target->load_state == UNIT_LOADED);
1614
1615                 if (arg_action == ACTION_TEST) {
1616                         printf("-> By units:\n");
1617                         manager_dump_units(m, stdout, "\t");
1618                 }
1619
1620                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1621                 if (r == -EPERM) {
1622                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1623
1624                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1625                         if (r < 0) {
1626                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1627                                 goto finish;
1628                         }
1629                 } else if (r < 0) {
1630                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1631                         goto finish;
1632                 }
1633
1634                 m->default_unit_job_id = default_unit_job->id;
1635
1636                 after_startup = now(CLOCK_MONOTONIC);
1637                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1638                          "Loaded units and determined initial transaction in %s.",
1639                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1640
1641                 if (arg_action == ACTION_TEST) {
1642                         printf("-> By jobs:\n");
1643                         manager_dump_jobs(m, stdout, "\t");
1644                         retval = EXIT_SUCCESS;
1645                         goto finish;
1646                 }
1647         }
1648
1649         for (;;) {
1650                 r = manager_loop(m);
1651                 if (r < 0) {
1652                         log_error("Failed to run mainloop: %s", strerror(-r));
1653                         goto finish;
1654                 }
1655
1656                 switch (m->exit_code) {
1657
1658                 case MANAGER_EXIT:
1659                         retval = EXIT_SUCCESS;
1660                         log_debug("Exit.");
1661                         goto finish;
1662
1663                 case MANAGER_RELOAD:
1664                         log_info("Reloading.");
1665                         r = manager_reload(m);
1666                         if (r < 0)
1667                                 log_error("Failed to reload: %s", strerror(-r));
1668                         break;
1669
1670                 case MANAGER_REEXECUTE:
1671
1672                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1673                                 goto finish;
1674
1675                         reexecute = true;
1676                         log_notice("Reexecuting.");
1677                         goto finish;
1678
1679                 case MANAGER_SWITCH_ROOT:
1680                         /* Steal the switch root parameters */
1681                         switch_root_dir = m->switch_root;
1682                         switch_root_init = m->switch_root_init;
1683                         m->switch_root = m->switch_root_init = NULL;
1684
1685                         if (!switch_root_init)
1686                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1687                                         goto finish;
1688
1689                         reexecute = true;
1690                         log_notice("Switching root.");
1691                         goto finish;
1692
1693                 case MANAGER_REBOOT:
1694                 case MANAGER_POWEROFF:
1695                 case MANAGER_HALT:
1696                 case MANAGER_KEXEC: {
1697                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1698                                 [MANAGER_REBOOT] = "reboot",
1699                                 [MANAGER_POWEROFF] = "poweroff",
1700                                 [MANAGER_HALT] = "halt",
1701                                 [MANAGER_KEXEC] = "kexec"
1702                         };
1703
1704                         assert_se(shutdown_verb = table[m->exit_code]);
1705                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1706
1707                         log_notice("Shutting down.");
1708                         goto finish;
1709                 }
1710
1711                 default:
1712                         assert_not_reached("Unknown exit code.");
1713                 }
1714         }
1715
1716 finish:
1717         if (m)
1718                 manager_free(m);
1719
1720         for (j = 0; j < RLIMIT_NLIMITS; j++)
1721                 free(arg_default_rlimit[j]);
1722
1723         free(arg_default_unit);
1724         free_join_controllers();
1725
1726         label_finish();
1727
1728         if (reexecute) {
1729                 const char **args;
1730                 unsigned i, args_size;
1731
1732                 /* Close and disarm the watchdog, so that the new
1733                  * instance can reinitialize it, but doesn't get
1734                  * rebooted while we do that */
1735                 watchdog_close(true);
1736
1737                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1738                  * that the new systemd can pass the kernel default to
1739                  * its child processes */
1740                 if (saved_rlimit_nofile.rlim_cur > 0)
1741                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1742
1743                 if (switch_root_dir) {
1744                         /* Kill all remaining processes from the
1745                          * initrd, but don't wait for them, so that we
1746                          * can handle the SIGCHLD for them after
1747                          * deserializing. */
1748                         broadcast_signal(SIGTERM, false, true);
1749
1750                         /* And switch root */
1751                         r = switch_root(switch_root_dir);
1752                         if (r < 0)
1753                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1754                 }
1755
1756                 args_size = MAX(6, argc+1);
1757                 args = newa(const char*, args_size);
1758
1759                 if (!switch_root_init) {
1760                         char sfd[16];
1761
1762                         /* First try to spawn ourselves with the right
1763                          * path, and with full serialization. We do
1764                          * this only if the user didn't specify an
1765                          * explicit init to spawn. */
1766
1767                         assert(serialization);
1768                         assert(fds);
1769
1770                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1771                         char_array_0(sfd);
1772
1773                         i = 0;
1774                         args[i++] = SYSTEMD_BINARY_PATH;
1775                         if (switch_root_dir)
1776                                 args[i++] = "--switched-root";
1777                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1778                         args[i++] = "--deserialize";
1779                         args[i++] = sfd;
1780                         args[i++] = NULL;
1781
1782                         /* do not pass along the environment we inherit from the kernel or initrd */
1783                         if (switch_root_dir)
1784                                 clearenv();
1785
1786                         assert(i <= args_size);
1787                         execv(args[0], (char* const*) args);
1788                 }
1789
1790                 /* Try the fallback, if there is any, without any
1791                  * serialization. We pass the original argv[] and
1792                  * envp[]. (Well, modulo the ordering changes due to
1793                  * getopt() in argv[], and some cleanups in envp[],
1794                  * but let's hope that doesn't matter.) */
1795
1796                 if (serialization) {
1797                         fclose(serialization);
1798                         serialization = NULL;
1799                 }
1800
1801                 if (fds) {
1802                         fdset_free(fds);
1803                         fds = NULL;
1804                 }
1805
1806                 /* Reopen the console */
1807                 make_console_stdio();
1808
1809                 for (j = 1, i = 1; j < argc; j++)
1810                         args[i++] = argv[j];
1811                 args[i++] = NULL;
1812                 assert(i <= args_size);
1813
1814                 if (switch_root_init) {
1815                         args[0] = switch_root_init;
1816                         execv(args[0], (char* const*) args);
1817                         log_warning("Failed to execute configured init, trying fallback: %m");
1818                 }
1819
1820                 args[0] = "/sbin/init";
1821                 execv(args[0], (char* const*) args);
1822
1823                 if (errno == ENOENT) {
1824                         log_warning("No /sbin/init, trying fallback");
1825
1826                         args[0] = "/bin/sh";
1827                         args[1] = NULL;
1828                         execv(args[0], (char* const*) args);
1829                         log_error("Failed to execute /bin/sh, giving up: %m");
1830                 } else
1831                         log_warning("Failed to execute /sbin/init, giving up: %m");
1832         }
1833
1834         if (serialization)
1835                 fclose(serialization);
1836
1837         if (fds)
1838                 fdset_free(fds);
1839
1840 #ifdef HAVE_VALGRIND_VALGRIND_H
1841         /* If we are PID 1 and running under valgrind, then let's exit
1842          * here explicitly. valgrind will only generate nice output on
1843          * exit(), not on exec(), hence let's do the former not the
1844          * latter here. */
1845         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1846                 return 0;
1847 #endif
1848
1849         if (shutdown_verb) {
1850                 const char * command_line[] = {
1851                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1852                         shutdown_verb,
1853                         NULL
1854                 };
1855                 char **env_block;
1856
1857                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1858                         char e[32];
1859
1860                         /* If we reboot let's set the shutdown
1861                          * watchdog and tell the shutdown binary to
1862                          * repeatedly ping it */
1863                         watchdog_set_timeout(&arg_shutdown_watchdog);
1864                         watchdog_close(false);
1865
1866                         /* Tell the binary how often to ping */
1867                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1868                         char_array_0(e);
1869
1870                         env_block = strv_append(environ, e);
1871                 } else {
1872                         env_block = strv_copy(environ);
1873                         watchdog_close(true);
1874                 }
1875
1876                 /* Avoid the creation of new processes forked by the
1877                  * kernel; at this point, we will not listen to the
1878                  * signals anyway */
1879                 if (detect_container(NULL) <= 0)
1880                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1881
1882                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1883                 free(env_block);
1884                 log_error("Failed to execute shutdown binary, freezing: %m");
1885         }
1886
1887         if (getpid() == 1)
1888                 freeze();
1889
1890         return retval;
1891 }