chiark / gitweb /
6db42991e83ae03ed0897dbfdd4e3af67d33836f
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
/* Action selected for this invocation; defaults to ACTION_RUN. */
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85
/* Default unit name; replaced (and the previous value freed) by
 * set_default_unit(). */
86 static char *arg_default_unit = NULL;
/* Decides between system.conf and user.conf in parse_config_file(). */
87 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
88
/* Manager settings. Most of these are filled in from kernel command line
 * words (parse_proc_cmdline_word()) and/or from the [Manager] section of
 * the configuration file (parse_config_file()). */
89 static bool arg_dump_core = true;
90 static bool arg_crash_shell = false;
/* Passed to chvt() by crash() whenever it is non-zero. */
91 static int arg_crash_chvt = -1;
92 static bool arg_confirm_spawn = false;
93 static bool arg_show_status = true;
94 static bool arg_switched_root = false;
/* NULL-terminated array of string vectors, built by
 * config_parse_join_controllers() and released by free_join_controllers(). */
95 static char ***arg_join_controllers = NULL;
96 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
97 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
98 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
99 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
100 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
101 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
102 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
103 static usec_t arg_runtime_watchdog = 0;
104 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
105 static char **arg_default_environment = NULL;
/* One slot per resource limit, indexed by RLIMIT_*; all slots start out NULL. */
106 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
107 static uint64_t arg_capability_bounding_set_drop = 0;
108 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
109
110 static FILE* serialization = NULL;
111
/* Signal handler that intentionally does nothing. crash() installs it for
 * SIGCHLD while it waits for the child that dumps core. */
static void nop_handler(int sig) {
        (void) sig;
}
114
115 noreturn static void crash(int sig) {
116
117         if (getpid() != 1)
118                 /* Pass this on immediately, if this is not PID 1 */
119                 raise(sig);
120         else if (!arg_dump_core)
121                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
122         else {
123                 struct sigaction sa = {
124                         .sa_handler = nop_handler,
125                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
126                 };
127                 pid_t pid;
128
129                 /* We want to wait for the core process, hence let's enable SIGCHLD */
130                 sigaction(SIGCHLD, &sa, NULL);
131
132                 pid = fork();
133                 if (pid < 0)
134                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
135
136                 else if (pid == 0) {
137                         struct rlimit rl = {};
138
139                         /* Enable default signal handler for core dump */
140                         zero(sa);
141                         sa.sa_handler = SIG_DFL;
142                         sigaction(sig, &sa, NULL);
143
144                         /* Don't limit the core dump size */
145                         rl.rlim_cur = RLIM_INFINITY;
146                         rl.rlim_max = RLIM_INFINITY;
147                         setrlimit(RLIMIT_CORE, &rl);
148
149                         /* Just to be sure... */
150                         chdir("/");
151
152                         /* Raise the signal again */
153                         raise(sig);
154
155                         assert_not_reached("We shouldn't be here...");
156                         _exit(1);
157
158                 } else {
159                         siginfo_t status;
160                         int r;
161
162                         /* Order things nicely. */
163                         r = wait_for_terminate(pid, &status);
164                         if (r < 0)
165                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
166                         else if (status.si_code != CLD_DUMPED)
167                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
168                         else
169                                 log_error("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
170                 }
171         }
172
173         if (arg_crash_chvt)
174                 chvt(arg_crash_chvt);
175
176         if (arg_crash_shell) {
177                 struct sigaction sa = {
178                         .sa_handler = SIG_IGN,
179                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
180                 };
181                 pid_t pid;
182
183                 log_info("Executing crash shell in 10s...");
184                 sleep(10);
185
186                 /* Let the kernel reap children for us */
187                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
188
189                 pid = fork();
190                 if (pid < 0)
191                         log_error("Failed to fork off crash shell: %m");
192                 else if (pid == 0) {
193                         make_console_stdio();
194                         execl("/bin/sh", "/bin/sh", NULL);
195
196                         log_error("execl() failed: %m");
197                         _exit(1);
198                 }
199
200                 log_info("Successfully spawned crash shell as pid "PID_FMT".", pid);
201         }
202
203         log_info("Freezing execution.");
204         freeze();
205 }
206
207 static void install_crash_handler(void) {
208         struct sigaction sa = {
209                 .sa_handler = crash,
210                 .sa_flags = SA_NODEFER,
211         };
212
213         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
214 }
215
216 static int console_setup(bool do_reset) {
217         int tty_fd, r;
218
219         /* If we are init, we connect stdin/stdout/stderr to /dev/null
220          * and make sure we don't have a controlling tty. */
221
222         release_terminal();
223
224         if (!do_reset)
225                 return 0;
226
227         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
228         if (tty_fd < 0) {
229                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
230                 return -tty_fd;
231         }
232
233         /* We don't want to force text mode.
234          * plymouth may be showing pictures already from initrd. */
235         r = reset_terminal_fd(tty_fd, false);
236         if (r < 0)
237                 log_error("Failed to reset /dev/console: %s", strerror(-r));
238
239         close_nointr_nofail(tty_fd);
240         return r;
241 }
242
243 static int set_default_unit(const char *u) {
244         char *c;
245
246         assert(u);
247
248         c = strdup(u);
249         if (!c)
250                 return -ENOMEM;
251
252         free(arg_default_unit);
253         arg_default_unit = c;
254
255         return 0;
256 }
257
/* Interpret a single word of the kernel command line.
 *
 * Recognized words update the matching arg_* setting, the log
 * configuration or the default unit; values that fail to parse are logged
 * and ignored. Returns 0 in those cases, -ENOMEM if copying a
 * systemd.setenv= value fails, or whatever set_default_unit() returns
 * when the word selects a default unit. */
258 static int parse_proc_cmdline_word(const char *word) {
259
            /* Flat list of (command line word, target unit) pairs; the
             * SysV compatibility loop at the end walks it two entries at
             * a time. */
260         static const char * const rlmap[] = {
261                 "emergency", SPECIAL_EMERGENCY_TARGET,
262                 "-b",        SPECIAL_EMERGENCY_TARGET,
263                 "single",    SPECIAL_RESCUE_TARGET,
264                 "-s",        SPECIAL_RESCUE_TARGET,
265                 "s",         SPECIAL_RESCUE_TARGET,
266                 "S",         SPECIAL_RESCUE_TARGET,
267                 "1",         SPECIAL_RESCUE_TARGET,
268                 "2",         SPECIAL_RUNLEVEL2_TARGET,
269                 "3",         SPECIAL_RUNLEVEL3_TARGET,
270                 "4",         SPECIAL_RUNLEVEL4_TARGET,
271                 "5",         SPECIAL_RUNLEVEL5_TARGET,
272         };
273
274         assert(word);
275
            /* In every branch below the constant added to word is the
             * length of the prefix that was just matched, i.e. it skips
             * ahead to the value. */

            /* systemd.unit= is only honoured outside of the initrd,
             * rd.systemd.unit= only inside of it. */
276         if (startswith(word, "systemd.unit=")) {
277
278                 if (!in_initrd())
279                         return set_default_unit(word + 13);
280
281         } else if (startswith(word, "rd.systemd.unit=")) {
282
283                 if (in_initrd())
284                         return set_default_unit(word + 16);
285
286         } else if (startswith(word, "systemd.log_target=")) {
287
288                 if (log_set_target_from_string(word + 19) < 0)
289                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
290
291         } else if (startswith(word, "systemd.log_level=")) {
292
293                 if (log_set_max_level_from_string(word + 18) < 0)
294                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
295
296         } else if (startswith(word, "systemd.log_color=")) {
297
298                 if (log_show_color_from_string(word + 18) < 0)
299                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
300
301         } else if (startswith(word, "systemd.log_location=")) {
302
303                 if (log_show_location_from_string(word + 21) < 0)
304                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
305
306         } else if (startswith(word, "systemd.dump_core=")) {
307                 int r;
308
309                 if ((r = parse_boolean(word + 18)) < 0)
310                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
311                 else
312                         arg_dump_core = r;
313
314         } else if (startswith(word, "systemd.crash_shell=")) {
315                 int r;
316
317                 if ((r = parse_boolean(word + 20)) < 0)
318                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
319                 else
320                         arg_crash_shell = r;
321
322         } else if (startswith(word, "systemd.confirm_spawn=")) {
323                 int r;
324
325                 if ((r = parse_boolean(word + 22)) < 0)
326                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
327                 else
328                         arg_confirm_spawn = r;
329
330         } else if (startswith(word, "systemd.crash_chvt=")) {
331                 int k;
332
333                 if (safe_atoi(word + 19, &k) < 0)
334                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
335                 else
336                         arg_crash_chvt = k;
337
338         } else if (startswith(word, "systemd.show_status=")) {
339                 int r;
340
341                 if ((r = parse_boolean(word + 20)) < 0)
342                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
343                 else
344                         arg_show_status = r;
345         } else if (startswith(word, "systemd.default_standard_output=")) {
346                 int r;
347
348                 if ((r = exec_output_from_string(word + 32)) < 0)
349                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
350                 else
351                         arg_default_std_output = r;
352         } else if (startswith(word, "systemd.default_standard_error=")) {
353                 int r;
354
355                 if ((r = exec_output_from_string(word + 31)) < 0)
356                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
357                 else
358                         arg_default_std_error = r;
359         } else if (startswith(word, "systemd.setenv=")) {
360                 _cleanup_free_ char *cenv = NULL;
361
362                 cenv = strdup(word + 15);
363                 if (!cenv)
364                         return -ENOMEM;
365
366                 if (env_assignment_is_valid(cenv)) {
367                         char **env;
368
                            /* NOTE(review): the previous arg_default_environment
                             * pointer is overwritten without being freed; whether
                             * that leaks depends on the ownership semantics of
                             * strv_env_set() -- confirm. */
369                         env = strv_env_set(arg_default_environment, cenv);
370                         if (env)
371                                 arg_default_environment = env;
372                         else
373                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
374                 } else
375                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
376
            /* Anything else in our namespace that was not matched above */
377         } else if (startswith(word, "systemd.") ||
378                    (in_initrd() && startswith(word, "rd.systemd."))) {
379
380                 const char *c;
381
382                 /* Ignore systemd.journald.xyz and friends */
383                 c = word;
384                 if (startswith(c, "rd."))
385                         c += 3;
386                 if (startswith(c, "systemd."))
387                         c += 8;
                    /* Complain only if no further '.' shows up before the
                     * first '=' (or the end of the word). */
388                 if (c[strcspn(c, ".=")] != '.')  {
389
390                         log_warning("Unknown kernel switch %s. Ignoring.", word);
391
392                         log_info("Supported kernel switches:\n"
393                                  "systemd.unit=UNIT                        Default unit to start\n"
394                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
395                                  "systemd.dump_core=0|1                    Dump core on crash\n"
396                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
397                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
398                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
399                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
400                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
401                                  "                                         Log target\n"
402                                  "systemd.log_level=LEVEL                  Log level\n"
403                                  "systemd.log_color=0|1                    Highlight important log messages\n"
404                                  "systemd.log_location=0|1                 Include code location in log messages\n"
405                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
406                                  "                                         Set default log output for services\n"
407                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
408                                  "                                         Set default log error output for services\n"
409                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
410                 }
411
412         } else if (streq(word, "quiet"))
413                 arg_show_status = false;
414         else if (streq(word, "debug")) {
415                 /* Log to kmsg, the journal socket will fill up before the
416                  * journal is started and tools running during that time
417                  * will block with every log message for 60 seconds,
418                  * before they give up. */
419                 log_set_max_level(LOG_DEBUG);
420                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG);
421         } else if (!in_initrd()) {
422                 unsigned i;
423
424                 /* SysV compatibility */
425                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
426                         if (streq(word, rlmap[i]))
427                                 return set_default_unit(rlmap[i+1]);
428         }
429
430         return 0;
431 }
432
/* Generates a configuration parser callback called `name` that passes the
 * right-hand side of an assignment to `func`. If `func` returns a negative
 * value a syntax error mentioning `descr` is logged. The generated
 * callback always returns 0, so a bad value never aborts parsing. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
464
465 static int config_parse_cpu_affinity2(const char *unit,
466                                       const char *filename,
467                                       unsigned line,
468                                       const char *section,
469                                       unsigned section_line,
470                                       const char *lvalue,
471                                       int ltype,
472                                       const char *rvalue,
473                                       void *data,
474                                       void *userdata) {
475
476         char *w;
477         size_t l;
478         char *state;
479         cpu_set_t *c = NULL;
480         unsigned ncpus = 0;
481
482         assert(filename);
483         assert(lvalue);
484         assert(rvalue);
485
486         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
487                 char *t;
488                 int r;
489                 unsigned cpu;
490
491                 if (!(t = strndup(w, l)))
492                         return log_oom();
493
494                 r = safe_atou(t, &cpu);
495                 free(t);
496
497                 if (!c)
498                         if (!(c = cpu_set_malloc(&ncpus)))
499                                 return log_oom();
500
501                 if (r < 0 || cpu >= ncpus) {
502                         log_syntax(unit, LOG_ERR, filename, line, -r,
503                                    "Failed to parse CPU affinity '%s'", rvalue);
504                         CPU_FREE(c);
505                         return -EBADMSG;
506                 }
507
508                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
509         }
510
511         if (c) {
512                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
513                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
514
515                 CPU_FREE(c);
516         }
517
518         return 0;
519 }
520
/* Free a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        size_t k;

        if (l == NULL)
                return;

        for (k = 0; l[k] != NULL; k++)
                strv_free(l[k]);

        free(l);
}
532
533 static void free_join_controllers(void) {
534         strv_free_free(arg_join_controllers);
535         arg_join_controllers = NULL;
536 }
537
538 static int config_parse_join_controllers(const char *unit,
539                                          const char *filename,
540                                          unsigned line,
541                                          const char *section,
542                                          unsigned section_line,
543                                          const char *lvalue,
544                                          int ltype,
545                                          const char *rvalue,
546                                          void *data,
547                                          void *userdata) {
548
549         unsigned n = 0;
550         char *state, *w;
551         size_t length;
552
553         assert(filename);
554         assert(lvalue);
555         assert(rvalue);
556
557         free_join_controllers();
558
559         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
560                 char *s, **l;
561
562                 s = strndup(w, length);
563                 if (!s)
564                         return log_oom();
565
566                 l = strv_split(s, ",");
567                 free(s);
568
569                 strv_uniq(l);
570
571                 if (strv_length(l) <= 1) {
572                         strv_free(l);
573                         continue;
574                 }
575
576                 if (!arg_join_controllers) {
577                         arg_join_controllers = new(char**, 2);
578                         if (!arg_join_controllers) {
579                                 strv_free(l);
580                                 return log_oom();
581                         }
582
583                         arg_join_controllers[0] = l;
584                         arg_join_controllers[1] = NULL;
585
586                         n = 1;
587                 } else {
588                         char ***a;
589                         char ***t;
590
591                         t = new0(char**, n+2);
592                         if (!t) {
593                                 strv_free(l);
594                                 return log_oom();
595                         }
596
597                         n = 0;
598
599                         for (a = arg_join_controllers; *a; a++) {
600
601                                 if (strv_overlap(*a, l)) {
602                                         char **c;
603
604                                         if (strv_extend_strv(&l, *a) < 0) {
605                                                 strv_free(l);
606                                                 strv_free_free(t);
607                                                 return log_oom();
608                                         }
609
610                                 } else {
611                                         char **c;
612
613                                         c = strv_copy(*a);
614                                         if (!c) {
615                                                 strv_free(l);
616                                                 strv_free_free(t);
617                                                 return log_oom();
618                                         }
619
620                                         t[n++] = c;
621                                 }
622                         }
623
624                         t[n++] = strv_uniq(l);
625
626                         strv_free_free(arg_join_controllers);
627                         arg_join_controllers = t;
628                 }
629         }
630
631         return 0;
632 }
633
634 static int parse_config_file(void) {
635
636         const ConfigTableItem items[] = {
637                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
638                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
639                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
640                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
641                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
642                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
643                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
644                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
645                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
646                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
647                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
648                 { "Manager", "DefaultTimeoutStartSec", config_parse_sec,         0, &arg_default_timeout_start_usec },
649                 { "Manager", "DefaultTimeoutStopSec", config_parse_sec,          0, &arg_default_timeout_stop_usec  },
650                 { "Manager", "DefaultRestartSec",     config_parse_sec,          0, &arg_default_restart_usec  },
651                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,      0, &arg_default_start_limit_interval },
652                 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned,    0, &arg_default_start_limit_burst },
653                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
654                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
655                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
656                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
657                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
658                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
659                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
660                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
661                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
662                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
663                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
664                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
665                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
666                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
667                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
668                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
669                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
670                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
671                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
672                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
673                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
674                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
675                 { NULL, NULL, NULL, 0, NULL }
676         };
677
678         _cleanup_fclose_ FILE *f;
679         const char *fn;
680         int r;
681
682         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
683         f = fopen(fn, "re");
684         if (!f) {
685                 if (errno == ENOENT)
686                         return 0;
687
688                 log_warning("Failed to open configuration file '%s': %m", fn);
689                 return 0;
690         }
691
692         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
693         if (r < 0)
694                 log_warning("Failed to parse configuration file: %s", strerror(-r));
695
696         return 0;
697 }
698
699 static int parse_proc_cmdline(void) {
700         _cleanup_free_ char *line = NULL;
701         char *w, *state;
702         size_t l;
703         int r;
704
705         r = proc_cmdline(&line);
706         if (r < 0)
707                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
708         if (r <= 0)
709                 return 0;
710
711         FOREACH_WORD_QUOTED(w, l, line, state) {
712                 _cleanup_free_ char *word;
713
714                 word = strndup(w, l);
715                 if (!word)
716                         return log_oom();
717
718                 r = parse_proc_cmdline_word(word);
719                 if (r < 0) {
720                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
721                         return r;
722                 }
723         }
724
725         return 0;
726 }
727
728 static int parse_argv(int argc, char *argv[]) {
729
730         enum {
731                 ARG_LOG_LEVEL = 0x100,
732                 ARG_LOG_TARGET,
733                 ARG_LOG_COLOR,
734                 ARG_LOG_LOCATION,
735                 ARG_UNIT,
736                 ARG_SYSTEM,
737                 ARG_USER,
738                 ARG_TEST,
739                 ARG_VERSION,
740                 ARG_DUMP_CONFIGURATION_ITEMS,
741                 ARG_DUMP_CORE,
742                 ARG_CRASH_SHELL,
743                 ARG_CONFIRM_SPAWN,
744                 ARG_SHOW_STATUS,
745                 ARG_DESERIALIZE,
746                 ARG_SWITCHED_ROOT,
747                 ARG_DEFAULT_STD_OUTPUT,
748                 ARG_DEFAULT_STD_ERROR
749         };
750
751         static const struct option options[] = {
752                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
753                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
754                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
755                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
756                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
757                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
758                 { "user",                     no_argument,       NULL, ARG_USER                     },
759                 { "test",                     no_argument,       NULL, ARG_TEST                     },
760                 { "help",                     no_argument,       NULL, 'h'                          },
761                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
762                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
763                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
764                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
765                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
766                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
767                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
768                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
769                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
770                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
771                 { NULL,                       0,                 NULL, 0                            }
772         };
773
774         int c, r;
775
776         assert(argc >= 1);
777         assert(argv);
778
779         if (getpid() == 1)
780                 opterr = 0;
781
782         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
783
784                 switch (c) {
785
786                 case ARG_LOG_LEVEL:
787                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
788                                 log_error("Failed to parse log level %s.", optarg);
789                                 return r;
790                         }
791
792                         break;
793
794                 case ARG_LOG_TARGET:
795
796                         if ((r = log_set_target_from_string(optarg)) < 0) {
797                                 log_error("Failed to parse log target %s.", optarg);
798                                 return r;
799                         }
800
801                         break;
802
803                 case ARG_LOG_COLOR:
804
805                         if (optarg) {
806                                 if ((r = log_show_color_from_string(optarg)) < 0) {
807                                         log_error("Failed to parse log color setting %s.", optarg);
808                                         return r;
809                                 }
810                         } else
811                                 log_show_color(true);
812
813                         break;
814
815                 case ARG_LOG_LOCATION:
816
817                         if (optarg) {
818                                 if ((r = log_show_location_from_string(optarg)) < 0) {
819                                         log_error("Failed to parse log location setting %s.", optarg);
820                                         return r;
821                                 }
822                         } else
823                                 log_show_location(true);
824
825                         break;
826
827                 case ARG_DEFAULT_STD_OUTPUT:
828
829                         if ((r = exec_output_from_string(optarg)) < 0) {
830                                 log_error("Failed to parse default standard output setting %s.", optarg);
831                                 return r;
832                         } else
833                                 arg_default_std_output = r;
834                         break;
835
836                 case ARG_DEFAULT_STD_ERROR:
837
838                         if ((r = exec_output_from_string(optarg)) < 0) {
839                                 log_error("Failed to parse default standard error output setting %s.", optarg);
840                                 return r;
841                         } else
842                                 arg_default_std_error = r;
843                         break;
844
845                 case ARG_UNIT:
846
847                         if ((r = set_default_unit(optarg)) < 0) {
848                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
849                                 return r;
850                         }
851
852                         break;
853
854                 case ARG_SYSTEM:
855                         arg_running_as = SYSTEMD_SYSTEM;
856                         break;
857
858                 case ARG_USER:
859                         arg_running_as = SYSTEMD_USER;
860                         break;
861
862                 case ARG_TEST:
863                         arg_action = ACTION_TEST;
864                         break;
865
866                 case ARG_VERSION:
867                         arg_action = ACTION_VERSION;
868                         break;
869
870                 case ARG_DUMP_CONFIGURATION_ITEMS:
871                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
872                         break;
873
874                 case ARG_DUMP_CORE:
875                         r = optarg ? parse_boolean(optarg) : 1;
876                         if (r < 0) {
877                                 log_error("Failed to parse dump core boolean %s.", optarg);
878                                 return r;
879                         }
880                         arg_dump_core = r;
881                         break;
882
883                 case ARG_CRASH_SHELL:
884                         r = optarg ? parse_boolean(optarg) : 1;
885                         if (r < 0) {
886                                 log_error("Failed to parse crash shell boolean %s.", optarg);
887                                 return r;
888                         }
889                         arg_crash_shell = r;
890                         break;
891
892                 case ARG_CONFIRM_SPAWN:
893                         r = optarg ? parse_boolean(optarg) : 1;
894                         if (r < 0) {
895                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
896                                 return r;
897                         }
898                         arg_confirm_spawn = r;
899                         break;
900
901                 case ARG_SHOW_STATUS:
902                         r = optarg ? parse_boolean(optarg) : 1;
903                         if (r < 0) {
904                                 log_error("Failed to parse show status boolean %s.", optarg);
905                                 return r;
906                         }
907                         arg_show_status = r;
908                         break;
909
910                 case ARG_DESERIALIZE: {
911                         int fd;
912                         FILE *f;
913
914                         r = safe_atoi(optarg, &fd);
915                         if (r < 0 || fd < 0) {
916                                 log_error("Failed to parse deserialize option %s.", optarg);
917                                 return r < 0 ? r : -EINVAL;
918                         }
919
920                         fd_cloexec(fd, true);
921
922                         f = fdopen(fd, "r");
923                         if (!f) {
924                                 log_error("Failed to open serialization fd: %m");
925                                 return -errno;
926                         }
927
928                         if (serialization)
929                                 fclose(serialization);
930
931                         serialization = f;
932
933                         break;
934                 }
935
936                 case ARG_SWITCHED_ROOT:
937                         arg_switched_root = true;
938                         break;
939
940                 case 'h':
941                         arg_action = ACTION_HELP;
942                         break;
943
944                 case 'D':
945                         log_set_max_level(LOG_DEBUG);
946                         break;
947
948                 case 'b':
949                 case 's':
950                 case 'z':
951                         /* Just to eat away the sysvinit kernel
952                          * cmdline args without getopt() error
953                          * messages that we'll parse in
954                          * parse_proc_cmdline_word() or ignore. */
955
956                 case '?':
957                 default:
958                         if (getpid() != 1) {
959                                 log_error("Unknown option code %c", c);
960                                 return -EINVAL;
961                         }
962
963                         break;
964                 }
965
966         if (optind < argc && getpid() != 1) {
967                 /* Hmm, when we aren't run as init system
968                  * let's complain about excess arguments */
969
970                 log_error("Excess arguments.");
971                 return -EINVAL;
972         }
973
974         if (detect_container(NULL) > 0) {
975                 char **a;
976
977                 /* All /proc/cmdline arguments the kernel didn't
978                  * understand it passed to us. We're not really
979                  * interested in that usually since /proc/cmdline is
980                  * more interesting and complete. With one exception:
981                  * if we are run in a container /proc/cmdline is not
982                  * relevant for the container, hence we rely on argv[]
983                  * instead. */
984
985                 for (a = argv; a < argv + argc; a++) {
986                         r = parse_proc_cmdline_word(*a);
987                         if (r < 0) {
988                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
989                                 return r;
990                         }
991                 }
992         }
993
994         return 0;
995 }
996
997 static int help(void) {
998
999         printf("%s [OPTIONS...]\n\n"
1000                "Starts up and maintains the system or user services.\n\n"
1001                "  -h --help                      Show this help\n"
1002                "     --test                      Determine startup sequence, dump it and exit\n"
1003                "     --dump-configuration-items  Dump understood unit configuration items\n"
1004                "     --unit=UNIT                 Set default unit\n"
1005                "     --system                    Run a system instance, even if PID != 1\n"
1006                "     --user                      Run a user instance\n"
1007                "     --dump-core[=0|1]           Dump core on crash\n"
1008                "     --crash-shell[=0|1]         Run shell on crash\n"
1009                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1010                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1011                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1012                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1013                "     --log-color[=0|1]           Highlight important log messages\n"
1014                "     --log-location[=0|1]        Include code location in log messages\n"
1015                "     --default-standard-output=  Set default standard output for services\n"
1016                "     --default-standard-error=   Set default standard error output for services\n",
1017                program_invocation_short_name);
1018
1019         return 0;
1020 }
1021
1022 static int version(void) {
1023         puts(PACKAGE_STRING);
1024         puts(SYSTEMD_FEATURES);
1025
1026         return 0;
1027 }
1028
1029 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1030         FILE *f = NULL;
1031         FDSet *fds = NULL;
1032         int r;
1033
1034         assert(m);
1035         assert(_f);
1036         assert(_fds);
1037
1038         r = manager_open_serialization(m, &f);
1039         if (r < 0) {
1040                 log_error("Failed to create serialization file: %s", strerror(-r));
1041                 goto fail;
1042         }
1043
1044         /* Make sure nothing is really destructed when we shut down */
1045         m->n_reloading ++;
1046         bus_manager_send_reloading(m, true);
1047
1048         fds = fdset_new();
1049         if (!fds) {
1050                 r = -ENOMEM;
1051                 log_error("Failed to allocate fd set: %s", strerror(-r));
1052                 goto fail;
1053         }
1054
1055         r = manager_serialize(m, f, fds, switching_root);
1056         if (r < 0) {
1057                 log_error("Failed to serialize state: %s", strerror(-r));
1058                 goto fail;
1059         }
1060
1061         if (fseeko(f, 0, SEEK_SET) < 0) {
1062                 log_error("Failed to rewind serialization fd: %m");
1063                 goto fail;
1064         }
1065
1066         r = fd_cloexec(fileno(f), false);
1067         if (r < 0) {
1068                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1069                 goto fail;
1070         }
1071
1072         r = fdset_cloexec(fds, false);
1073         if (r < 0) {
1074                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1075                 goto fail;
1076         }
1077
1078         *_f = f;
1079         *_fds = fds;
1080
1081         return 0;
1082
1083 fail:
1084         fdset_free(fds);
1085
1086         if (f)
1087                 fclose(f);
1088
1089         return r;
1090 }
1091
1092 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1093         struct rlimit nl;
1094         int r;
1095
1096         assert(saved_rlimit);
1097
1098         /* Save the original RLIMIT_NOFILE so that we can reset it
1099          * later when transitioning from the initrd to the main
1100          * systemd or suchlike. */
1101         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1102                 log_error("Reading RLIMIT_NOFILE failed: %m");
1103                 return -errno;
1104         }
1105
1106         /* Make sure forked processes get the default kernel setting */
1107         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1108                 struct rlimit *rl;
1109
1110                 rl = newdup(struct rlimit, saved_rlimit, 1);
1111                 if (!rl)
1112                         return log_oom();
1113
1114                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1115         }
1116
1117         /* Bump up the resource limit for ourselves substantially */
1118         nl.rlim_cur = nl.rlim_max = 64*1024;
1119         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1120         if (r < 0) {
1121                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1122                 return r;
1123         }
1124
1125         return 0;
1126 }
1127
/* Logs a warning unless /etc/mtab is a symlink whose target is
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        char *target;
        bool points_to_proc = false;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                points_to_proc =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");
                free(target);
        }

        if (points_to_proc)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1147
/* Warns when /usr looks like a separate file system that has not been
 * mounted yet. The check used is whether dir_is_empty("/usr") reports the
 * directory as empty; any other result (non-empty or error) is treated
 * as fine. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1159
/* Checks for /proc/cgroups. If it cannot be accessed, a warning about
 * missing control group support is logged and boot-up is delayed by ten
 * seconds. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1174
1175 static int initialize_join_controllers(void) {
1176         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1177          * + "net_prio". We'd like to add "cpuset" to the mix, but
1178          * "cpuset" does't really work for groups with no initialized
1179          * attributes. */
1180
1181         arg_join_controllers = new(char**, 3);
1182         if (!arg_join_controllers)
1183                 return -ENOMEM;
1184
1185         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1186         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1187         arg_join_controllers[2] = NULL;
1188
1189         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1190                 free_join_controllers();
1191                 return -ENOMEM;
1192         }
1193
1194         return 0;
1195 }
1196
1197 int main(int argc, char *argv[]) {
1198         Manager *m = NULL;
1199         int r, retval = EXIT_FAILURE;
1200         usec_t before_startup, after_startup;
1201         char timespan[FORMAT_TIMESPAN_MAX];
1202         FDSet *fds = NULL;
1203         bool reexecute = false;
1204         const char *shutdown_verb = NULL;
1205         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1206         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1207         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1208         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1209         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1210         static char systemd[] = "systemd";
1211         bool skip_setup = false;
1212         int j;
1213         bool loaded_policy = false;
1214         bool arm_reboot_watchdog = false;
1215         bool queue_default_job = false;
1216         char *switch_root_dir = NULL, *switch_root_init = NULL;
1217         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1218
1219 #ifdef HAVE_SYSV_COMPAT
1220         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1221                 /* This is compatibility support for SysV, where
1222                  * calling init as a user is identical to telinit. */
1223
1224                 errno = -ENOENT;
1225                 execv(SYSTEMCTL_BINARY_PATH, argv);
1226                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1227                 return 1;
1228         }
1229 #endif
1230
1231         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1232         dual_timestamp_get(&userspace_timestamp);
1233
1234         /* Determine if this is a reexecution or normal bootup. We do
1235          * the full command line parsing much later, so let's just
1236          * have a quick peek here. */
1237         if (strv_find(argv+1, "--deserialize"))
1238                 skip_setup = true;
1239
1240         /* If we have switched root, do all the special setup
1241          * things */
1242         if (strv_find(argv+1, "--switched-root"))
1243                 skip_setup = false;
1244
1245         /* If we get started via the /sbin/init symlink then we are
1246            called 'init'. After a subsequent reexecution we are then
1247            called 'systemd'. That is confusing, hence let's call us
1248            systemd right-away. */
1249         program_invocation_short_name = systemd;
1250         prctl(PR_SET_NAME, systemd);
1251
1252         saved_argv = argv;
1253         saved_argc = argc;
1254
1255         log_show_color(isatty(STDERR_FILENO) > 0);
1256
1257         /* Disable the umask logic */
1258         if (getpid() == 1)
1259                 umask(0);
1260
1261         if (getpid() == 1 && detect_container(NULL) <= 0) {
1262
1263                 /* Running outside of a container as PID 1 */
1264                 arg_running_as = SYSTEMD_SYSTEM;
1265                 make_null_stdio();
1266                 log_set_target(LOG_TARGET_KMSG);
1267                 log_open();
1268
1269                 if (in_initrd())
1270                         initrd_timestamp = userspace_timestamp;
1271
1272                 if (!skip_setup) {
1273                         mount_setup_early();
1274                         dual_timestamp_get(&security_start_timestamp);
1275                         if (selinux_setup(&loaded_policy) < 0)
1276                                 goto finish;
1277                         if (ima_setup() < 0)
1278                                 goto finish;
1279                         if (smack_setup() < 0)
1280                                 goto finish;
1281                         dual_timestamp_get(&security_finish_timestamp);
1282                 }
1283
1284                 if (label_init(NULL) < 0)
1285                         goto finish;
1286
1287                 if (!skip_setup) {
1288                         if (hwclock_is_localtime() > 0) {
1289                                 int min;
1290
1291                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1292                                 r = hwclock_set_timezone(&min);
1293                                 if (r < 0)
1294                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1295                                 else
1296                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1297                         } else if (!in_initrd()) {
1298                                 /*
1299                                  * Do dummy first-time call to seal the kernel's time warp magic
1300                                  *
1301                                  * Do not call this this from inside the initrd. The initrd might not
1302                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1303                                  * that way. In such case, we need to delay the time-warp or the sealing
1304                                  * until we reach the real system.
1305                                  */
1306                                 hwclock_reset_timezone();
1307
1308                                 /* Tell the kernel our timezone */
1309                                 r = hwclock_set_timezone(NULL);
1310                                 if (r < 0)
1311                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1312                         }
1313                 }
1314
1315                 /* Set the default for later on, but don't actually
1316                  * open the logs like this for now. Note that if we
1317                  * are transitioning from the initrd there might still
1318                  * be journal fd open, and we shouldn't attempt
1319                  * opening that before we parsed /proc/cmdline which
1320                  * might redirect output elsewhere. */
1321                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1322
1323         } else if (getpid() == 1) {
1324                 /* Running inside a container, as PID 1 */
1325                 arg_running_as = SYSTEMD_SYSTEM;
1326                 log_set_target(LOG_TARGET_CONSOLE);
1327                 log_close_console(); /* force reopen of /dev/console */
1328                 log_open();
1329
1330                 /* For the later on, see above... */
1331                 log_set_target(LOG_TARGET_JOURNAL);
1332
1333                 /* clear the kernel timestamp,
1334                  * because we are in a container */
1335                 kernel_timestamp.monotonic = 0ULL;
1336                 kernel_timestamp.realtime = 0ULL;
1337
1338         } else {
1339                 /* Running as user instance */
1340                 arg_running_as = SYSTEMD_USER;
1341                 log_set_target(LOG_TARGET_AUTO);
1342                 log_open();
1343
1344                 /* clear the kernel timestamp,
1345                  * because we are not PID 1 */
1346                 kernel_timestamp.monotonic = 0ULL;
1347                 kernel_timestamp.realtime = 0ULL;
1348         }
1349
1350         /* Initialize default unit */
1351         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1352         if (r < 0) {
1353                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1354                 goto finish;
1355         }
1356
1357         r = initialize_join_controllers();
1358         if (r < 0)
1359                 goto finish;
1360
1361         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1362          * /proc/$PID/fd is available. */
1363         if (getpid() == 1) {
1364                 r = mount_setup(loaded_policy);
1365                 if (r < 0)
1366                         goto finish;
1367         }
1368
1369         /* Reset all signal handlers. */
1370         assert_se(reset_all_signal_handlers() == 0);
1371
1372         ignore_signals(SIGNALS_IGNORE, -1);
1373
1374         if (parse_config_file() < 0)
1375                 goto finish;
1376
1377         if (arg_running_as == SYSTEMD_SYSTEM)
1378                 if (parse_proc_cmdline() < 0)
1379                         goto finish;
1380
1381         log_parse_environment();
1382
1383         if (parse_argv(argc, argv) < 0)
1384                 goto finish;
1385
1386         if (arg_action == ACTION_TEST &&
1387             geteuid() == 0) {
1388                 log_error("Don't run test mode as root.");
1389                 goto finish;
1390         }
1391
1392         if (arg_running_as == SYSTEMD_USER &&
1393             arg_action == ACTION_RUN &&
1394             sd_booted() <= 0) {
1395                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1396                 goto finish;
1397         }
1398
1399         if (arg_running_as == SYSTEMD_SYSTEM &&
1400             arg_action == ACTION_RUN &&
1401             running_in_chroot() > 0) {
1402                 log_error("Cannot be run in a chroot() environment.");
1403                 goto finish;
1404         }
1405
1406         if (arg_action == ACTION_HELP) {
1407                 retval = help();
1408                 goto finish;
1409         } else if (arg_action == ACTION_VERSION) {
1410                 retval = version();
1411                 goto finish;
1412         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1413                 unit_dump_config_items(stdout);
1414                 retval = EXIT_SUCCESS;
1415                 goto finish;
1416         } else if (arg_action == ACTION_DONE) {
1417                 retval = EXIT_SUCCESS;
1418                 goto finish;
1419         }
1420
1421         if (arg_running_as == SYSTEMD_USER &&
1422             !getenv("XDG_RUNTIME_DIR")) {
1423                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1424                 goto finish;
1425         }
1426
1427         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1428
1429         /* Close logging fds, in order not to confuse fdset below */
1430         log_close();
1431
1432         /* Remember open file descriptors for later deserialization */
1433         r = fdset_new_fill(&fds);
1434         if (r < 0) {
1435                 log_error("Failed to allocate fd set: %s", strerror(-r));
1436                 goto finish;
1437         } else
1438                 fdset_cloexec(fds, true);
1439
1440         if (serialization)
1441                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1442
1443         if (arg_running_as == SYSTEMD_SYSTEM)
1444                 /* Become a session leader if we aren't one yet. */
1445                 setsid();
1446
1447         /* Move out of the way, so that we won't block unmounts */
1448         assert_se(chdir("/")  == 0);
1449
1450         /* Reset the console, but only if this is really init and we
1451          * are freshly booted */
1452         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1453                 console_setup(getpid() == 1 && !skip_setup);
1454
1455         /* Open the logging devices, if possible and necessary */
1456         log_open();
1457
1458         /* Make sure we leave a core dump without panicing the
1459          * kernel. */
1460         if (getpid() == 1) {
1461                 install_crash_handler();
1462
1463                 r = mount_cgroup_controllers(arg_join_controllers);
1464                 if (r < 0)
1465                         goto finish;
1466         }
1467
1468         if (arg_running_as == SYSTEMD_SYSTEM) {
1469                 const char *virtualization = NULL;
1470
1471                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1472
1473                 detect_virtualization(&virtualization);
1474                 if (virtualization)
1475                         log_info("Detected virtualization '%s'.", virtualization);
1476
1477                 if (in_initrd())
1478                         log_info("Running in initial RAM disk.");
1479
1480         } else
1481                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1482
1483         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1484                 if (arg_show_status || plymouth_running())
1485                         status_welcome();
1486
1487 #ifdef HAVE_KMOD
1488                 if (detect_container(NULL) <= 0)
1489                         kmod_setup();
1490 #endif
1491                 hostname_setup();
1492                 machine_id_setup();
1493                 loopback_setup();
1494
1495                 test_mtab();
1496                 test_usr();
1497                 test_cgroups();
1498         }
1499
1500         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1501                 watchdog_set_timeout(&arg_runtime_watchdog);
1502
1503         if (arg_timer_slack_nsec != (nsec_t) -1)
1504                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1505                         log_error("Failed to adjust timer slack: %m");
1506
1507         if (arg_capability_bounding_set_drop) {
1508                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1509                 if (r < 0) {
1510                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1511                         goto finish;
1512                 }
1513                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1514                 if (r < 0) {
1515                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1516                         goto finish;
1517                 }
1518         }
1519
1520         if (arg_running_as == SYSTEMD_USER) {
1521                 /* Become reaper of our children */
1522                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1523                         log_warning("Failed to make us a subreaper: %m");
1524                         if (errno == EINVAL)
1525                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1526                 }
1527         }
1528
1529         if (arg_running_as == SYSTEMD_SYSTEM)
1530                 bump_rlimit_nofile(&saved_rlimit_nofile);
1531
1532         r = manager_new(arg_running_as, &m);
1533         if (r < 0) {
1534                 log_error("Failed to allocate manager object: %s", strerror(-r));
1535                 goto finish;
1536         }
1537
1538         m->confirm_spawn = arg_confirm_spawn;
1539         m->default_std_output = arg_default_std_output;
1540         m->default_std_error = arg_default_std_error;
1541         m->default_restart_usec = arg_default_restart_usec;
1542         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1543         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1544         m->default_start_limit_interval = arg_default_start_limit_interval;
1545         m->default_start_limit_burst = arg_default_start_limit_burst;
1546         m->runtime_watchdog = arg_runtime_watchdog;
1547         m->shutdown_watchdog = arg_shutdown_watchdog;
1548         m->userspace_timestamp = userspace_timestamp;
1549         m->kernel_timestamp = kernel_timestamp;
1550         m->initrd_timestamp = initrd_timestamp;
1551         m->security_start_timestamp = security_start_timestamp;
1552         m->security_finish_timestamp = security_finish_timestamp;
1553
1554         manager_set_default_rlimits(m, arg_default_rlimit);
1555
1556         if (arg_default_environment)
1557                 manager_environment_add(m, NULL, arg_default_environment);
1558
1559         manager_set_show_status(m, arg_show_status);
1560
1561         /* Remember whether we should queue the default job */
1562         queue_default_job = !serialization || arg_switched_root;
1563
1564         before_startup = now(CLOCK_MONOTONIC);
1565
1566         r = manager_startup(m, serialization, fds);
1567         if (r < 0)
1568                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1569
1570         /* This will close all file descriptors that were opened, but
1571          * not claimed by any unit. */
1572         fdset_free(fds);
1573         fds = NULL;
1574
1575         if (serialization) {
1576                 fclose(serialization);
1577                 serialization = NULL;
1578         }
1579
1580         if (queue_default_job) {
1581                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1582                 Unit *target = NULL;
1583                 Job *default_unit_job;
1584
1585                 log_debug("Activating default unit: %s", arg_default_unit);
1586
1587                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1588                 if (r < 0)
1589                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1590                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1591                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1592                 else if (target->load_state == UNIT_MASKED)
1593                         log_error("Default target masked.");
1594
1595                 if (!target || target->load_state != UNIT_LOADED) {
1596                         log_info("Trying to load rescue target...");
1597
1598                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1599                         if (r < 0) {
1600                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1601                                 goto finish;
1602                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1603                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1604                                 goto finish;
1605                         } else if (target->load_state == UNIT_MASKED) {
1606                                 log_error("Rescue target masked.");
1607                                 goto finish;
1608                         }
1609                 }
1610
1611                 assert(target->load_state == UNIT_LOADED);
1612
1613                 if (arg_action == ACTION_TEST) {
1614                         printf("-> By units:\n");
1615                         manager_dump_units(m, stdout, "\t");
1616                 }
1617
1618                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1619                 if (r == -EPERM) {
1620                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1621
1622                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1623                         if (r < 0) {
1624                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1625                                 goto finish;
1626                         }
1627                 } else if (r < 0) {
1628                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1629                         goto finish;
1630                 }
1631
1632                 m->default_unit_job_id = default_unit_job->id;
1633
1634                 after_startup = now(CLOCK_MONOTONIC);
1635                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1636                          "Loaded units and determined initial transaction in %s.",
1637                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1638
1639                 if (arg_action == ACTION_TEST) {
1640                         printf("-> By jobs:\n");
1641                         manager_dump_jobs(m, stdout, "\t");
1642                         retval = EXIT_SUCCESS;
1643                         goto finish;
1644                 }
1645         }
1646
1647         for (;;) {
1648                 r = manager_loop(m);
1649                 if (r < 0) {
1650                         log_error("Failed to run mainloop: %s", strerror(-r));
1651                         goto finish;
1652                 }
1653
1654                 switch (m->exit_code) {
1655
1656                 case MANAGER_EXIT:
1657                         retval = EXIT_SUCCESS;
1658                         log_debug("Exit.");
1659                         goto finish;
1660
1661                 case MANAGER_RELOAD:
1662                         log_info("Reloading.");
1663                         r = manager_reload(m);
1664                         if (r < 0)
1665                                 log_error("Failed to reload: %s", strerror(-r));
1666                         break;
1667
1668                 case MANAGER_REEXECUTE:
1669
1670                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1671                                 goto finish;
1672
1673                         reexecute = true;
1674                         log_notice("Reexecuting.");
1675                         goto finish;
1676
1677                 case MANAGER_SWITCH_ROOT:
1678                         /* Steal the switch root parameters */
1679                         switch_root_dir = m->switch_root;
1680                         switch_root_init = m->switch_root_init;
1681                         m->switch_root = m->switch_root_init = NULL;
1682
1683                         if (!switch_root_init)
1684                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1685                                         goto finish;
1686
1687                         reexecute = true;
1688                         log_notice("Switching root.");
1689                         goto finish;
1690
1691                 case MANAGER_REBOOT:
1692                 case MANAGER_POWEROFF:
1693                 case MANAGER_HALT:
1694                 case MANAGER_KEXEC: {
1695                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1696                                 [MANAGER_REBOOT] = "reboot",
1697                                 [MANAGER_POWEROFF] = "poweroff",
1698                                 [MANAGER_HALT] = "halt",
1699                                 [MANAGER_KEXEC] = "kexec"
1700                         };
1701
1702                         assert_se(shutdown_verb = table[m->exit_code]);
1703                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1704
1705                         log_notice("Shutting down.");
1706                         goto finish;
1707                 }
1708
1709                 default:
1710                         assert_not_reached("Unknown exit code.");
1711                 }
1712         }
1713
1714 finish:
1715         if (m)
1716                 manager_free(m);
1717
1718         for (j = 0; j < RLIMIT_NLIMITS; j++)
1719                 free(arg_default_rlimit[j]);
1720
1721         free(arg_default_unit);
1722         free_join_controllers();
1723
1724         label_finish();
1725
1726         if (reexecute) {
1727                 const char **args;
1728                 unsigned i, args_size;
1729
1730                 /* Close and disarm the watchdog, so that the new
1731                  * instance can reinitialize it, but doesn't get
1732                  * rebooted while we do that */
1733                 watchdog_close(true);
1734
1735                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1736                  * that the new systemd can pass the kernel default to
1737                  * its child processes */
1738                 if (saved_rlimit_nofile.rlim_cur > 0)
1739                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1740
1741                 if (switch_root_dir) {
1742                         /* Kill all remaining processes from the
1743                          * initrd, but don't wait for them, so that we
1744                          * can handle the SIGCHLD for them after
1745                          * deserializing. */
1746                         broadcast_signal(SIGTERM, false, true);
1747
1748                         /* And switch root */
1749                         r = switch_root(switch_root_dir);
1750                         if (r < 0)
1751                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1752                 }
1753
1754                 args_size = MAX(6, argc+1);
1755                 args = newa(const char*, args_size);
1756
1757                 if (!switch_root_init) {
1758                         char sfd[16];
1759
1760                         /* First try to spawn ourselves with the right
1761                          * path, and with full serialization. We do
1762                          * this only if the user didn't specify an
1763                          * explicit init to spawn. */
1764
1765                         assert(serialization);
1766                         assert(fds);
1767
1768                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1769                         char_array_0(sfd);
1770
1771                         i = 0;
1772                         args[i++] = SYSTEMD_BINARY_PATH;
1773                         if (switch_root_dir)
1774                                 args[i++] = "--switched-root";
1775                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1776                         args[i++] = "--deserialize";
1777                         args[i++] = sfd;
1778                         args[i++] = NULL;
1779
1780                         /* do not pass along the environment we inherit from the kernel or initrd */
1781                         if (switch_root_dir)
1782                                 clearenv();
1783
1784                         assert(i <= args_size);
1785                         execv(args[0], (char* const*) args);
1786                 }
1787
1788                 /* Try the fallback, if there is any, without any
1789                  * serialization. We pass the original argv[] and
1790                  * envp[]. (Well, modulo the ordering changes due to
1791                  * getopt() in argv[], and some cleanups in envp[],
1792                  * but let's hope that doesn't matter.) */
1793
1794                 if (serialization) {
1795                         fclose(serialization);
1796                         serialization = NULL;
1797                 }
1798
1799                 if (fds) {
1800                         fdset_free(fds);
1801                         fds = NULL;
1802                 }
1803
1804                 /* Reopen the console */
1805                 make_console_stdio();
1806
1807                 for (j = 1, i = 1; j < argc; j++)
1808                         args[i++] = argv[j];
1809                 args[i++] = NULL;
1810                 assert(i <= args_size);
1811
1812                 if (switch_root_init) {
1813                         args[0] = switch_root_init;
1814                         execv(args[0], (char* const*) args);
1815                         log_warning("Failed to execute configured init, trying fallback: %m");
1816                 }
1817
1818                 args[0] = "/sbin/init";
1819                 execv(args[0], (char* const*) args);
1820
1821                 if (errno == ENOENT) {
1822                         log_warning("No /sbin/init, trying fallback");
1823
1824                         args[0] = "/bin/sh";
1825                         args[1] = NULL;
1826                         execv(args[0], (char* const*) args);
1827                         log_error("Failed to execute /bin/sh, giving up: %m");
1828                 } else
1829                         log_warning("Failed to execute /sbin/init, giving up: %m");
1830         }
1831
1832         if (serialization)
1833                 fclose(serialization);
1834
1835         if (fds)
1836                 fdset_free(fds);
1837
1838 #ifdef HAVE_VALGRIND_VALGRIND_H
1839         /* If we are PID 1 and running under valgrind, then let's exit
1840          * here explicitly. valgrind will only generate nice output on
1841          * exit(), not on exec(), hence let's do the former not the
1842          * latter here. */
1843         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1844                 return 0;
1845 #endif
1846
1847         if (shutdown_verb) {
1848                 const char * command_line[] = {
1849                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1850                         shutdown_verb,
1851                         NULL
1852                 };
1853                 _cleanup_strv_free_ char **env_block = NULL;
1854                 env_block = strv_copy(environ);
1855
1856                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1857                         char *e;
1858
1859                         /* If we reboot let's set the shutdown
1860                          * watchdog and tell the shutdown binary to
1861                          * repeatedly ping it */
1862                         watchdog_set_timeout(&arg_shutdown_watchdog);
1863                         watchdog_close(false);
1864
1865                         /* Tell the binary how often to ping, ignore failure */
1866                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1867                                 strv_push(&env_block, e);
1868                 } else
1869                         watchdog_close(true);
1870
1871                 /* Avoid the creation of new processes forked by the
1872                  * kernel; at this point, we will not listen to the
1873                  * signals anyway */
1874                 if (detect_container(NULL) <= 0)
1875                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1876
1877                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1878                 log_error("Failed to execute shutdown binary, freezing: %m");
1879         }
1880
1881         if (getpid() == 1)
1882                 freeze();
1883
1884         return retval;
1885 }